From 75530368cfd8b746cdf14f0a94132d5d90fdcb18 Mon Sep 17 00:00:00 2001
From: kirk gardner <kirk@worm.local>
Date: Mon, 2 Apr 2018 21:58:37 -0400
Subject: [PATCH] dict examples

---
 tests/one.txt       | 78 +++++++++++++++++++++++++++++++++++++++++++++
 tests/two.txt       | 78 +++++++++++++++++++++++++++++++++++++++++++++
 wff_dict.jl         | 25 +++++++++++++++
 wff_dict_verbose.jl | 48 ++++++++++++++++++++++++++++
 4 files changed, 229 insertions(+)
 create mode 100755 tests/one.txt
 create mode 100755 tests/two.txt
 create mode 100755 wff_dict.jl
 create mode 100755 wff_dict_verbose.jl

diff --git a/tests/one.txt b/tests/one.txt
new file mode 100755
index 0000000..e9295ab
--- /dev/null
+++ b/tests/one.txt
@@ -0,0 +1,78 @@
+SE4102 Programming Language Assignments (PLA)
+Due Dates Listed for Each Project
+
+PLA1 C Program  - Due January 29, 11:59pm
+
+Complete the Word Count Functionality (WCF) and  Word Frequency Functionality
+(WFF) in C as described in: CommonProbBackground.pdf
+Test your program using: http://www.engr.uconn.edu/~steve
+Utilize gcc and send me a .c file.  Make sure it compiles and runs on tge command line
+in Linux.
+
+PLA3 Modula-2 Program	 - Due February 26, 11:59pm
+
+Implement the Word Frequency Function (WFF) in Modula-2 using findwords.mod as a
+basis, as described in: CommonProbBackground.pdf
+You need to develop a dual solution (two programs) for this project:
+
+I.	Single module solution using records based off of findwords.mod sample code
+that outputs to stdout a list of words and their frequency.
+II.	Single module reworking your solution from I into a read procedure that reads the
+lines into the document variable of type AllLines, a wff procedure that  generates
+the word frequency in the wordsindoc variable of type AllWords and the
+WordFreq record, and a sortandprint procedure that outputs to stdout a list of
+words and their frequency in alphabetical order.
+
+Utilize XDS Modula-2 https://www.excelsior-usa.com/xds.htmland send me your .mod
+file.
+
+PLA4 Software Evolution of an Ada Program	 - Due March 9, 11:59pm
+
+Modify an existing word frequency function Ada program (u_words_min.adb) or
+(u_words_tight.adb) that does word frequency by assuming that there is one word in
+every line of the input.   Both of the adb files assume a word of 20 characters as a string
+? in the posted web copies of the two files, this has been increased to 120 which has the
+result of reading the entire line and treating the entire line as one large word which can
+include spaces.  So, when the program is compiled and run, the input generates the
+following output:
+Note the Control Z carriage return that is EOF and then generates the output.  This
+programming project is an exercise in learning a new language by having to modify and
+extend someone else?s code.  The specific extensions are as follows for three separate
+versions:
+
+V1.	Change the original program to alphabetically sort the outputted list of words?
+first executable.
+
+V2.	+ V1 Change the logic so that the words within each line are identified? this
+would then result in a full word frequency functionality and if the sort has been
+implemented, it will now sort the entire list ? second executable.
+
+V3.	 +V1+V2 Change the program so that the data is read from input.txt and written to
+output.tex? third executable.
+
+PLA5 Word Index Using Strings in Ada     - Due March 26, 11:59pm
+
+This assignment extends PLA3v3 with a word index that tracks the line(s) within the
+input file where each word occurs.   There may be multiple occurrences of the word in a
+file. Current output of PLA3v3 is:
+
+Goodbye 4
+Hello 1
+World 1
+
+You are to update the output to:
+
+Goodbye: in lines 1-2-5-6-6 wc=5
+Hello: in lines 3 wc=1
+World: in lines 3 wc=1
+
+In addition, change you assumptions on words:
+Words for the word count and index must take into consideration the following: Each
+word must be at least one character and start with a letter.  If a word has 2 or more
+characters, then the second and successive characters can be letters, digits, the
+underscore, or the hyphen.  Continue to recognize and discard other characters.  Also,
+note that you must eliminate white space (multiple spaces or tabs) between words.
+For a suggested solution approach, extend the various data structures (Word and
+Word_List) with the LineNo (that word occurred in) and curr_line (of the file).  In
+the new sample code, new_u_words_min,  these have both been added.  The output
+is shown below where the first number is the last line and the second the word count.
diff --git a/tests/two.txt b/tests/two.txt
new file mode 100755
index 0000000..1a584fc
--- /dev/null
+++ b/tests/two.txt
@@ -0,0 +1,78 @@
+SE4102 Team Semester Project
+Due Dates as Below
+
+For your team semester project, form teams of 2students to explore indepth a programming 
+language, and submit a multiple part report and PowerPoint presentation  with Specific 
+content/objectives and responsibilities as given as four deliverables: 
+
+
+A.	Identify the Issues and Questions of your topic by providing a one-page discussion 
+of your chosen programming language. Why did you choose your programming 
+language? What types of problems does it solve? What application domain means is 
+it relevant for? When how and who were the inventors of the programming 
+language?
+
+
+B.	10-15 page final report your chosen programming language.  Expand the one piece 
+discussion more detail. Discuss from the perspective of technology perspective,  the 
+compiler available and the platforms  the IDE etc.
+
+
+C.	Demonstrate your chosen programming language through code perhaps using 
+word count or word frequency program if relevant.  
+
+
+D.	40-50 slide PPT presentation on your chosen programming language using the 
+CSE4102 template on the course web page.  This presentation should include the 
+material from A, B, and C. Note that you also need to prepare a maximum 25 slide 
+reduced version for presentation in the class.
+
+
+The intent is for one team member to be responsible for B in a second team member to be 
+responsible for D in both team members to share A and C.
+
+Possible Languages to consider are based on categories or application domains or for 
+specific purposes:
+
+
+1.	Languages for IoT, concurrency/parallelism, machine learning, secure transactions
+2.	Categories as in List_of_programming_languages_by_type 
+3.	Time period organization as in 
+https://en.m.wikipedia.org/wiki/Timeline_of_programming_languages  
+
+
+You cannot choose any language from the course programming assignments (C, C++, 
+Pascal, Modula-2, Ada, Prolog, Go), if it?s covered extensively in class (Fortran, COBOL), 
+from a CSE class (Python, scheme), or is  widely in use  (Java, JavaScript, C#, Objective-
+C, Swift, etc.).
+
+
+Make sure you select a language that has an available compiler to utilize for demonstration 
+and testing purposes.
+
+
+Your team needs to get approval by the instructor for the chosen language.
+
+
+Formatting and Submission Requirements:
+
+*	All slides must be formatted with  cse4102template.pptx 
+*	Slide 2 of  cse4102template.pptx has organization of final PPT.
+*	For written reports A and B,, 1 page is 12pt, 1in margins, single spaced, Times New 
+Roman fonts. You must submit an MS Word Document.
+*	For presentations, use PowerPoint.
+*	Tables, Figures, etc. are not counted in the page requirements of B.
+
+Important dates
+ 
+By  January 29  at 11:59pm: submit to Steve: a list of team members, your chosen 
+programming language and a one-paragraph description of the language, and which team 
+member is doing which deliverable (B and D).
+ 
+By  April 16 at 11:59pm: submit to Steve the two PPT presentations (short and long). 
+
+Presentations in Class April 17/19/24/26 and May 1/3
+
+By  May 4 at midnight at 11:59pm: submit your final report.
+
+
diff --git a/wff_dict.jl b/wff_dict.jl
new file mode 100755
index 0000000..3c756c7
--- /dev/null
+++ b/wff_dict.jl
@@ -0,0 +1,25 @@
+#!/usr/local/bin/julia -i
+
+files = isempty(ARGS) ? (println("please provide at least one file name!"); exit()) : ARGS # need >= 1 file
+delims = collect(" \n\t.,()?") # a string iterates as characters, so this is a Vector{Char} of delimiters
+
+addword!(d,word) = d[word] = get(d, word, 0) + 1 # bump the count of word in d (unseen words start at 0)
+
+function findwords(file) # returns Dict{String,Int}: each word in `file` => number of occurrences
+    d = Dict{String,Int}()
+    println("reading $(file)")
+    open(file) do f
+        s = read(f, String) # whole file as one string (readstring was removed in julia 1.0)
+        foreach(word->addword!(d,word), filter(!isempty, split(s,delims))) # skip "" between adjacent delimiters
+    end
+    d # if no return statement last line is returned
+end
+
+word_dicts = Dict(map(file -> (file => findwords(file)), files)) # dict built from a vector of file=>counts pairs
+
+function printwords(file) # print every word=>count pair recorded for `file`
+    println("\nword frequencies for $file")
+    map(kv -> println("\t$kv"), collect(word_dicts[file])) # word_dicts is in global scope!
+end
+
+foreach(printwords, files) # report each file in the order given on the command line
diff --git a/wff_dict_verbose.jl b/wff_dict_verbose.jl
new file mode 100755
index 0000000..d264d1a
--- /dev/null
+++ b/wff_dict_verbose.jl
@@ -0,0 +1,48 @@
+#!/usr/local/bin/julia -i
+# the shebang line above is not necessary, but it allows one to call
+#   ./wff_dict_verbose.jl ARGS...
+# after chmod +x wff_dict_verbose.jl
+
+# Alternatively call
+#   julia -i wff_dict_verbose.jl ARGS...
+# the -i denotes "interactive mode", similar to the interactive interpreter in python.
+# to just execute the code, omit -i
+
+if isempty(ARGS) # ARGS is an array of command line arguments as strings (separated by spaces)
+    println("please provide at least one file name!")
+    exit() # ... exit interpreter
+else
+    files = ARGS[1:end] # copy of every argument; indexing starts at 1!
+end
+
+# instantiate a dictionary of dictionaries
+# one dictionary for each file provided, with the file name as key
+word_dicts = Dict(map(file -> (file => Dict{String, Int}()), files)) # optional typing
+delims = Char[' ', '\n', '\t', '.', ',', '(', ')', '?'] # single quotes: character literal
+
+for file in files
+    println("reading $(file)") # string interpolation (parentheses not necessary)!
+    open(file) do f # alternatively, f = open(file), just have to remember close(f) when done
+        s = read(f, String) # read entire contents as string (readstring was removed in julia 1.0)
+        for word in filter(!isempty, split(s,delims)) # split by 'delims'; adjacent delimiters yield "", skip those
+            if haskey(word_dicts[file], word) # check if word has already been seen
+                word_dicts[file][word] += 1 # if so, increment counter
+            else
+                word_dicts[file][word] = 1 # otherwise add word as key with value 1
+            end
+        end
+    end
+end
+
+# print word frequencies for each file
+for file in files
+    freqs = word_dicts[file] # word => count dictionary built for this file
+    println("\nword frequencies for $file") # linebreak (\n) escape plus interpolation of file
+    # each kv is a word=>count Pair; "\t" is the tab escape
+    foreach(kv -> println("\t$(kv)"), freqs)
+end
+
+foreach(println, ("\nyou're now in interactive mode!", # closing hints for the interactive session
+    "type word_dicts in the command line to view your dictionary",
+    "\tlooking up key values is the same syntax as indexing",
+    "\te.x. word_dicts[\"$(files[1])\"] would give you the dictionary of word frequencies for $(files[1])"))