From 75530368cfd8b746cdf14f0a94132d5d90fdcb18 Mon Sep 17 00:00:00 2001 From: kirk gardner Date: Mon, 2 Apr 2018 21:58:37 -0400 Subject: [PATCH] dict examples --- tests/one.txt | 78 +++++++++++++++++++++++++++++++++++++++++++++ tests/two.txt | 78 +++++++++++++++++++++++++++++++++++++++++++++ wff_dict.jl | 25 +++++++++++++++ wff_dict_verbose.jl | 48 ++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+) create mode 100755 tests/one.txt create mode 100755 tests/two.txt create mode 100755 wff_dict.jl create mode 100755 wff_dict_verbose.jl diff --git a/tests/one.txt b/tests/one.txt new file mode 100755 index 0000000..e9295ab --- /dev/null +++ b/tests/one.txt @@ -0,0 +1,78 @@ +SE4102 Programming Language Assignments (PLA) +Due Dates Listed for Each Project + +PLA1 C Program - Due January 29, 11:59pm + +Complete the Word Count Functionality (WCF) and Word Frequency Functionality +(WFF) in C as described in: CommonProbBackground.pdf +Test your program using: http://www.engr.uconn.edu/~steve +Utilize gcc and send me a .c file. Make sure it compiles and runs on tge command line +in Linux. + +PLA3 Modula-2 Program - Due February 26, 11:59pm + +Implement the Word Frequency Function (WFF) in Modula-2 using findwords.mod as a +basis, as described in: CommonProbBackground.pdf +You need to develop a dual solution (two programs) for this project: + +I. Single module solution using records based off of findwords.mod sample code +that outputs to stdout a list of words and their frequency. +II. Single module reworking your solution from I into a read procedure that reads the +lines into the document variable of type AllLines, a wff procedure that generates +the word frequency in the wordsindoc variable of type AllWords and the +WordFreq record, and a sortandprint procedure that outputs to stdout a list of +words and their frequency in alphabetical order. + +Utilize XDS Modula-2 https://www.excelsior-usa.com/xds.htmland send me your .mod +file. 
+ +PLA4 Software Evolution of an Ada Program - Due March 9, 11:59pm + +Modify an existing word frequency function Ada program (u_words_min.adb) or +(u_words_tight.adb) that does word frequency by assuming that there is one word in +every line of the input. Both of the adb files assume a word of 20 characters as a string +? in the posted web copies of the two files, this has been increased to 120 which has the +result of reading the entire line and treating the entire line as one large word which can +include spaces. So, when the program is compiled and run, the input generates the +following output: +Note the Control Z carriage return that is EOF and then generates the output. This +programming project is an exercise in learning a new language by having to modify and +extend someone else?s code. The specific extensions are as follows for three separate +versions: + +V1. Change the original program to alphabetically sort the outputted list of words? +first executable. + +V2. + V1 Change the logic so that the words within each line are identified? this +would then result in a full word frequency functionality and if the sort has been +implemented, it will now sort the entire list ? second executable. + +V3. +V1+V2 Change the program so that the data is read from input.txt and written to +output.tex? third executable. + +PLA5 Word Index Using Strings in Ada - Due March 26, 11:59pm + +This assignment extends PLA3v3 with a word index that tracks the line(s) within the +input file where each word occurs. There may be multiple occurrences of the word in a +file. Current output of PLA3v3 is: + +Goodbye 4 +Hello 1 +World 1 + +You are to update the output to: + +Goodbye: in lines 1-2-5-6-6 wc=5 +Hello: in lines 3 wc=1 +World: in lines 3 wc=1 + +In addition, change you assumptions on words: +Words for the word count and index must take into consideration the following: Each +word must be at least one character and start with a letter. 
If a word has 2 or more +characters, then the second and successive characters can be letters, digits, the +underscore, or the hyphen. Continue to recognize and discard other characters. Also, +note that you must eliminate white space (multiple spaces or tabs) between words. +For a suggested solution approach, extend the various data structures (Word and +Word_List) with the LineNo (that word occurred in) and curr_line (of the file). In +the new sample code, new_u_words_min, these have both been added. The output +is shown below where the first number is the last line and the second the word count. diff --git a/tests/two.txt b/tests/two.txt new file mode 100755 index 0000000..1a584fc --- /dev/null +++ b/tests/two.txt @@ -0,0 +1,78 @@ +SE4102 Team Semester Project +Due Dates as Below + +For your team semester project, form teams of 2students to explore indepth a programming +language, and submit a multiple part report and PowerPoint presentation with Specific +content/objectives and responsibilities as given as four deliverables: + + +A. Identify the Issues and Questions of your topic by providing a one-page discussion +of your chosen programming language. Why did you choose your programming +language? What types of problems does it solve? What application domain means is +it relevant for? When how and who were the inventors of the programming +language? + + +B. 10-15 page final report your chosen programming language. Expand the one piece +discussion more detail. Discuss from the perspective of technology perspective, the +compiler available and the platforms the IDE etc. + + +C. Demonstrate your chosen programming language through code perhaps using +word count or word frequency program if relevant. + + +D. 40-50 slide PPT presentation on your chosen programming language using the +CSE4102 template on the course web page. This presentation should include the +material from A, B, and C. 
Note that you also need to prepare a maximum 25 slide +reduced version for presentation in the class. + + +The intent is for one team member to be responsible for B in a second team member to be +responsible for D in both team members to share A and C. + +Possible Languages to consider are based on categories or application domains or for +specific purposes: + + +1. Languages for IoT, concurrency/parallelism, machine learning, secure transactions +2. Categories as in List_of_programming_languages_by_type +3. Time period organization as in +https://en.m.wikipedia.org/wiki/Timeline_of_programming_languages + + +You cannot choose any language from the course programming assignments (C, C++, +Pascal, Modula-2, Ada, Prolog, Go), if it?s covered extensively in class (Fortran, COBOL), +from a CSE class (Python, scheme), or is widely in use (Java, JavaScript, C#, Objective- +C, Swift, etc.). + + +Make sure you select a language that has an available compiler to utilize for demonstration +and testing purposes. + + +Your team needs to get approval by the instructor for the chosen language. + + +Formatting and Submission Requirements: + +* All slides must be formatted with cse4102template.pptx +* Slide 2 of cse4102template.pptx has organization of final PPT. +* For written reports A and B,, 1 page is 12pt, 1in margins, single spaced, Times New +Roman fonts. You must submit an MS Word Document. +* For presentations, use PowerPoint. +* Tables, Figures, etc. are not counted in the page requirements of B. + +Important dates + +By January 29 at 11:59pm: submit to Steve: a list of team members, your chosen +programming language and a one-paragraph description of the language, and which team +member is doing which deliverable (B and D). + +By April 16 at 11:59pm: submit to Steve the two PPT presentations (short and long). + +Presentations in Class April 17/19/24/26 and May 1/3 + +By May 4 at midnight at 11:59pm: submit your final report. 
+
+
diff --git a/wff_dict.jl b/wff_dict.jl
new file mode 100755
index 0000000..3c756c7
--- /dev/null
+++ b/wff_dict.jl
@@ -0,0 +1,25 @@
+#!/usr/local/bin/julia -i
+# Compact word-frequency example: builds one Dict of word => count for every file named in ARGS.
+files = isempty(ARGS) ? (println("please provide at least one file name!"); exit(1)) : ARGS # exit(1): non-zero status on usage error
+delims = [' ','\n','\t','.',',','(',')','?'] # single quotes: character literal
+addword!(d, word) = d[word] = get(d, word, 0) + 1 # bump the count for word; unseen words start from 0
+
+# Read `file` and return a Dict{String,Int} mapping each word to its number of occurrences.
+function findwords(file)
+    d = Dict{String,Int}()
+    println("reading $(file)")
+    open(file) do f # String(read(f)) below replaces readstring(f), which was removed in Julia 1.0
+        words = filter(w -> !isempty(w), split(String(read(f)), delims)) # adjacent delimiters yield "", drop those
+        foreach(word -> addword!(d, word), words) # foreach: like map, but only for the side effects
+    end
+    d # if no return statement last line is returned
+end
+
+word_dicts = Dict(file=>findwords(file) for file in files) # as dict list comprehension
+
+# Print every word => count pair recorded for `file`.
+function printwords(file)
+    println("\nword frequencies for $file")
+    foreach(kv -> println("\t$kv"), word_dicts[file]) # word_dicts is in global scope!
+end
+foreach(printwords, files)
diff --git a/wff_dict_verbose.jl b/wff_dict_verbose.jl
new file mode 100755
index 0000000..d264d1a
--- /dev/null
+++ b/wff_dict_verbose.jl
@@ -0,0 +1,48 @@
+#!/usr/local/bin/julia -i
+# header not necessary, allows one to call
+#   ./wff_dict_verbose.jl ARGS...
+# after chmod +x wff_dict_verbose.jl
+
+# Alternatively call: julia -i wff_dict_verbose.jl
+# the -i denotes "interactive mode" similar to the cli in python.
+# to just execute the code omit -i
+
+if length(ARGS) > 0 # ARGS is an array of command line arguments as strings (separated by spaces)
+    files = ARGS[1:end] # indexing starts at 1!
+else
+    println("please provide at least one file name!")
+    exit(1) # ... exit interpreter, with a non-zero status to signal the usage error
+end
+
+# instantiate a dictionary of dictionaries
+# one dictionary for each file provided, with the file name as key
+word_dicts = Dict(file=>Dict{String, Int}() for file in files) # optional typing
+delims = [' ','\n','\t','.',',','(',')','?'] # single quotes: character literal
+
+for file in files
+    println("reading $(file)") # string interpolation (parentheses not necessary)!
+    open(file) do f # alternatively, f = open(file), just have to remember close(f) when done
+        s = String(read(f)) # read entire contents as string (readstring(f) was removed in Julia 1.0)
+        for word in split(s,delims) # split string by literals in array 'delims'
+            isempty(word) && continue # adjacent delimiters (e.g. ". ") produce empty strings: not words, skip
+            if haskey(word_dicts[file], word) # check if word has already been seen
+                word_dicts[file][word] += 1 # if so, increment counter
+            else
+                word_dicts[file][word] = 1 # otherwise add word as key with value 1
+            end
+        end
+    end
+end
+
+# print word frequencies for each file
+for file in files
+    println("\nword frequencies for $file") # linebreak (\n) interpolation
+    for kv in word_dicts[file]
+        println("\t$(kv)") # tab (\t) interpolation
+    end
+end
+
+println("\nyou're now in interactive mode!")
+println("type word_dicts in the command line to view your dictionary")
+println("\tlooking up key values is the same syntax as indexing")
+println("\te.x. word_dicts[\"$(files[1])\"] would give you the dictionary of word frequencies for $(files[1])")