From a0a41010663632c44e5a14d2abe73fe953c54c1b Mon Sep 17 00:00:00 2001 From: jendapang Date: Thu, 17 Apr 2025 21:46:16 -0400 Subject: [PATCH 1/4] Unit tests for Levenshtein Edit Distance --- Levenshtein_Edit_Distance.py | 3 +- Test_Levenshtein_Edit_Distance.py | 42 ++++++++++++++++++ .../Levenshtein_Edit_Distance.cpython-313.pyc | Bin 0 -> 3630 bytes 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 Test_Levenshtein_Edit_Distance.py create mode 100644 __pycache__/Levenshtein_Edit_Distance.cpython-313.pyc diff --git a/Levenshtein_Edit_Distance.py b/Levenshtein_Edit_Distance.py index b076a99..014c64b 100644 --- a/Levenshtein_Edit_Distance.py +++ b/Levenshtein_Edit_Distance.py @@ -20,5 +20,4 @@ def edit_distance(str1, str2): #input is 2 strings that we will be comparing cost = 1 #no match: substitution D[i][j] = min(D[i-1][j] + 1, D[i][j-1] + 1,D[i-1][j-1] + cost) - return D[len(str1)][len(str2)] #last cell of matrix contains optimal solution - + return D[len(str1)][len(str2)] #last cell of matrix contains optimal solution \ No newline at end of file diff --git a/Test_Levenshtein_Edit_Distance.py b/Test_Levenshtein_Edit_Distance.py new file mode 100644 index 0000000..934a275 --- /dev/null +++ b/Test_Levenshtein_Edit_Distance.py @@ -0,0 +1,42 @@ +import unittest +import Levenshtein_Edit_Distance + +class TestEditDistance(unittest.TestCase): + def test_empty_strings(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("", ""), 0) + + def test_identical_strings(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("abcde", "abcde"), 0) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("hello_world", "hello_world"), 0) + + def test_different_lengths(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("apple", "pineapple"), 4) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("programming", "programmer"), 3) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("classification", "clarification"), 2) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("comp sci", "computer science"), 8) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("saturday", "sunday"), 3) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("video", "nvidia"), 3) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("uconn", "basketball"), 10) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("national", "champions"), 6) + + + def test_same_lengths(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("intention", "execution"), 5) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("storrs", "stores"), 1) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("student", "faculty"), 7) + + def test_one_empty_string(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("computer", ""), 8) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("", "science"), 7) + + def test_different_cases(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("UConn", "uconn"), 2) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("AbC", "abc"), 2) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("ALGORITHM", "algorithm"), 9) + + def test_special_characters(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("a|b^c}", "a&b#c."), 3) + +if __name__ == '__main__': + unittest.main() + diff --git a/__pycache__/Levenshtein_Edit_Distance.cpython-313.pyc b/__pycache__/Levenshtein_Edit_Distance.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..827d2c77e45db5d8cdd23f8df6768afbc9a1f351 GIT binary patch literal 3630 zcmc&$%~KRd6z`syot=S2z7$=E3J4-ekX>;73MAr!A2F=xfJI8HriNjFQJ7t4hLn0D zv65h!1C&^Wr3bt%RXHq`97s;4Dt|z5L8*?1ROQ5t06BQe>)Bm)g+(bZ>8gF({rbJv zuYdjP*Rz_JmxG`^isFGwZiN1(gH&@RJFA1xd59z=;S0z_5>tvyya!bxi7iDEcMV@`kuo$N+PDMAQro%Paov{#7&Nkuv8$NjjLU!voX$|TkFPlO+hxjF{g zhv+J%O4_a_S3;-@1(I3`*TGC`RE(}LS6GX!M;23z&V7WC!DRKvDUVJ5ti>d~0TqLl zm=2~L@XAHy5Yu2`1PjuU!5J{l8ht&VjnU5mCI;Jmq>H7HOH2*Oc26Q+)<|3C=|p0> zt0aa->e?cbgx`W$5_7<5h-Q^u{|&E=8Kk{J;Kg*aut(Sx&7!*~-FJ8Z=Ormnmz z!*!LKIITOFw(D~8PvF!YOzb5+o6@H>T@Wj^UYE-``y?w;-VUZ62~69=iT^Gx8xC}} z4E_2}slEzMJ8|NDi5_dP2GfUX+0YWDo=I`2Wr@pFjDC%F9u31``+-;oUj zUnf{6xRr2f*s?7ioXVX>uGDu~nrYR4t{L^MVRd*&Gi<9qD7Do!27JD{vu1cWQg^hj z!wg^7RJ%@!n7Ywl*Whai)PJFUr5Vt+`eBC-)c=zl-Eb{DI z6SiqHRyYxy4J-uW}SGig{!Bo_fza);oFW zQRBEC7xypb7u@6i$}bq@UKJFNwM^o1b^HwEa~{0fr~FTkL~*oYntggAdh=;>tT1+I zrg5fZ7SC4v!p@Y<+H?M3yr30w?j7~t?O)ABI}XsqE0zmTPVNtlqxSt1_dZCZF(;P) z!IZyVsX;=`>o^~@N&w8q``Z})S5IrR=QrLB-QSyCAXS)IZ@(lOQjG{O@*gRP*Tx5 zu-XLTA=JjpnX{myEzNTYOE{3c#S)Y1K|QA2CJACsGbC5BfS-2T&0%HGVbL@OPHTgG z7L8tR?f^-OY{*X5h9dTD8CVYtwa9^u86VzkR0htL7sLo!6g)45J#)gICwr&NXhYn4 zIEEL5hE29CC--h zUbxe`stm4UQl;v$Nwz znAsL=>zqgfQT}dL!P~zM8=vZJ!_O@VDQihg^&B?lgb05_w{57bZVUc_T}zbbMAQvX z+IrX^qIOGDhiP|-R&C-^B9TFjh!Ne;obD3+VtLyk_3C|nnhD`#DB=C~fJL@Aq)mpE z5ZO8l0uD*cdntP7MDLTjs1U1-mp08R^Wv#Yb#ClxytL(+e_m|MRJ&tk@zUnmn+syg zTh4CVlz+doVY#K%@!_*pEw(xMxokVBFM6# z3~91Va%6cZ+&er-`yN@Q?^kjrUzYoH)3OJlI2d7BHt7pw?x*5oDk%HRqg0#(L3YVd zw-qShvSP#A)-wz{0w)Tgv#|37nuT;o1$~z-3jcR#g~6`I6`pZ@wj!{u(|9F^aUER| zVEnIuT}=sZ>O<2WQs6UAhVMpU*r3%8-ckC|Mg)pe5Y9ibL^IWK)ObS%L}*Iv_fzu> z*wEM6ItF5y#TfsI@>iG=T(RaW!A)bgUhY0LxBJi Date: Thu, 17 Apr 2025 22:26:32 -0400 Subject: [PATCH 2/4] Add files via upload --- Test_Levenshtein_Edit_Distance.py | 9 +++++++- Time_Levenshtein_Edit_Distance.py | 34 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 Time_Levenshtein_Edit_Distance.py diff --git a/Test_Levenshtein_Edit_Distance.py b/Test_Levenshtein_Edit_Distance.py index 934a275..0a8624c 100644 --- a/Test_Levenshtein_Edit_Distance.py +++ b/Test_Levenshtein_Edit_Distance.py @@ -4,6 +4,7 @@ class TestEditDistance(unittest.TestCase): def test_empty_strings(self): self.assertEqual(Levenshtein_Edit_Distance.edit_distance("", ""), 0) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance(" ", " "), 0) def test_identical_strings(self): self.assertEqual(Levenshtein_Edit_Distance.edit_distance("abcde", "abcde"), 0) @@ -18,7 +19,6 @@ def test_different_lengths(self): self.assertEqual(Levenshtein_Edit_Distance.edit_distance("video", "nvidia"), 3) self.assertEqual(Levenshtein_Edit_Distance.edit_distance("uconn", "basketball"), 10) self.assertEqual(Levenshtein_Edit_Distance.edit_distance("national", "champions"), 6) - def test_same_lengths(self): self.assertEqual(Levenshtein_Edit_Distance.edit_distance("intention", "execution"), 5) @@ -36,6 +36,13 @@ def test_different_cases(self): def test_special_characters(self): self.assertEqual(Levenshtein_Edit_Distance.edit_distance("a|b^c}", "a&b#c."), 3) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("hello@world", "hello#world"), 1) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("test*#C$@", "test_(@$)"), 4) + + def test_numeric_strings(self): + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("12345", "54321"), 4) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("123abc", "abc123"), 6) + self.assertEqual(Levenshtein_Edit_Distance.edit_distance("456789", "456789"), 0) if __name__ == '__main__': unittest.main() diff --git a/Time_Levenshtein_Edit_Distance.py b/Time_Levenshtein_Edit_Distance.py new file mode 100644 index 0000000..c60c961 --- /dev/null +++ b/Time_Levenshtein_Edit_Distance.py @@ -0,0 +1,34 @@ +import random +import time +import sys +from Levenshtein_Edit_Distance import edit_distance +sys.setrecursionlimit(10**4) + +# define a random string with any possible character from the alphabet +def random_string(length): + return ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=length)) + +if __name__ == "__main__": + # sizes of the strings being tested + sizes = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000] + + # number of trials per string size + trials = 10 + + + print(f"{'Size of Input':<15}{'Average Time (seconds)':>20}") + + for n in sizes: + times = [] + + for i in range(trials): + string1 = random_string(n) + string2 = random_string(n) + start_time = time.time() + edit_distance(string1, string2) + end_time = time.time() + times.append(end_time - start_time) + + average_time = sum(times) / trials + + print(f"{n:<15}{average_time:>20.6f}") From c1ce893bdeb8193d0516077a8e407d0aa3934156 Mon Sep 17 00:00:00 2001 From: Sean J Gannon Date: Sat, 19 Apr 2025 17:50:33 -0400 Subject: [PATCH 3/4] Update README.md --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 726c095..2845de9 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,20 @@ -# CSE-3500-Group-6-Project-Spring-2025 +# CSE-3500-SEC001-Group-6-Project-Spring-2025 Repository to Implement Sec 001 Group 6 Algorithm Code + +# Group Members +Sean Gannon sjg21010 +Jennifer Chen jhc18015 +Kieren Ghidela kag22034 +Rikki Vetcha riv23005 + +# The files here as follows: +Levenshtein_Edit_Distance.py: +Our actual implementation of the Levenshtein Distance Algorithm. + +Test_Levenshtein_Edit_Distance.py: +Unit test code for our implementation, + +Time_Levenshtein_Edit_Distance.py: +Code we used to actually test the running time complexity of our implementation. + +# Thanks for viewing our repo! From 65f509846d3922f9692b07362c7f872b9360a024 Mon Sep 17 00:00:00 2001 From: Sean J Gannon Date: Sat, 19 Apr 2025 17:51:12 -0400 Subject: [PATCH 4/4] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2845de9..4db4fe7 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ Repository to Implement Sec 001 Group 6 Algorithm Code # Group Members -Sean Gannon sjg21010 -Jennifer Chen jhc18015 -Kieren Ghidela kag22034 -Rikki Vetcha riv23005 +Sean Gannon sjg21010 +Jennifer Chen jhc18015 +Kieren Ghidela kag22034 +Rikki Vetcha riv23005 # The files here as follows: Levenshtein_Edit_Distance.py: