diff --git a/Levenshtein_Edit_Distance.py b/Levenshtein_Edit_Distance.py index b076a99..014c64b 100644 --- a/Levenshtein_Edit_Distance.py +++ b/Levenshtein_Edit_Distance.py @@ -20,5 +20,4 @@ def edit_distance(str1, str2): #input is 2 strings that we will be comparing cost = 1 #no match: substitution D[i][j] = min(D[i-1][j] + 1, D[i][j-1] + 1,D[i-1][j-1] + cost) - return D[len(str1)][len(str2)] #last cell of matrix contains optimal solution - + return D[len(str1)][len(str2)] #last cell of matrix contains optimal solution \ No newline at end of file diff --git a/README.md b/README.md index 726c095..4db4fe7 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,20 @@ -# CSE-3500-Group-6-Project-Spring-2025 +# CSE-3500-SEC001-Group-6-Project-Spring-2025 Repository to Implement Sec 001 Group 6 Algorithm Code + +# Group Members +Sean Gannon sjg21010 +Jennifer Chen jhc18015 +Kieren Ghidela kag22034 +Rikki Vetcha riv23005 + +# The files here are as follows: +Levenshtein_Edit_Distance.py: +Our actual implementation of the Levenshtein Distance Algorithm. + +Test_Levenshtein_Edit_Distance.py: +Unit test code for our implementation. + +Time_Levenshtein_Edit_Distance.py: +Code we used to actually test the running time complexity of our implementation. + +# Thanks for viewing our repo! 
diff --git a/Test_Levenshtein_Edit_Distance.py b/Test_Levenshtein_Edit_Distance.py
new file mode 100644
index 0000000..0a8624c
--- /dev/null
+++ b/Test_Levenshtein_Edit_Distance.py
@@ -0,0 +1,57 @@
+"""Unit tests for the edit_distance function in Levenshtein_Edit_Distance."""
+import unittest
+
+from Levenshtein_Edit_Distance import edit_distance
+
+
+class TestEditDistance(unittest.TestCase):
+    """Compare edit_distance results with the expected distances."""
+
+    def test_empty_strings(self):
+        self.assertEqual(edit_distance("", ""), 0)
+        # A single space is an ordinary character, so equal inputs still give 0.
+        self.assertEqual(edit_distance(" ", " "), 0)
+
+    def test_identical_strings(self):
+        self.assertEqual(edit_distance("abcde", "abcde"), 0)
+        self.assertEqual(edit_distance("hello_world", "hello_world"), 0)
+
+    def test_different_lengths(self):
+        self.assertEqual(edit_distance("apple", "pineapple"), 4)
+        self.assertEqual(edit_distance("programming", "programmer"), 3)
+        self.assertEqual(edit_distance("classification", "clarification"), 2)
+        self.assertEqual(edit_distance("comp sci", "computer science"), 8)
+        self.assertEqual(edit_distance("saturday", "sunday"), 3)
+        self.assertEqual(edit_distance("video", "nvidia"), 3)
+        self.assertEqual(edit_distance("uconn", "basketball"), 10)
+        self.assertEqual(edit_distance("national", "champions"), 6)
+
+    def test_same_lengths(self):
+        self.assertEqual(edit_distance("intention", "execution"), 5)
+        self.assertEqual(edit_distance("storrs", "stores"), 1)
+        self.assertEqual(edit_distance("student", "faculty"), 7)
+
+    def test_one_empty_string(self):
+        self.assertEqual(edit_distance("computer", ""), 8)
+        self.assertEqual(edit_distance("", "science"), 7)
+
+    def test_different_cases(self):
+        # These expectations treat letters that differ only in case as
+        # different characters (one substitution each).
+        self.assertEqual(edit_distance("UConn", "uconn"), 2)
+        self.assertEqual(edit_distance("AbC", "abc"), 2)
+        self.assertEqual(edit_distance("ALGORITHM", "algorithm"), 9)
+
+    def test_special_characters(self):
+        self.assertEqual(edit_distance("a|b^c}", "a&b#c."), 3)
+        self.assertEqual(edit_distance("hello@world", "hello#world"), 1)
+        self.assertEqual(edit_distance("test*#C$@", "test_(@$)"), 4)
+
+    def test_numeric_strings(self):
+        self.assertEqual(edit_distance("12345", "54321"), 4)
+        self.assertEqual(edit_distance("123abc", "abc123"), 6)
+        self.assertEqual(edit_distance("456789", "456789"), 0)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Time_Levenshtein_Edit_Distance.py b/Time_Levenshtein_Edit_Distance.py
new file mode 100644
index 0000000..c60c961
--- /dev/null
+++ b/Time_Levenshtein_Edit_Distance.py
@@ -0,0 +1,42 @@
+"""Time edit_distance on random equal-length strings of increasing size."""
+import random
+import sys
+import time
+
+from Levenshtein_Edit_Distance import edit_distance
+
+# NOTE(review): a raised recursion limit only matters if edit_distance
+# recurses -- confirm it is still needed.
+sys.setrecursionlimit(10**4)
+
+
+def random_string(length):
+    """Return a random string of `length` lowercase ASCII letters."""
+    return ''.join(random.choices('abcdefghijklmnopqrstuvwxyz', k=length))
+
+
+if __name__ == "__main__":
+    # sizes of the strings being tested
+    sizes = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
+
+    # number of trials per string size
+    trials = 10
+
+    print(f"{'Size of Input':<15}{'Average Time (seconds)':>20}")
+
+    for n in sizes:
+        times = []
+
+        for _ in range(trials):
+            string1 = random_string(n)
+            string2 = random_string(n)
+            # perf_counter() is a monotonic, high-resolution timer; time.time()
+            # can jump when the system clock is adjusted.
+            start_time = time.perf_counter()
+            edit_distance(string1, string2)
+            end_time = time.perf_counter()
+            times.append(end_time - start_time)
+
+        average_time = sum(times) / trials
+
+        print(f"{n:<15}{average_time:>20.6f}")
diff --git a/__pycache__/Levenshtein_Edit_Distance.cpython-313.pyc b/__pycache__/Levenshtein_Edit_Distance.cpython-313.pyc
new file mode 100644
index 0000000..827d2c7
Binary files /dev/null and b/__pycache__/Levenshtein_Edit_Distance.cpython-313.pyc differ