From f4f6c976d67f499bf42381977b83369fcda1c173 Mon Sep 17 00:00:00 2001 From: Antonia Lewis Date: Tue, 12 Apr 2016 23:17:03 -0400 Subject: [PATCH] Trying to optimize the optimizing. --- cblexicon.py | 83 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/cblexicon.py b/cblexicon.py index b2375a8..a329d90 100644 --- a/cblexicon.py +++ b/cblexicon.py @@ -47,11 +47,48 @@ class cblexicon: bestSwapWord = word if(bestSwapWord in set1): - set1.remove(word) - set2.append(word) + set1.remove(bestSwapWord) + set2.append(bestSwapWord) else: - set2.remove(word) - set1.append(word) + set2.remove(bestSwapWord) + set1.append(bestSwapWord) + # Return the optimized sets + return set1,set2 + + def optimize2(set1,set2,conjSet,defSet,dis): + i = 0 + currentMin = 999999 + consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis) + bestSwapWord = "" + print consideredMin + # Calculate the best word to remove until no moves lessen the function + while( currentMin > consideredMin): + print i + i = i + 1 + currentMin = consideredMin + currentS1 = calcScore(set1,conjSet,dis) + currentS2 = calcScore(set2,conjSet,dis) + for word in set1: + test = calcSwap(word,set1,set2,currentS1,currentS2,conjSet,dis) + if (test < consideredMin): + print "found1" + consideredMin = test + bestSwapWord = word + for word in set2: + test = calcSwap(word,set2,set1,currentS1,currentS2,conjSet,dis) + if (test < consideredMin): + print "found2" + consideredMin = test + bestSwapWord = word + print consideredMin + + if(bestSwapWord in set1): + set1.remove(bestSwapWord) + set2.append(bestSwapWord) + else: + set2.remove(bestSwapWord) + set1.append(bestSwapWord) + # Return the optimized sets return set1,set2 @@ -73,23 +110,31 @@ class cblexicon: set1.append(word) return set1,set2 - def calcScore(set1,set2,conjSet,dis): - score1 = 0 - score2 = 0 - for curr in set1: - for word in set1: + def calcScore(set,conjSet,dis): + score = 0 + for curr in set: + for word in set: if word != curr: cats = dis[conjSet[curr][0]][conjSet[word][0]] - score1 = score1 + cats - score1 = score1 * (1.0/len(set1)) + score = score + cats + return score * (1.0/len(set1)) - for curr in set2: - for word in set2: - if word != curr: - cats = dis[conjSet[curr][0]][conjSet[word][0]] - score2 = score2 + cats - score2 = score2 * (1.0/len(set2)) - return score1 + score2 + def calcSwap(word,currSet,opSet,currentCount,otherCount,conjSet,dis): + score1 = 0 + score2 = 0 + for w in currSet: + if word != w: + cats = dis[conjSet[word][0]][conjSet[w][0]] + score1 = score1 + cats + currentCount = ((currentCount* len(currSet)) - score1 ) * (1/(len(currSet)-1)) + + for word in set2: + if word != w: + cats = dis[conjSet[word][0]][conjSet[w][0]] + score2 = score2 + cats + otherCount = ((otherCount* len(opSet)) + score2 ) * (1/(len(opSet)-1)) + + return currentCount + otherCount def normalize_word(word): return SnowballStemmer("english").stem(word) @@ -183,7 +228,7 @@ class cblexicon: set2 = defSet[:len(defSet)//2] # Optimize objective function - sets = optimize(set1,set2,conjSet,defSet,dis) + sets = optimize2(set1,set2,conjSet,defSet,dis) set1 = sets[0] set2 = sets[1]