From 927943fe752508ed3ce7e2f4ab13d22b9f1d12e6 Mon Sep 17 00:00:00 2001 From: Jack Date: Sun, 17 Apr 2016 15:40:59 -0400 Subject: [PATCH] Added random restart to CB lexicon --- cblexicon.py | 56 +++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/cblexicon.py b/cblexicon.py index 99c1cdb..0f33cb7 100644 --- a/cblexicon.py +++ b/cblexicon.py @@ -5,6 +5,7 @@ import nltk.stem from nltk.corpus import brown import random from nltk.stem import * +import time from sets import Set @@ -47,12 +48,13 @@ def optimize(set1,set2,conjSet,defSet,dis): return set1,set2 def optimize2(set1,set2,conjSet,defSet,dis): - i = 0 currentMin = 999999 consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis) bestSwapWord = None # Calculate the best word to remove until no moves lessen the function + i = 1 while( currentMin > consideredMin): + t1 = time.time() currentMin = consideredMin currentS1 = calcScore(set1,conjSet,dis) currentS2 = calcScore(set2,conjSet,dis) @@ -74,7 +76,9 @@ def optimize2(set1,set2,conjSet,defSet,dis): elif(bestSwapWord in set2): set2.remove(bestSwapWord) set1.append(bestSwapWord) - i = i + 1 + t2 = time.time() + print "Iteration: %d\tScore: %f\tTime: %f sec" % (i, consideredMin, t2-t1) + i += 1 # Return the optimized sets return set1,set2 @@ -275,25 +279,24 @@ def returnCBLexicon(): dis = vectorize(conjSet,defSet) # Its Cluster time - set1 = defSet[len(defSet)//2:] - set2 = defSet[:len(defSet)//2] - """ - set1 = random.sample(defSet, len(defSet)//4) - set2 = [x for x in defSet if x not in set1] - """ - # Optimize objective function - (set1,set2) = optimize2(set1,set2,conjSet,defSet,dis) - # Check the constraint - #(set1,set2) = constraintSwap(set1,set2,conjSet,defSet,dis) - - #f1 = open('set1.txt', 'w+') - #f2 = open('set2.txt', 'w+') - #for word in set1: - # f1.write(word + "\n") - #for word in set2: - # f2.write(word + "\n") - #f1.close() - #f2.close() + bestSet1 = [] + bestSet2 = [] + bestScore = 999999 + numIterations = 10 + for i in range(numIterations): + set1 = random.sample(defSet, len(defSet)//2) + set2 = [x for x in defSet if x not in set1] + + # Optimize objective function + (set1,set2) = optimize2(set1,set2,conjSet,defSet,dis) + # Check the constraint + (set1,set2) = constraintSwap(set1,set2,conjSet,defSet,dis) + score = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis) + print "*** score = %f, bestScore = %f ***" % (score, bestScore) + if score < bestScore: + bestSet1 = set1 + bestSet2 = set2 + bestScore = score #Find which set has a higher frequency in the training set (set1Freq,set2Freq) = findFrequency(set1,set2) @@ -301,13 +304,16 @@ def returnCBLexicon(): positive = set1 if (set1Freq>set2Freq) else set2 negative = set1 if (set1Freq