Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added random restart to CB lexicon
  • Loading branch information
job13011 committed Apr 17, 2016
1 parent d834a06 commit 927943f
Showing 1 changed file with 31 additions and 25 deletions.
56 changes: 31 additions & 25 deletions cblexicon.py
Expand Up @@ -5,6 +5,7 @@ import nltk.stem
from nltk.corpus import brown
import random
from nltk.stem import *
import time

from sets import Set

Expand Down Expand Up @@ -47,12 +48,13 @@ def optimize(set1,set2,conjSet,defSet,dis):
return set1,set2

def optimize2(set1,set2,conjSet,defSet,dis):
i = 0
currentMin = 999999
consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
bestSwapWord = None
# Calculate the best word to remove until no moves lessen the function
i = 1
while( currentMin > consideredMin):
t1 = time.time()
currentMin = consideredMin
currentS1 = calcScore(set1,conjSet,dis)
currentS2 = calcScore(set2,conjSet,dis)
Expand All @@ -74,7 +76,9 @@ def optimize2(set1,set2,conjSet,defSet,dis):
elif(bestSwapWord in set2):
set2.remove(bestSwapWord)
set1.append(bestSwapWord)
i = i + 1
t2 = time.time()
print "Iteration: %d\tScore: %f\tTime: %f sec" % (i, consideredMin, t2-t1)
i += 1

# Return the optimized sets
return set1,set2
Expand Down Expand Up @@ -275,39 +279,41 @@ def returnCBLexicon():
dis = vectorize(conjSet,defSet)

# It's cluster time
set1 = defSet[len(defSet)//2:]
set2 = defSet[:len(defSet)//2]
"""
set1 = random.sample(defSet, len(defSet)//4)
set2 = [x for x in defSet if x not in set1]
"""
# Optimize objective function
(set1,set2) = optimize2(set1,set2,conjSet,defSet,dis)
# Check the constraint
#(set1,set2) = constraintSwap(set1,set2,conjSet,defSet,dis)

#f1 = open('set1.txt', 'w+')
#f2 = open('set2.txt', 'w+')
#for word in set1:
# f1.write(word + "\n")
#for word in set2:
# f2.write(word + "\n")
#f1.close()
#f2.close()
bestSet1 = []
bestSet2 = []
bestScore = 999999
numIterations = 10
for i in range(numIterations):
set1 = random.sample(defSet, len(defSet)//2)
set2 = [x for x in defSet if x not in set1]

# Optimize objective function
(set1,set2) = optimize2(set1,set2,conjSet,defSet,dis)
# Check the constraint
(set1,set2) = constraintSwap(set1,set2,conjSet,defSet,dis)
score = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
print "*** score = %f, bestScore = %f ***" % (score, bestScore)
if score < bestScore:
bestSet1 = set1
bestSet2 = set2
bestScore = score

#Find which set has a higher frequency in the training set
(set1Freq,set2Freq) = findFrequency(set1,set2)

positive = set1 if (set1Freq>set2Freq) else set2
negative = set1 if (set1Freq<set2Freq) else set2

print "Positive set of length %d" % len(positive)
#print positive
print "Negative set of length %d" % len(negative)

conjunctionData(set1,set2)

# Generate Dictionary in correct format
lexicon = dict([(word,1) for word in positive])
lexicon.update(dict([(word,-1) for word in negative]))
return lexicon

# Build the conjunction-based lexicon and dump it to cblex.txt, one
# "word, polarity" entry per line (polarity is the integer stored in the dict).
lex = returnCBLexicon()
# The with-block guarantees the file is closed even if a write fails.
with open("cblex.txt", "w") as f:
    for key, value in lex.items():
        f.write("%s, %d\n" % (key, value))

0 comments on commit 927943f

Please sign in to comment.