Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
fixed anias code
  • Loading branch information
job13011 committed Apr 13, 2016
1 parent f4f6c97 commit efbde4b
Showing 1 changed file with 28 additions and 19 deletions.
47 changes: 28 additions & 19 deletions cblexicon.py
Expand Up @@ -59,35 +59,36 @@ class cblexicon:
i = 0
currentMin = 999999
consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
bestSwapWord = ""
bestSwapWord = None
print consideredMin
# Calculate the best word to remove until no moves lessen the function
while( currentMin > consideredMin):
print i
i = i + 1
print "Iteration #%d: (%d, %d)" % (i, len(set1), len(set2))
currentMin = consideredMin
currentS1 = calcScore(set1,conjSet,dis)
currentS2 = calcScore(set2,conjSet,dis)
consideredMin = currentS1 + currentS2 #
for word in set1:
test = calcSwap(word,set1,set2,currentS1,currentS2,conjSet,dis)
if (test < consideredMin):
print "found1"
consideredMin = test
bestSwapWord = word
for word in set2:
test = calcSwap(word,set2,set1,currentS1,currentS2,conjSet,dis)
test = calcSwap(word,set2,set1,currentS2,currentS1,conjSet,dis)
if (test < consideredMin):
print "found2"
consideredMin = test
bestSwapWord = word
print consideredMin
print "New min: %f" % consideredMin

if(bestSwapWord in set1):
set1.remove(bestSwapWord)
set2.append(bestSwapWord)
else:
elif(bestSwapWord in set2):
set2.remove(bestSwapWord)
set1.append(bestSwapWord)
i = i + 1

# Return the optimized sets
return set1,set2
Expand All @@ -112,12 +113,13 @@ class cblexicon:

def calcScore(set,conjSet,dis):
    """Return the average pairwise dissimilarity inside one cluster.

    Every unordered pair of members is counted once, including each
    member paired with itself, and the total is divided by the number
    of members.

    Parameters:
        set     -- indexable sequence of words forming the cluster
                   (the name shadows the builtin; kept for callers).
        conjSet -- mapping from word to a sequence whose first element
                   is used to index ``dis`` (presumably the word's
                   row/column in the matrix -- TODO confirm in caller).
        dis     -- two-level indexable dissimilarity table, read as
                   ``dis[a][b]``.

    Returns:
        The summed dissimilarity divided by ``len(set)`` as a float,
        or 0.0 for an empty cluster.
    """
    size = len(set)
    # An empty cluster has no pairs; avoid dividing by zero.
    if size == 0:
        return 0.0
    score = 0
    for i in range(size):
        # Row lookup does not depend on j, so do it once per i.
        row = dis[conjSet[set[i]][0]]
        # Start at i so each unordered pair is visited exactly once.
        for j in range(i, size):
            score = score + row[conjSet[set[j]][0]]
    # Force float division: this file is Python 2 (print statements),
    # where int / int would silently truncate the average.
    return score / float(size)

def calcSwap(word,currSet,opSet,currentCount,otherCount,conjSet,dis):
score1 = 0
Expand All @@ -126,13 +128,14 @@ class cblexicon:
if word != w:
cats = dis[conjSet[word][0]][conjSet[w][0]]
score1 = score1 + cats
currentCount = ((currentCount* len(currSet)) - score1 ) * (1/(len(currSet)-1))
currentCount = ((currentCount* len(currSet)) - score1 )/(len(currSet)-1)

for word in set2:
#for word in set2:
for w in opSet:
if word != w:
cats = dis[conjSet[word][0]][conjSet[w][0]]
score2 = score2 + cats
otherCount = ((otherCount* len(opSet)) + score2 ) * (1/(len(opSet)-1))
otherCount = ((otherCount* len(opSet)) + score2 )/(len(opSet)+1)

return currentCount + otherCount

Expand Down Expand Up @@ -202,10 +205,12 @@ class cblexicon:
return d

#Get the Data#
"""
negids = movie_reviews.fileids('neg')
posids = movie_reviews.fileids('pos')
training = set(negids[:500] + posids[:500])
testing = set(negids[500:] + posids[500:])
"""
# Generate positive and negative initial sets
sets = genSets()
positive = random.sample(sets[0], min(len(sets[0]), len(sets[1])))
Expand All @@ -223,25 +228,29 @@ class cblexicon:
# Generate dissimilarity matrix
dis = vectorize(conjSet,defSet)

"""
# Its Cluster time
set1 = defSet[len(defSet)//2:]
set2 = defSet[:len(defSet)//2]

"""
set1 = random.sample(defSet, len(defSet)//4)
set2 = [x for x in defSet if x not in set1]

# Optimize objective function
sets = optimize2(set1,set2,conjSet,defSet,dis)
set1 = sets[0]
set2 = sets[1]

print(set1)
print(set2)

"""
f = open('set1.txt', 'w+')
f2 = open('set1.txt', 'w+')
for word in set1:
f.write(word + "/n")
for word in set2:
f2.write(word + "/n")

"""


# Can we classify and then run bag of words?
Expand Down

0 comments on commit efbde4b

Please sign in to comment.