diff --git a/SentiWordLex.py b/SentiWordLex.py
new file mode 100644
index 0000000..85583f5
--- /dev/null
+++ b/SentiWordLex.py
@@ -0,0 +1,44 @@
+from __future__ import division
+import sys
+import time
+
+import nltk
+from nltk.corpus import movie_reviews
+from nltk.corpus import sentiwordnet as swn
+from nltk.corpus import wordnet as wn
+
+start_time = time.time()
+count = 0.00
+correct = 0.00
+ids = sorted(movie_reviews.fileids())
+
+for reviews in ids:  # For every review
+    score = 0.0
+    positive = 0.0
+    negative = 0.0
+    tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews])))  # Tokenize all words with POS
+    for token in tokens:
+        if (token[1] == "JJ" or token[1] == "JJR" or token[1] == "JJS"):  # If adjective, check value
+            if len(wn.synsets(token[0], pos=wn.ADJ)) != 0 and swn.senti_synset(wn.synsets(token[0], pos=wn.ADJ)[0].name()):
+                word = wn.synsets(token[0], pos=wn.ADJ)[0].name()
+                print word
+                print swn.senti_synset(word)
+                positive = positive + swn.senti_synset(word).pos_score()
+                negative = negative + swn.senti_synset(word).neg_score()
+                print "%s, %f, %f" % (word, positive, negative)
+    score = positive - negative
+    if (score < 0):
+        print "Negative at %f" % (score)
+        sentiment = 'neg'
+    else:
+        sentiment = 'pos'
+        print "Positive at %f" % (score)
+    if (sentiment == movie_reviews.categories(fileids=[reviews])[0]):
+        print "Correct"
+        correct = correct + 1.00
+    count = count + 1.00
+
+print correct/count
+print "Seconds: %d" % (time.time() - start_time)
+print "correct:", correct/len(ids)
+print "positive:", positive/len(ids)
\ No newline at end of file
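
Review note on SentiWordLex.py: the script scores a review by summing SentiWordNet positive/negative scores over the first WordNet sense of every adjective, so there is no word-sense disambiguation. A minimal standalone sketch of that per-word lookup, not part of the patch (it assumes the NLTK wordnet and sentiwordnet corpora have been downloaded; "terrible" is only an illustrative input):

    from nltk.corpus import sentiwordnet as swn
    from nltk.corpus import wordnet as wn

    synsets = wn.synsets("terrible", pos=wn.ADJ)      # all adjective senses of the word
    if len(synsets) != 0:
        senti = swn.senti_synset(synsets[0].name())   # first sense only, as the script does
        print senti.pos_score(), senti.neg_score()    # each score lies in [0, 1]

A review is then labeled 'pos' when the summed pos_score exceeds the summed neg_score, and 'neg' otherwise.
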
diff --git a/cblexicon.py b/cblexicon.py
index ac41cca..99c1cdb 100644
--- a/cblexicon.py
+++ b/cblexicon.py
@@ -1,264 +1,313 @@
 from __future__ import division
-import math
 import nltk
-from nltk.corpus import wordnet as wn
-from collections import Counter
 import numpy
-from nltk.corpus import movie_reviews
 import nltk.stem
-from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
-from nltk.classify import NaiveBayesClassifier
+from nltk.corpus import brown
 import random
 from nltk.stem import *
 from sets import Set

-class cblexicon:
-
-    def process(self):
-
-        def optimize(set1,set2,conjSet,defSet,dis):
-            i = 0
-            currentMin = 999999
-            consideredMin = calcScore(set1,set2,conjSet,dis)
-            bestSwapWord = ""
-            # Calculate the best word to remove until no moves lessen the function
-            while( currentMin > consideredMin):
-                print i
-                i = i + 1
-                currentMin = consideredMin
-                for word in set1:
-                    set1.remove(word)
-                    set2.append(word)
-                    test = calcScore(set1,set2,conjSet,dis)
-                    set2.remove(word)
-                    set1.append(word)
-                    if (test < consideredMin):
-                        consideredMin = test
-                        bestSwapWord = word
-                for word in set2:
-                    set2.remove(word)
-                    set1.append(word)
-                    test = calcScore(set1,set2,conjSet,dis)
-                    set1.remove(word)
-                    set2.append(word)
-                    if (test < consideredMin):
-                        consideredMin = test
-                        bestSwapWord = word
-
-                if(bestSwapWord in set1):
-                    set1.remove(bestSwapWord)
-                    set2.append(bestSwapWord)
-                else:
-                    set2.remove(bestSwapWord)
-                    set1.append(bestSwapWord)
-            # Return the optimized sets
-            return set1,set2
-
-        def optimize2(set1,set2,conjSet,defSet,dis):
-            i = 0
-            currentMin = 999999
-            consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
-            bestSwapWord = None
-            print consideredMin
-            # Calculate the best word to remove until no moves lessen the function
-            while( currentMin > consideredMin):
-                print "Iteration #%d: (%d, %d)" % (i, len(set1), len(set2))
-                currentMin = consideredMin
-                currentS1 = calcScore(set1,conjSet,dis)
-                currentS2 = calcScore(set2,conjSet,dis)
-                consideredMin = currentS1 + currentS2 #
-                for word in set1:
-                    test = calcSwap(word,set1,set2,currentS1,currentS2,conjSet,dis)
-                    if (test < consideredMin):
-                        print "found1"
-                        consideredMin = test
-                        bestSwapWord = word
-                for word in set2:
-                    test = calcSwap(word,set2,set1,currentS2,currentS1,conjSet,dis)
-                    if (test < consideredMin):
-                        print "found2"
-                        consideredMin = test
-                        bestSwapWord = word
-                print "New min: %f" % consideredMin
-
-                if(bestSwapWord in set1):
-                    set1.remove(bestSwapWord)
-                    set2.append(bestSwapWord)
-                elif(bestSwapWord in set2):
-                    set2.remove(bestSwapWord)
-                    set1.append(bestSwapWord)
-                i = i + 1
-
-            # Return the optimized sets
-            return set1,set2
-
-        def cluster(set1,set2,conjSet,defSet,dis):
-            for word in set1:
-                score1 = calcScore(word,set1,conjSet,dis)
-                #print "Score 1: %f" % score1
-                score2 = calcScore(word,set2,conjSet,dis)
-                #print "Score 2: %f" % score2
-                if score2 < score1:
-                    print "swap"
-                    set1.remove(word)
-                    set2.append(word)
-            for word in set2:
-                score1 = calcScore(word,set1,conjSet,dis)
-                score2 = calcScore(word,set2,conjSet,dis)
-                if score1 < score2:
-                    set2.remove(word)
-                    set1.append(word)
-            return set1,set2
-
-        def calcScore(set,conjSet,dis):
-            score = 0
-            for i in range(len(set)):
-                w1 = set[i]
-                for j in range(i, len(set)):
-                    w2 = set[j]
-                    cats = dis[conjSet[w1][0]][conjSet[w2][0]]
-                    score = score + cats
-            return score / len(set)
-
-        def calcSwap(word,currSet,opSet,currentCount,otherCount,conjSet,dis):
-            score1 = 0
-            score2 = 0
-            for w in currSet:
-                if word != w:
-                    cats = dis[conjSet[word][0]][conjSet[w][0]]
-                    score1 = score1 + cats
-            currentCount = ((currentCount* len(currSet)) - score1 )/(len(currSet)-1)
-
-            #for word in set2:
-            for w in opSet:
-                if word != w:
-                    cats = dis[conjSet[word][0]][conjSet[w][0]]
-                    score2 = score2 + cats
-            otherCount = ((otherCount* len(opSet)) + score2 )/(len(opSet)+1)
-
-            return currentCount + otherCount
-
-        def normalize_word(word):
-            return SnowballStemmer("english").stem(word)
-
-        def vectorize(conjSet,defSet):
-            dis = numpy.zeros((len(defSet),len(defSet)))
-            dis.fill(.5)
-            for word in defSet:
-                similar = conjSet[word][1]
-                dissimilar = conjSet[word][2]
-                for sim in similar:
-                    dis[conjSet[word][0]][conjSet[sim][0]] = 0
-                for d in dissimilar:
-                    dis[conjSet[word][0]][conjSet[d][0]] = 1
-            return dis
-
-        def word_feats(words):
-            return dict([(word, True) for word in words])
-
-        def genSets():
-            f = open('words.txt', 'r+')
-            content = f.readlines()
-            positive = Set([])
-            negative = Set([])
-
-            for pair in content:
-                current = pair.split(' ')
-                if (current[1][0] == 'p'):
-                    positive.add(current[0])
-                elif (current[1][0] == 'n'):
-                    negative.add(current[0])
-
-            return positive,negative
-
-        def getConj():
-            # Set up the tuple (index, similar, dissimilar)
-            f = open('conj.txt', 'r+')
-            content = f.readlines()
-            d = dict()
-            i = 0
-            for line in content:
-                current = line.split(' ')
-                if current[2] == "but":
-                    if current[0] in d:
-                        d[current[0]][2].add(current[1])
-                    else:
-                        d[current[0]] = (i,Set(),Set([current[1]]))
-                        i = i+1
-                    if current[1] in d:
-                        d[current[1]][2].add(current[0])
-                    else:
-                        d[current[1]] = (i,Set(),Set([current[0]]))
-                        i = i+1
-                else:
-                    if current[0] in d:
-                        d[current[0]][1].add(current[1])
-                    else:
-                        d[current[0]] = (i,Set([current[1]]),Set())
-                        i = i+1
-                    if current[1] in d:
-                        d[current[1]][1].add(current[0])
-                    else:
-                        d[current[1]] = (i,Set([current[0]]),Set())
-                        i = i+1
-            return d
-
-        #Get the Data#
-        """
-        negids = movie_reviews.fileids('neg')
-        posids = movie_reviews.fileids('pos')
-        training = set(negids[:500] + posids[:500])
-        testing = set(negids[500:] + posids[500:])
-        """
-        # Generate positive and negative initial sets
-        sets = genSets()
-        positive = random.sample(sets[0], min(len(sets[0]), len(sets[1])))
-        negative = random.sample(sets[1], min(len(sets[0]), len(sets[1])))
-
-        # Clustering Setup
-        stopwords = set(nltk.corpus.stopwords.words('english'))
-
-        # Create dictionary (adj, (index, similar, dissimilar))
-        conjSet = getConj()
-
-        # Create list out of all keys of conjSet
-        defSet = conjSet.keys()
-
-        # Generate dissimilarity matrix
-        dis = vectorize(conjSet,defSet)
-
-        # Its Cluster time
-        set1 = defSet[len(defSet)//2:]
-        set2 = defSet[:len(defSet)//2]
-        """
-        set1 = random.sample(defSet, len(defSet)//4)
-        set2 = [x for x in defSet if x not in set1]
-        """
-        # Optimize objective function
-        sets = optimize2(set1,set2,conjSet,defSet,dis)
-        set1 = sets[0]
-        set2 = sets[1]
-
-        print(set1)
-        print(set2)
-        f1 = open('set1.txt', 'w+')
-        f2 = open('set2.txt', 'w+')
+def optimize(set1,set2,conjSet,defSet,dis):
+    i = 0
+    currentMin = 999999
+    consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
+    bestSwapWord = ""
+    # Calculate the best word to remove until no moves lessen the function
+    while( currentMin > consideredMin):
+        print i
+        i = i + 1
+        currentMin = consideredMin
         for word in set1:
-            f1.write(word + "\n")
+            set1.remove(word)
+            set2.append(word)
+            test = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
+            set2.remove(word)
+            set1.append(word)
+            if (test < consideredMin):
+                consideredMin = test
+                bestSwapWord = word
         for word in set2:
-            f2.write(word + "\n")
+            set2.remove(word)
+            set1.append(word)
+            test = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
+            set1.remove(word)
+            set2.append(word)
+            if (test < consideredMin):
+                consideredMin = test
+                bestSwapWord = word
-        f1.close()
-        f2.close()
-        # Can we classify and then run bag of words?
-        #negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
-        #posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
-        #trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
-        #testfeats = negfeats[500:] + posfeats[500:]
-        #classifier1 = NaiveBayesClassifier.train(trainfeats)
-        #print 'accuracy:', nltk.classify.util.accuracy(classifier1,({"Bad": True},"neg"))
+        if(bestSwapWord in set1):
+            set1.remove(bestSwapWord)
+            set2.append(bestSwapWord)
+        else:
+            set2.remove(bestSwapWord)
+            set1.append(bestSwapWord)
+    # Return the optimized sets
+    return set1,set2
+
+def optimize2(set1,set2,conjSet,defSet,dis):
+    i = 0
+    currentMin = 999999
+    consideredMin = calcScore(set1,conjSet,dis) + calcScore(set2,conjSet,dis)
+    bestSwapWord = None
+    # Calculate the best word to remove until no moves lessen the function
+    while( currentMin > consideredMin):
+        currentMin = consideredMin
+        currentS1 = calcScore(set1,conjSet,dis)
+        currentS2 = calcScore(set2,conjSet,dis)
+        consideredMin = currentS1 + currentS2
+        for word in set1:
+            test = calcSwap(word,set1,set2,currentS1,currentS2,conjSet,dis)
+            if (test < consideredMin):
+                consideredMin = test
+                bestSwapWord = word
+        for word in set2:
+            test = calcSwap(word,set2,set1,currentS2,currentS1,conjSet,dis)
+            if (test < consideredMin):
+                consideredMin = test
+                bestSwapWord = word
+
+        if(bestSwapWord in set1):
+            set1.remove(bestSwapWord)
+            set2.append(bestSwapWord)
+        elif(bestSwapWord in set2):
+            set2.remove(bestSwapWord)
+            set1.append(bestSwapWord)
+        i = i + 1
+
+    # Return the optimized sets
+    return set1,set2
+
+def constraintSwap(set1,set2,conjSet,defSet,dis):
+    for word in set1:
+        stay = 0
+        swap = 0
+        for otherword in set1:
+            if otherword != word:
+                cats = dis[conjSet[word][0]][conjSet[otherword][0]]
+                stay = stay + cats
+        stay = stay * (1/(len(set1)-1))
+        for otherword in set2:
+            if otherword != word:
+                cats = dis[conjSet[word][0]][conjSet[otherword][0]]
+                swap = swap + cats
+        swap = swap * (1/(len(set2)))
+        if(stay > swap):
+            set1.remove(word)
+            set2.append(word)
+
+    for word in set2:
+        stay = 0
+        swap = 0
+        for otherword in set2:
+            if otherword != word:
+                cats = dis[conjSet[word][0]][conjSet[otherword][0]]
+                stay = stay + cats
+        stay = stay * (1/(len(set2)-1))
+        for otherword in set1:
+            if otherword != word:
+                cats = dis[conjSet[word][0]][conjSet[otherword][0]]
+                swap = swap + cats
+        swap = swap * (1/(len(set1)))
+        if(stay > swap):
+            set2.remove(word)
+            set1.append(word)
+    return set1,set2
+
+def calcScore(set,conjSet,dis):
+    score = 0
+    for i in range(len(set)):
+        w1 = set[i]
+        for j in range(i, len(set)):
+            w2 = set[j]
+            cats = dis[conjSet[w1][0]][conjSet[w2][0]]
+            score = score + cats
+    return score / len(set)
+
+def calcSwap(word,currSet,opSet,currentCount,otherCount,conjSet,dis):
+    score1 = 0
+    score2 = 0
+    for w in currSet:
+        if word != w:
+            cats = dis[conjSet[word][0]][conjSet[w][0]]
+            score1 = score1 + cats
+    currentCount = ((currentCount * len(currSet)) - score1) / (len(currSet)-1)
+
+    for w in opSet:
+        if word != w:
+            cats = dis[conjSet[word][0]][conjSet[w][0]]
+            score2 = score2 + cats
+    otherCount = ((otherCount * len(opSet)) + score2) / (len(opSet)+1)
+
+    return currentCount + otherCount
+
+def normalize_word(word):
+    return SnowballStemmer("english").stem(word)
+
+def vectorize(conjSet,defSet):
+    dis = numpy.zeros((len(defSet),len(defSet)))
+    dis.fill(.5)
+    for word in defSet:
+        similar = conjSet[word][1]
+        dissimilar = conjSet[word][2]
+        for sim in similar:
+            dis[conjSet[word][0]][conjSet[sim][0]] = 0
+        for d in dissimilar:
+            dis[conjSet[word][0]][conjSet[d][0]] = 1
+    return dis
+
+def word_feats(words):
+    return dict([(word, True) for word in words])
+
+def genSets():
+    f = open('words.txt', 'r+')
+    content = f.readlines()
+    positive = Set([])
+    negative = Set([])
+
+    for pair in content:
+        current = pair.split(' ')
+        if (current[1][0] == 'p'):
+            positive.add(current[0])
+        elif (current[1][0] == 'n'):
+            negative.add(current[0])
+
+    return positive,negative
+
+def getConj():
+    # Set up the tuple (index, similar, dissimilar)
+    f = open('conj.txt', 'r+')
+    content = f.readlines()
+    d = dict()
+    i = 0
+    for line in content:
+        current = line.split(' ')
+        if current[2] == "but":
+            if current[0] in d:
+                d[current[0]][2].add(current[1])
+            else:
+                d[current[0]] = (i,Set(),Set([current[1]]))
+                i = i+1
+            if current[1] in d:
+                d[current[1]][2].add(current[0])
+            else:
+                d[current[1]] = (i,Set(),Set([current[0]]))
+                i = i+1
+        else:
+            if current[0] in d:
+                d[current[0]][1].add(current[1])
+            else:
+                d[current[0]] = (i,Set([current[1]]),Set())
+                i = i+1
+            if current[1] in d:
+                d[current[1]][1].add(current[0])
+            else:
+                d[current[1]] = (i,Set([current[0]]),Set())
+                i = i+1
+    return d
+
+def findFrequency(set1,set2):
+    set1Freq = 0
+    set2Freq = 0
+
+    for word in brown.words():
+        set1Freq = (set1Freq+1) if (word in set1) else set1Freq
+        set2Freq = (set2Freq+1) if (word in set2) else set2Freq
+
+    return set1Freq, set2Freq
+
+def conjunctionData(set1,set2):
+    f = open('conj.txt', 'r+')
+    content = f.readlines()
+    totalConj = 0
+    totalbuts = 0
+    correctbuts = 0
+    totalands = 0
+    correctands = 0
+    totalors = 0
+    correctors = 0
+    totalnors = 0
+    correctnors = 0
+    for line in content:
+        totalConj = totalConj + 1
+        current = line.split(' ')
+        if current[2] == "but":
+            totalbuts = totalbuts + 1
+            if( (current[0] in set1 and current[1] in set2) or (current[0] in set2 and current[1] in set1) ):
+                correctbuts = correctbuts + 1
+        elif current[2] == "and":
+            totalands = totalands + 1
+            if( (current[0] in set1 and current[1] in set1) or (current[0] in set2 and current[1] in set2) ):
+                correctands = correctands + 1
+        elif current[2] == "or":
+            totalors = totalors + 1
+            if( (current[0] in set1 and current[1] in set1) or (current[0] in set2 and current[1] in set2) ):
+                correctors = correctors + 1
+        elif current[2] == "nor":
+            totalnors = totalnors + 1
+            if( (current[0] in set1 and current[1] in set1) or (current[0] in set2 and current[1] in set2) ):
+                correctnors = correctnors + 1
+    print "Total Conjunctions: %d" % totalConj
+    print "Total ands: %d \n Ands in same set: %d" % (totalands,correctands)
+    print "Total ors: %d \n Ors in same set: %d" % (totalors,correctors)
+    print "Total nors: %d \n Nors in same set: %d" % (totalnors,correctnors)
+    print "Total buts: %d \n Buts in opposite sets: %d" % (totalbuts,correctbuts)
+
+def returnCBLexicon():
+    # Generate positive and negative initial sets
+    sets = genSets()
+    positive = random.sample(sets[0], min(len(sets[0]), len(sets[1])))
+    negative = random.sample(sets[1], min(len(sets[0]), len(sets[1])))
+
+    # Clustering setup
+    stopwords = set(nltk.corpus.stopwords.words('english'))
+
+    # Create dictionary (adj, (index, similar, dissimilar))
+    conjSet = getConj()
+
+    # Create list out of all keys of conjSet
+    defSet = conjSet.keys()
+
+    # Generate dissimilarity matrix
+    dis = vectorize(conjSet,defSet)
+
+    # It's clustering time
+    set1 = defSet[len(defSet)//2:]
+    set2 = defSet[:len(defSet)//2]
+    """
+    set1 = random.sample(defSet, len(defSet)//4)
+    set2 = [x for x in defSet if x not in set1]
+    """
+    # Optimize objective function
+    (set1,set2) = optimize2(set1,set2,conjSet,defSet,dis)
+    # Check the constraint
+    #(set1,set2) = constraintSwap(set1,set2,conjSet,defSet,dis)
+
+    #f1 = open('set1.txt', 'w+')
+    #f2 = open('set2.txt', 'w+')
+    #for word in set1:
+    #    f1.write(word + "\n")
+    #for word in set2:
+    #    f2.write(word + "\n")
+    #f1.close()
+    #f2.close()
+
+    # Find which set has a higher frequency in the Brown corpus
+    (set1Freq,set2Freq) = findFrequency(set1,set2)
+
+    positive = set1 if (set1Freq > set2Freq) else set2
+    negative = set1 if (set1Freq < set2Freq) else set2
+    return positive,negative
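
Review note on cblexicon.py: the new module clusters adjectives into two polarity sets from conjunction evidence, in the style of Hatzivassiloglou and McKeown's conjunction method. "and"/"or" pairs pull two words toward the same set (dissimilarity 0), "but" pairs push them into opposite sets (dissimilarity 1), unrelated pairs stay at 0.5, and optimize2 hill-climbs single-word swaps to lower the summed within-set dissimilarity. A standalone sketch for sanity-checking vectorize and calcScore without words.txt or conj.txt, not part of the patch (the four words and their conjunction links are invented for illustration, and it assumes cblexicon.py is on the import path):

    from sets import Set
    from cblexicon import vectorize, calcScore

    # Hand-built stand-in for getConj()'s output: word -> (index, and-linked, but-linked)
    conjSet = {
        'good': (0, Set(['nice']), Set(['bad'])),
        'nice': (1, Set(['good']), Set(['poor'])),
        'bad':  (2, Set(['poor']), Set(['good'])),
        'poor': (3, Set(['bad']),  Set(['nice'])),
    }
    dis = vectorize(conjSet, conjSet.keys())

    print dis[conjSet['good'][0]][conjSet['bad'][0]]   # 1.0: "but"-linked pair
    print dis[conjSet['good'][0]][conjSet['nice'][0]]  # 0.0: "and"-linked pair
    print calcScore(['good', 'nice'], conjSet, dis)    # 0.5: cohesive cluster
    print calcScore(['good', 'bad'], conjSet, dis)     # 1.0: mixed cluster scores higher

Lower calcScore means a more internally similar set, which is the quantity optimize2 drives down; findFrequency then uses Brown-corpus frequency to decide which of the two final clusters is the positive one.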