
Commit eb7a6d0

Working
adl13006 committed Mar 29, 2016
1 parent 6855e36 commit eb7a6d0
Showing 2 changed files with 51 additions and 33 deletions.
74 changes: 45 additions & 29 deletions GlossBayes.py
@@ -12,7 +12,6 @@ class Solver:
def demo(self):
def word_feats(words):
return dict([(word, True) for word in words])

def expand_sets(positive,negative,neutral):
newPositive = set(positive)
newNegative = set(negative)
@@ -22,21 +21,36 @@ class Solver:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if(curr not in newNegative and curr not in newNeutral):
newPositive.add(curr)
elif( curr in newNegative):
newNegative.discard(curr)
newNeutral.add(curr)
+# Deal with antonyms
+for ant in lemma.antonyms():
+if(ant not in newPositive and ant not in newNeutral):
+newNegative.add(ant)
+elif(ant in newPositive):
+newPositive.discard(ant)
+newNeutral.add(ant)
# Add Syns to Negative
for word in negative:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+print curr
+if(curr not in newPositive and curr not in newNeutral):
newNegative.add(curr)
elif(curr in newPositive):
newPositive.discard(curr)
newNeutral.add(curr)
+# Deal with antonyms
+for ant in lemma.antonyms():
+if(ant not in newNegative and ant not in newNeutral):
+newPositive.add(ant)
+elif(ant in newNegative):
+newNegative.discard(ant)
+newNeutral.add(ant)
return (newPositive,newNegative,newNeutral)

# Set up initial Sets S_p and S_n
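
For reference, the expansion step this hunk touches can be read as a standalone routine. Below is a minimal Python 3 sketch of the same idea, not the committed code: the 'good'/'bad' seed words are placeholders (the real seeds lie outside this hunk), antonym Lemma objects are converted to strings with ant.name() (the committed code adds the Lemma objects themselves), and two expansion passes stand in for range(1,3).

# Minimal, runnable Python 3 sketch of the expand_sets logic in this hunk.
# Seeds are placeholders; ant.name() is an assumed fix not present in the diff.
# Requires the NLTK WordNet data.
from nltk.corpus import wordnet as wn

def expand_sets(positive, negative, neutral):
    new_pos, new_neg, new_neu = set(positive), set(negative), set(neutral)
    # Synonyms of positive seeds lean positive; their antonyms lean negative.
    for word in positive:
        for syn in wn.synsets(word, pos=wn.ADJ):
            for lemma in syn.lemmas():
                curr = lemma.name()
                if curr not in new_neg and curr not in new_neu:
                    new_pos.add(curr)
                elif curr in new_neg:
                    new_neg.discard(curr)
                    new_neu.add(curr)
                for ant in lemma.antonyms():
                    name = ant.name()
                    if name not in new_pos and name not in new_neu:
                        new_neg.add(name)
                    elif name in new_pos:
                        new_pos.discard(name)
                        new_neu.add(name)
    # Synonyms of negative seeds lean negative; their antonyms lean positive.
    for word in negative:
        for syn in wn.synsets(word, pos=wn.ADJ):
            for lemma in syn.lemmas():
                curr = lemma.name()
                if curr not in new_pos and curr not in new_neu:
                    new_neg.add(curr)
                elif curr in new_pos:
                    new_pos.discard(curr)
                    new_neu.add(curr)
                for ant in lemma.antonyms():
                    name = ant.name()
                    if name not in new_neg and name not in new_neu:
                        new_pos.add(name)
                    elif name in new_neg:
                        new_neg.discard(name)
                        new_neu.add(name)
    return new_pos, new_neg, new_neu

positive, negative, neutral = {"good"}, {"bad"}, {""}   # assumed seeds
for _ in range(2):   # two expansion passes, mirroring range(1,3)
    positive, negative, neutral = expand_sets(positive, negative, neutral)
print(len(positive), "positive /", len(negative), "negative adjectives")
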
@@ -45,36 +59,38 @@ class Solver:
neutral = Set([''])

# Expand on Sets to get S_p' and S_n'
-for num in range(1,3):
+for num in range(1,2):
newsets = expand_sets(positive,negative,neutral);
positive = set(newsets[0])
negative = set(newsets[1])
neutral = set(newsets[2])

# Learn Classifier
-trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]

-#negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]

-#trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
-classifier = NaiveBayesClassifier.train(trainfeats)


-# Testing
-negids = movie_reviews.fileids('neg')
-posids = movie_reviews.fileids('pos')

-negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
-posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
-negcutoff = len(negfeats)*3/4
-poscutoff = len(posfeats)*3/4

-testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

-print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))

-print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
-classifier.show_most_informative_features()
-print positive
-print negative

+# # Learn Classifier
+# trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
+#
+# #negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]
+#
+# #trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
+# classifier = NaiveBayesClassifier.train(trainfeats)
+#
+#
+# # Testing
+# negids = movie_reviews.fileids('neg')
+# posids = movie_reviews.fileids('pos')
+#
+# negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
+# posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
+# negcutoff = len(negfeats)*3/4
+# poscutoff = len(posfeats)*3/4
+#
+# testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
+#
+# print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))
+#
+# print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
+# classifier.show_most_informative_features()


#text = nltk.word_tokenize("And now for a production unlike any other a very fuzzy and cute dog")
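The training and evaluation block that this commit comments out in GlossBayes.py corresponds roughly to the following Python 3 sketch. It is not the committed code: it assumes positive and negative word sets like those produced by the expand_sets sketch above, and it requires the NLTK movie_reviews corpus.

# Python 3 sketch of the (now commented-out) training/evaluation block in GlossBayes.py.
# Expects `positive` and `negative` word sets such as those built by the expand_sets
# sketch above; requires the NLTK movie_reviews corpus data.
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from nltk.corpus import movie_reviews

def word_feats(words):
    return {word: True for word in words}

def train_and_evaluate(positive, negative):
    # One training "document" per lexicon word, labelled by the set it came from.
    trainfeats = ([({w: True}, "pos") for w in positive] +
                  [({w: True}, "neg") for w in negative])
    classifier = NaiveBayesClassifier.train(trainfeats)

    # Test on the last quarter of each half of the corpus, as in the original
    # len(...)*3/4 cutoff logic.
    negids = movie_reviews.fileids("neg")
    posids = movie_reviews.fileids("pos")
    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), "neg") for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), "pos") for f in posids]
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    print("Dictionary of %d positive and %d negative words, tested on %d instances"
          % (len(positive), len(negative), len(testfeats)))
    print("accuracy:", accuracy(classifier, testfeats))
    classifier.show_most_informative_features()
    return classifier
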
10 changes: 6 additions & 4 deletions GlossCount.py
@@ -27,7 +27,7 @@ class GlossCount:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if(curr not in newNegative and curr not in newNeutral):
newPositive.add(curr)
elif( curr in newNegative):
newNegative.discard(curr)
@@ -37,7 +37,7 @@ class GlossCount:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if( curr not in newPositive and curr not in newNeutral):
newNegative.add(curr)
elif(curr in newPositive):
newPositive.discard(curr)
@@ -50,7 +50,7 @@ class GlossCount:
neutral = Set([''])

# Expand on Sets to get S_p' and S_n'
-for num in range(1,2):
+for num in range(1,3):
newsets = expand_sets(positive,negative,neutral);
positive = set(newsets[0])
negative = set(newsets[1])
@@ -63,12 +63,14 @@ class GlossCount:
classifier = NaiveBayesClassifier.train(trainfeats)
#print classifier.classify(dict([(word, True) for word in words]))
#print classifier.classify(dict([("bad",True),("bad",True)]))


# Iterate through all of the reviews and find sentiment
count = 0.00
correct = 0.00
for reviews in movie_reviews.fileids(): #For every review
score = 0;
-tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words
+tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words with POS
for token in tokens:
if (token[1]== "JJ" or token[1] == "JJR" or token[1] == "JJS"): # If adjective, check value
sent_value = classifier.classify(dict([(token[0], True)]))
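The scoring loop that GlossCount.py runs over the movie_reviews corpus amounts to the following Python 3 sketch. It assumes a word-level classifier like the one trained above; using the fileid prefix ('pos/' or 'neg/') as the gold label and counting a zero score as positive are assumptions, since the comparison and threshold are not shown in this hunk.

# Python 3 sketch of the adjective-counting evaluation in GlossCount.py.
# Assumes `classifier` is a word-level NaiveBayesClassifier as in the sketches above;
# requires the NLTK movie_reviews corpus, punkt tokenizer, and POS tagger data.
import nltk
from nltk.corpus import movie_reviews

def score_review(fileid, classifier):
    # POS-tag the raw review and let only adjectives vote on its polarity.
    tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileid)))
    score = 0
    for word, tag in tokens:
        if tag in ("JJ", "JJR", "JJS"):
            if classifier.classify({word: True}) == "pos":
                score += 1
            else:
                score -= 1
    return score

def evaluate(classifier):
    correct, total = 0, 0
    for fileid in movie_reviews.fileids():
        # Assumed decision rule: non-negative score counts as a positive prediction.
        predicted = "pos" if score_review(fileid, classifier) >= 0 else "neg"
        gold = fileid.split("/")[0]   # 'pos' or 'neg' from the corpus path
        correct += (predicted == gold)
        total += 1
    return correct / total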
