From eb7a6d06f0a9f04ea142acd85261b9efaae5b33b Mon Sep 17 00:00:00 2001
From: Antonia Lewis
Date: Thu, 17 Mar 2016 14:43:28 -0400
Subject: [PATCH] Working

---
 GlossBayes.py | 74 +++++++++++++++++++++++++++++++--------------------
 GlossCount.py | 10 ++++---
 2 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/GlossBayes.py b/GlossBayes.py
index 9983f91..b278285 100644
--- a/GlossBayes.py
+++ b/GlossBayes.py
@@ -12,7 +12,6 @@ class Solver:
     def demo(self):
         def word_feats(words):
             return dict([(word, True) for word in words])
-
         def expand_sets(positive,negative,neutral):
             newPositive = set(positive)
             newNegative = set(negative)
@@ -22,21 +21,36 @@ class Solver:
                 for syn in wn.synsets(word, pos=wn.ADJ):
                     for lemma in syn.lemmas():
                         curr = lemma.name().split('.')[0]
-                        if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+                        if(curr not in newNegative and curr not in newNeutral):
                             newPositive.add(curr)
                         elif( curr in newNegative):
                             newNegative.discard(curr)
                             newNeutral.add(curr)
+                        # Deal with antonyms
+                        for ant in lemma.antonyms():
+                            if(ant not in newPositive and ant not in newNeutral):
+                                newNegative.add(ant)
+                            elif(ant in newPositive):
+                                newPositive.discard(ant)
+                                newNeutral.add(ant)
             # Add Syns to Negative
             for word in negative:
                 for syn in wn.synsets(word, pos=wn.ADJ):
                     for lemma in syn.lemmas():
                         curr = lemma.name().split('.')[0]
-                        if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+                        print curr
+                        if(curr not in newPositive and curr not in newNeutral):
                             newNegative.add(curr)
                         elif(curr in newPositive):
                             newPositive.discard(curr)
                             newNeutral.add(curr)
+                        # Deal with antonyms
+                        for ant in lemma.antonyms():
+                            if(ant not in newNegative and ant not in newNeutral):
+                                newPositive.add(ant)
+                            elif(ant in newNegative):
+                                newNegative.discard(ant)
+                                newNeutral.add(ant)
             return (newPositive,newNegative,newNeutral)

         # Set up initial Sets S_p and S_n
@@ -45,36 +59,38 @@ class Solver:
         neutral = Set([''])

         # Expand on Sets to get S_p' and S_n'
-        for num in range(1,3):
+        for num in range(1,2):
             newsets = expand_sets(positive,negative,neutral);
             positive = set(newsets[0])
             negative = set(newsets[1])
             neutral = set(newsets[2])
-
-        # Learn Classifier
-        trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
-
-        #negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]
-
-        #trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
-        classifier = NaiveBayesClassifier.train(trainfeats)
-
-
-        # Testing
-        negids = movie_reviews.fileids('neg')
-        posids = movie_reviews.fileids('pos')
-
-        negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
-        posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
-        negcutoff = len(negfeats)*3/4
-        poscutoff = len(posfeats)*3/4
-
-        testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
-
-        print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))
-
-        print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
-        classifier.show_most_informative_features()
+        print positive
+        print negative
+
+        # # Learn Classifier
+        # trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
+        #
+        # #negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]
+        #
+        # #trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
+        # classifier = NaiveBayesClassifier.train(trainfeats)
+        #
+        #
+        # # Testing
+        # negids = movie_reviews.fileids('neg')
+        # posids = movie_reviews.fileids('pos')
+        #
+        # negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
+        # posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
+        # negcutoff = len(negfeats)*3/4
+        # poscutoff = len(posfeats)*3/4
+        #
+        # testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
+        #
+        # print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))
+        #
+        # print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
+        # classifier.show_most_informative_features()

         #text = nltk.word_tokenize("And now for a production unlike any other a very fuzzy and cute dog")

diff --git a/GlossCount.py b/GlossCount.py
index 6180ae7..580f2a5 100644
--- a/GlossCount.py
+++ b/GlossCount.py
@@ -27,7 +27,7 @@ class GlossCount:
             for syn in wn.synsets(word, pos=wn.ADJ):
                 for lemma in syn.lemmas():
                     curr = lemma.name().split('.')[0]
-                    if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+                    if(curr not in newNegative and curr not in newNeutral):
                         newPositive.add(curr)
                     elif( curr in newNegative):
                         newNegative.discard(curr)
@@ -37,7 +37,7 @@ class GlossCount:
             for syn in wn.synsets(word, pos=wn.ADJ):
                 for lemma in syn.lemmas():
                     curr = lemma.name().split('.')[0]
-                    if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+                    if( curr not in newPositive and curr not in newNeutral):
                         newNegative.add(curr)
                     elif(curr in newPositive):
                         newPositive.discard(curr)
@@ -50,7 +50,7 @@ class GlossCount:
         neutral = Set([''])

         # Expand on Sets to get S_p' and S_n'
-        for num in range(1,2):
+        for num in range(1,3):
             newsets = expand_sets(positive,negative,neutral);
             positive = set(newsets[0])
             negative = set(newsets[1])
@@ -63,12 +63,14 @@ class GlossCount:
         classifier = NaiveBayesClassifier.train(trainfeats)
         #print classifier.classify(dict([(word, True) for word in words]))
         #print classifier.classify(dict([("bad",True),("bad",True)]))
+
+        # Iterate through all of the reviews and find sentiment
         count = 0.00
         correct = 0.00
         for reviews in movie_reviews.fileids():      #For every review
             score = 0;
-            tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews])))   #Tokenize all words
+            tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews])))   #Tokenize all words with POS
             for token in tokens:
                 if (token[1]== "JJ" or token[1] == "JJR" or token[1] == "JJS"):  # If adjective, check value
                     sent_value = classifier.classify(dict([(token[0], True)]))