Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
94 lines (82 sloc) 3.84 KB
import math
import nltk
from nltk.corpus import wordnet as wn
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from sets import Set
class GlossCount(object):
    """Lexicon-expansion sentiment demo evaluated on the NLTK movie reviews.

    Seed sets of positive and negative words are grown with WordNet
    adjective synonyms, a Naive Bayes classifier is trained on the
    resulting word lists, and every adjective of every movie review then
    votes +1 / -1 according to the label the classifier gives it.
    """

    # POS tags treated as adjectives (base, comparative, superlative).
    ADJECTIVE_TAGS = ("JJ", "JJR", "JJS")

    # Number of synonym-expansion passes applied to the seed sets.
    EXPANSION_ROUNDS = 2

    @staticmethod
    def _value_of(sentiment):
        """Return +1 for 'pos', -1 for 'neg' and 0 for any other label."""
        if sentiment == 'pos':
            return 1
        if sentiment == 'neg':
            return -1
        return 0

    @staticmethod
    def _label_for_score(score):
        """Return 'neg' for a strictly negative score, otherwise 'pos'."""
        return 'neg' if score < 0 else 'pos'

    @staticmethod
    def _wordnet_adjective_lemmas(word):
        """Return the lemma names of all WordNet adjective synsets of word.

        Each name is cut at its first '.', as the original code did.
        """
        return [
            lemma.name().split('.')[0]
            for syn in wn.synsets(word, pos=wn.ADJ)
            for lemma in syn.lemmas()
        ]

    @staticmethod
    def _expand_sets(positive, negative, neutral, synonyms=None):
        """Grow the positive and negative sets by one round of synonyms.

        Synonyms of the positive words are processed first, then synonyms
        of the negative words.  A synonym is added to its own polarity
        unless it is already neutral; a synonym currently held by the
        opposite polarity is removed from there and becomes neutral.

        Args:
            positive: set of words currently considered positive.
            negative: set of words currently considered negative.
            neutral: set of words claimed by both polarities.
            synonyms: optional callable mapping a word to an iterable of
                synonym strings; defaults to the WordNet adjective lookup.

        Returns:
            A tuple (new_positive, new_negative, new_neutral) of new sets;
            the arguments are not modified.
        """
        if synonyms is None:
            synonyms = GlossCount._wordnet_adjective_lemmas

        new_positive = set(positive)
        new_negative = set(negative)
        new_neutral = set(neutral)

        def absorb(seeds, own, opposite):
            for word in seeds:
                for candidate in synonyms(word):
                    if candidate in opposite:
                        # Claimed by both polarities: demote to neutral.
                        opposite.discard(candidate)
                        new_neutral.add(candidate)
                    elif candidate not in new_neutral:
                        own.add(candidate)

        # Only the words passed in act as seeds; synonyms discovered in
        # this round are expanded by the next call.
        absorb(positive, new_positive, new_negative)
        absorb(negative, new_negative, new_positive)
        return (new_positive, new_negative, new_neutral)

    def demo(self):
        """Train on the expanded lexicon and print per-review results.

        For each movie review this prints the predicted polarity with its
        score and "Correct" when the prediction equals the review's first
        corpus category, then prints the overall accuracy.
        """
        # Set up initial sets S_p and S_n.
        positive = {'Good'}
        negative = {'Bad'}
        # The empty string is kept from the original seed set so that the
        # expansion results are unchanged.
        neutral = {''}

        # Expand the sets to get S_p' and S_n'.
        for _ in range(self.EXPANSION_ROUNDS):
            positive, negative, neutral = self._expand_sets(
                positive, negative, neutral)

        # Learn the classifier: one single-feature sample per lexicon word.
        train_feats = (
            [({word: True}, "pos") for word in positive]
            + [({word: True}, "neg") for word in negative]
        )
        classifier = NaiveBayesClassifier.train(train_feats)
        print("cat")
        print(classifier.classify({"bad": True}))

        # Iterate through all of the reviews and find their sentiment.
        total = 0
        correct = 0
        for fileid in movie_reviews.fileids():
            # Tokenize the raw review text and tag every token with its POS.
            tagged = nltk.pos_tag(
                nltk.word_tokenize(movie_reviews.raw(fileids=[fileid])))
            # Labels are compared by value inside _value_of; the original
            # relied on `is`, which only works through string interning.
            score = sum(
                self._value_of(classifier.classify({token: True}))
                for token, tag in tagged
                if tag in self.ADJECTIVE_TAGS
            )

            sentiment = self._label_for_score(score)
            if sentiment == 'neg':
                print("Negative at %d" % (score))
            else:
                print("Positive at %d" % (score))

            if sentiment == movie_reviews.categories(fileids=[fileid])[0]:
                print("Correct")
                correct += 1
            total += 1

        # NOTE(review): the source lost its indentation, so it is unclear
        # whether the accuracy was printed per review or once at the end;
        # it is printed once here - confirm against the original intent.
        if total:
            print(float(correct) / total)
# Run the demo at module level: this executes whenever the file is run
# as a script or imported.
GlossCount().demo()
You can’t perform that action at this time.