import math
import nltk
from nltk.corpus import wordnet as wn
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from sets import Set
class GlossCount:
    """Synonym-expansion sentiment demo over the NLTK movie review corpus.

    Starting from one positive and one negative seed word, the seed sets are
    grown with WordNet adjective synonyms, a Naive Bayes classifier is trained
    on the resulting word lists, and every review in ``movie_reviews`` is then
    labelled by summing the classifier's votes over its adjectives.
    """

    # Part-of-speech tags that are treated as adjectives when scoring.
    ADJECTIVE_TAGS = frozenset(["JJ", "JJR", "JJS"])

    # Number of synonym-expansion passes applied to the seed sets.
    EXPANSION_ROUNDS = 2

    @staticmethod
    def _value_of(sentiment):
        """Map a sentiment label to a vote: 'pos' -> 1, 'neg' -> -1, else 0."""
        if sentiment == 'pos':
            return 1
        if sentiment == 'neg':
            return -1
        return 0

    @staticmethod
    def _label_for_score(score):
        """Return 'neg' for a strictly negative score, otherwise 'pos'."""
        return 'neg' if score < 0 else 'pos'

    @staticmethod
    def _adjective_synonyms(word):
        """Return the set of WordNet adjective lemma names for ``word``."""
        names = set()
        for syn in wn.synsets(word, pos=wn.ADJ):
            for lemma in syn.lemmas():
                # NOTE(review): the original expression was truncated to
                # "'.')[0]"; reconstructed as the lemma name up to the first
                # dot -- confirm against the intended source.
                names.add(lemma.name().split('.')[0])
        return names

    @staticmethod
    def _expand_sets(positive, negative, neutral, synonyms_of=None):
        """Grow the positive and negative word sets with synonyms.

        The input collections are not modified; new sets are returned.

        NOTE(review): the bodies of the original if/elif branches were
        missing. The behaviour here is a reconstruction: an unseen synonym
        joins the polarity of the word it came from, and a synonym already
        held by the opposite polarity is moved to the neutral set.

        Args:
            positive: iterable of positive words.
            negative: iterable of negative words.
            neutral: iterable of neutral words.
            synonyms_of: optional callable mapping a word to an iterable of
                synonyms; defaults to the WordNet adjective lookup.

        Returns:
            A ``(positive, negative, neutral)`` tuple of new sets.
        """
        if synonyms_of is None:
            synonyms_of = GlossCount._adjective_synonyms

        new_positive = set(positive)
        new_negative = set(negative)
        new_neutral = set(neutral)

        # Add synonyms of the positive words.
        for word in positive:
            for curr in synonyms_of(word):
                if curr not in new_negative and curr not in new_neutral:
                    new_positive.add(curr)
                elif curr in new_negative:
                    # Claimed by both polarities: treat as neutral.
                    new_negative.discard(curr)
                    new_neutral.add(curr)

        # Add synonyms of the negative words.
        for word in negative:
            for curr in synonyms_of(word):
                if curr not in new_positive and curr not in new_neutral:
                    new_negative.add(curr)
                elif curr in new_positive:
                    # Claimed by both polarities: treat as neutral.
                    new_positive.discard(curr)
                    new_neutral.add(curr)

        return (new_positive, new_negative, new_neutral)

    def demo(self):
        """Train on the expanded seed sets and print per-review results.

        For every review this prints the predicted polarity with its score and
        "Correct" when the prediction matches the corpus category; the final
        line printed is the fraction of reviews labelled correctly.
        """
        # Set up initial sets S_p and S_n.
        positive = {'Good'}
        negative = {'Bad'}
        neutral = {''}

        # Expand the sets to get S_p' and S_n'.
        for _ in range(self.EXPANSION_ROUNDS):
            positive, negative, neutral = self._expand_sets(
                positive, negative, neutral)

        # Learn the classifier: one single-word feature dict per lexicon entry.
        trainfeats = ([({word: True}, "pos") for word in positive] +
                      [({word: True}, "neg") for word in negative])
        classifier = NaiveBayesClassifier.train(trainfeats)
        print("cat")
        print(classifier.classify({"bad": True}))

        # Iterate through all of the reviews and find their sentiment.
        count = 0.0
        correct = 0.0
        for fileid in movie_reviews.fileids():
            text = movie_reviews.raw(fileids=[fileid])
            tagged = nltk.pos_tag(nltk.word_tokenize(text))

            score = 0
            for word, tag in tagged:
                if tag in self.ADJECTIVE_TAGS:
                    # Labels are compared by value; the original used `is`,
                    # which tests object identity rather than equality.
                    score += self._value_of(classifier.classify({word: True}))

            # The original had no `else`, so every review ended up 'pos'.
            sentiment = self._label_for_score(score)
            if sentiment == 'neg':
                print("Negative at %d" % (score))
            else:
                print("Positive at %d" % (score))

            if sentiment == movie_reviews.categories(fileids=[fileid])[0]:
                print("Correct")
                correct += 1.0
            count += 1.0

        # Guard against an empty corpus instead of dividing by zero.
        print(correct / count if count else 0.0)