Skip to content

Commit

Permalink
Glossary Based Sentiment Analysis added
Browse files Browse the repository at this point in the history
Accuracy of about 53% for the Bayes classifier; the counting approach is not yet functional. Will work on optimizing.
  • Loading branch information
adl13006 committed Mar 16, 2016
1 parent 3e2c889 commit ac01541
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 0 deletions.
94 changes: 94 additions & 0 deletions GlossBayes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import math
import nltk
from nltk.corpus import wordnet as wn
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from sets import Set


class Solver:
    """Glossary-based sentiment analysis via Naive Bayes.

    Bootstraps positive/negative adjective lexicons from WordNet synonym
    expansion of the seed words 'Good' and 'Bad', trains an NLTK Naive
    Bayes classifier on the resulting word lists, and evaluates it on the
    held-out quarter of the movie_reviews corpus.
    """

    def demo(self):
        """Build the lexicons, train the classifier, and print accuracy."""

        def word_feats(words):
            # Bag-of-words feature dict: every observed token maps to True.
            return {word: True for word in words}

        def expand_sets(positive, negative, neutral):
            """Perform one WordNet expansion step.

            Adds adjective-synset lemma names of each seed word to its
            polarity set; a synonym already claimed by the opposite
            polarity is demoted to the neutral set.  Returns the three
            new sets as a (positive, negative, neutral) tuple.
            """
            new_pos = set(positive)
            new_neg = set(negative)
            new_neu = set(neutral)

            # Expand the positive set with adjective synonyms.
            for word in positive:
                for syn in wn.synsets(word, pos=wn.ADJ):
                    for lemma in syn.lemmas():
                        curr = lemma.name().split('.')[0]
                        if (curr not in new_pos and curr not in new_neg
                                and curr not in new_neu):
                            new_pos.add(curr)
                        elif curr in new_neg:
                            # Claimed by both polarities: move to neutral.
                            new_neg.discard(curr)
                            new_neu.add(curr)

            # Expand the negative set the same way.
            for word in negative:
                for syn in wn.synsets(word, pos=wn.ADJ):
                    for lemma in syn.lemmas():
                        curr = lemma.name().split('.')[0]
                        if (curr not in new_pos and curr not in new_neg
                                and curr not in new_neu):
                            new_neg.add(curr)
                        elif curr in new_pos:
                            new_pos.discard(curr)
                            new_neu.add(curr)

            return new_pos, new_neg, new_neu

        # Seed sets S_p and S_n.  The neutral set starts empty -- the old
        # Set(['']) seeded it with an empty string, which was a bug.
        positive = {'Good'}
        negative = {'Bad'}
        neutral = set()

        # Two expansion iterations give the enlarged sets S_p' and S_n'.
        for _ in range(2):
            positive, negative, neutral = expand_sets(positive, negative, neutral)

        # Train the classifier directly on the lexicon: each word is a
        # single-feature "document" labelled with its polarity.
        trainfeats = ([({word: True}, 'pos') for word in positive]
                      + [({word: True}, 'neg') for word in negative])
        classifier = NaiveBayesClassifier.train(trainfeats)

        # Evaluate on the last quarter of each polarity of movie_reviews.
        negids = movie_reviews.fileids('neg')
        posids = movie_reviews.fileids('pos')
        negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg')
                    for f in negids]
        posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos')
                    for f in posids]
        # Integer division: 3/4 would be a float slice index on Python 3.
        negcutoff = len(negfeats) * 3 // 4
        poscutoff = len(posfeats) * 3 // 4
        testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

        print('Dictionary of %d positive words and %d negative words, '
              'tested on %d instances'
              % (len(positive), len(negative), len(testfeats)))
        print('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
        classifier.show_most_informative_features()


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    Solver().demo()





88 changes: 88 additions & 0 deletions GlossCount.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import math
import nltk
from nltk.corpus import wordnet as wn
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from sets import Set

class GlossCount:
    """Word-counting sentiment analysis over a bootstrapped glossary.

    Builds positive/negative lexicons by WordNet synonym expansion,
    trains a Naive Bayes classifier on them, then scores each movie
    review by classifying it word-by-word and summing +1/-1 votes.
    Prints the fraction of reviews whose induced sentiment matches the
    corpus label.
    """

    def demo(self):
        """Build the lexicons, score every review, and print accuracy."""

        def value_of(sentiment):
            # Map a classifier label to its signed vote.
            if sentiment == 'pos':
                return 1
            if sentiment == 'neg':
                return -1
            return 0

        def expand_sets(positive, negative, neutral):
            """Perform one WordNet expansion step.

            Adds adjective-synset lemma names of each seed word to its
            polarity set; a synonym already claimed by the opposite
            polarity is demoted to the neutral set.  Returns the three
            new sets as a (positive, negative, neutral) tuple.
            """
            new_pos = set(positive)
            new_neg = set(negative)
            new_neu = set(neutral)

            # Expand the positive set with adjective synonyms.
            for word in positive:
                for syn in wn.synsets(word, pos=wn.ADJ):
                    for lemma in syn.lemmas():
                        curr = lemma.name().split('.')[0]
                        if (curr not in new_pos and curr not in new_neg
                                and curr not in new_neu):
                            new_pos.add(curr)
                        elif curr in new_neg:
                            # Claimed by both polarities: move to neutral.
                            new_neg.discard(curr)
                            new_neu.add(curr)

            # Expand the negative set the same way.
            for word in negative:
                for syn in wn.synsets(word, pos=wn.ADJ):
                    for lemma in syn.lemmas():
                        curr = lemma.name().split('.')[0]
                        if (curr not in new_pos and curr not in new_neg
                                and curr not in new_neu):
                            new_neg.add(curr)
                        elif curr in new_pos:
                            new_pos.discard(curr)
                            new_neu.add(curr)

            return new_pos, new_neg, new_neu

        # Seed sets S_p and S_n; neutral starts empty (the old Set([''])
        # seeded it with an empty string, which was a bug).
        positive = {'Good'}
        negative = {'Bad'}
        neutral = set()

        # One expansion iteration gives S_p' and S_n'.
        positive, negative, neutral = expand_sets(positive, negative, neutral)

        # Train the classifier on the lexicon words.
        trainfeats = ([({word: True}, 'pos') for word in positive]
                      + [({word: True}, 'neg') for word in negative])
        classifier = NaiveBayesClassifier.train(trainfeats)

        # Sanity check: a trivially negative feature dict should be 'neg'.
        print(classifier.classify({'bad': True}))

        # Score every review word-by-word and compare the induced
        # sentiment against the gold corpus category.
        count = 0.0
        correct = 0.0
        for review in movie_reviews.fileids():
            score = 0
            for word in movie_reviews.words(fileids=[review]):
                # Classify each individual word and accumulate its vote.
                # (Original had a syntax error here and referenced an
                # undefined name; this is the intended counting loop.)
                score += value_of(classifier.classify({word: True}))
            if score < 0:
                print('Negative at %d' % score)
                sentiment = 'neg'
            else:
                sentiment = 'pos'
                print('Positive at %d' % score)
            if sentiment == movie_reviews.categories(fileids=[review])[0]:
                correct += 1.0
            count += 1.0
        print(correct / count)

GlossCount().demo()

0 comments on commit ac01541

Please sign in to comment.