
Commit eb7a6d0

Working
adl13006 committed Mar 29, 2016
1 parent 6855e36 commit eb7a6d0
Showing 2 changed files with 51 additions and 33 deletions.
74 changes: 45 additions & 29 deletions GlossBayes.py
@@ -12,7 +12,6 @@ class Solver:
def demo(self):
def word_feats(words):
return dict([(word, True) for word in words])

def expand_sets(positive,negative,neutral):
newPositive = set(positive)
newNegative = set(negative)
@@ -22,21 +21,36 @@ class Solver:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if(curr not in newNegative and curr not in newNeutral):
newPositive.add(curr)
elif( curr in newNegative):
newNegative.discard(curr)
newNeutral.add(curr)
+# Deal with antonyms
+for ant in lemma.antonyms():
+if(ant not in newPositive and ant not in newNeutral):
+newNegative.add(ant)
+elif(ant in newPositive):
+newPositive.discard(ant)
+newNeutral.add(ant)
# Add Syns to Negative
for word in negative:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+print curr
+if(curr not in newPositive and curr not in newNeutral):
newNegative.add(curr)
elif(curr in newPositive):
newPositive.discard(curr)
newNeutral.add(curr)
+# Deal with antonyms
+for ant in lemma.antonyms():
+if(ant not in newNegative and ant not in newNeutral):
+newPositive.add(ant)
+elif(ant in newNegative):
+newNegative.discard(ant)
+newNeutral.add(ant)
return (newPositive,newNegative,newNeutral)

# Set up initial Sets S_p and S_n
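
For reference, the expansion step this hunk touches can be read as a standalone routine. Below is a minimal Python 3 sketch of the same idea, not the committed code: the 'good'/'bad' seed words are placeholders (the real seeds lie outside this hunk), antonym Lemma objects are converted to strings with ant.name() (the committed code adds the Lemma objects themselves), and two expansion passes stand in for range(1,3).

# Minimal, runnable Python 3 sketch of the expand_sets logic in this hunk.
# Seeds are placeholders; ant.name() is an assumed fix not present in the diff.
# Requires the NLTK WordNet data.
from nltk.corpus import wordnet as wn

def expand_sets(positive, negative, neutral):
    new_pos, new_neg, new_neu = set(positive), set(negative), set(neutral)
    # Synonyms of positive seeds lean positive; their antonyms lean negative.
    for word in positive:
        for syn in wn.synsets(word, pos=wn.ADJ):
            for lemma in syn.lemmas():
                curr = lemma.name()
                if curr not in new_neg and curr not in new_neu:
                    new_pos.add(curr)
                elif curr in new_neg:
                    new_neg.discard(curr)
                    new_neu.add(curr)
                for ant in lemma.antonyms():
                    name = ant.name()
                    if name not in new_pos and name not in new_neu:
                        new_neg.add(name)
                    elif name in new_pos:
                        new_pos.discard(name)
                        new_neu.add(name)
    # Synonyms of negative seeds lean negative; their antonyms lean positive.
    for word in negative:
        for syn in wn.synsets(word, pos=wn.ADJ):
            for lemma in syn.lemmas():
                curr = lemma.name()
                if curr not in new_pos and curr not in new_neu:
                    new_neg.add(curr)
                elif curr in new_pos:
                    new_pos.discard(curr)
                    new_neu.add(curr)
                for ant in lemma.antonyms():
                    name = ant.name()
                    if name not in new_neg and name not in new_neu:
                        new_pos.add(name)
                    elif name in new_neg:
                        new_neg.discard(name)
                        new_neu.add(name)
    return new_pos, new_neg, new_neu

positive, negative, neutral = {"good"}, {"bad"}, {""}   # assumed seeds
for _ in range(2):   # two expansion passes, mirroring range(1,3)
    positive, negative, neutral = expand_sets(positive, negative, neutral)
print(len(positive), "positive /", len(negative), "negative adjectives")
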
@@ -45,36 +59,38 @@ class Solver:
neutral = Set([''])

# Expand on Sets to get S_p' and S_n'
-for num in range(1,3):
+for num in range(1,2):
newsets = expand_sets(positive,negative,neutral);
positive = set(newsets[0])
negative = set(newsets[1])
neutral = set(newsets[2])

# Learn Classifier
-trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]

-#negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]

-#trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
-classifier = NaiveBayesClassifier.train(trainfeats)


-# Testing
-negids = movie_reviews.fileids('neg')
-posids = movie_reviews.fileids('pos')

-negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
-posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
-negcutoff = len(negfeats)*3/4
-poscutoff = len(posfeats)*3/4

-testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

-print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))

-print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
-classifier.show_most_informative_features()
-print positive
-print negative

+# # Learn Classifier
+# trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
+#
+# #negfeats = [({'insulting': True},'neg'),({'bad':True},'neg')]
+#
+# #trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
+# classifier = NaiveBayesClassifier.train(trainfeats)
+#
+#
+# # Testing
+# negids = movie_reviews.fileids('neg')
+# posids = movie_reviews.fileids('pos')
+#
+# negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
+# posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
+# negcutoff = len(negfeats)*3/4
+# poscutoff = len(posfeats)*3/4
+#
+# testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
+#
+# print 'Dictionary of %d positive words and %d negative words, tested on %d instances' % (len(positive),len(negative), len(testfeats))
+#
+# print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
+# classifier.show_most_informative_features()


#text = nltk.word_tokenize("And now for a production unlike any other a very fuzzy and cute dog")
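The training and evaluation block that this commit comments out in GlossBayes.py corresponds roughly to the following Python 3 sketch. It is not the committed code: it assumes positive and negative word sets like those produced by the expand_sets sketch above, and it requires the NLTK movie_reviews corpus.

# Python 3 sketch of the (now commented-out) training/evaluation block in GlossBayes.py.
# Expects `positive` and `negative` word sets such as those built by the expand_sets
# sketch above; requires the NLTK movie_reviews corpus data.
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from nltk.corpus import movie_reviews

def word_feats(words):
    return {word: True for word in words}

def train_and_evaluate(positive, negative):
    # One training "document" per lexicon word, labelled by the set it came from.
    trainfeats = ([({w: True}, "pos") for w in positive] +
                  [({w: True}, "neg") for w in negative])
    classifier = NaiveBayesClassifier.train(trainfeats)

    # Test on the last quarter of each half of the corpus, as in the original
    # len(...)*3/4 cutoff logic.
    negids = movie_reviews.fileids("neg")
    posids = movie_reviews.fileids("pos")
    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), "neg") for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), "pos") for f in posids]
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    print("Dictionary of %d positive and %d negative words, tested on %d instances"
          % (len(positive), len(negative), len(testfeats)))
    print("accuracy:", accuracy(classifier, testfeats))
    classifier.show_most_informative_features()
    return classifier
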
10 changes: 6 additions & 4 deletions GlossCount.py
@@ -27,7 +27,7 @@ class GlossCount:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if(curr not in newNegative and curr not in newNeutral):
newPositive.add(curr)
elif( curr in newNegative):
newNegative.discard(curr)
@@ -37,7 +37,7 @@ class GlossCount:
for syn in wn.synsets(word, pos=wn.ADJ):
for lemma in syn.lemmas():
curr = lemma.name().split('.')[0]
-if( curr not in newPositive and curr not in newNegative and curr not in newNeutral):
+if( curr not in newPositive and curr not in newNeutral):
newNegative.add(curr)
elif(curr in newPositive):
newPositive.discard(curr)
@@ -50,7 +50,7 @@ class GlossCount:
neutral = Set([''])

# Expand on Sets to get S_p' and S_n'
-for num in range(1,2):
+for num in range(1,3):
newsets = expand_sets(positive,negative,neutral);
positive = set(newsets[0])
negative = set(newsets[1])
@@ -63,12 +63,14 @@ class GlossCount:
classifier = NaiveBayesClassifier.train(trainfeats)
#print classifier.classify(dict([(word, True) for word in words]))
#print classifier.classify(dict([("bad",True),("bad",True)]))


# Iterate through all of the reviews and find sentiment
count = 0.00
correct = 0.00
for reviews in movie_reviews.fileids(): #For every review
score = 0;
-tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words
+tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words with POS
for token in tokens:
if (token[1]== "JJ" or token[1] == "JJR" or token[1] == "JJS"): # If adjective, check value
sent_value = classifier.classify(dict([(token[0], True)]))
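The scoring loop that GlossCount.py runs over the movie_reviews corpus amounts to the following Python 3 sketch. It assumes a word-level classifier like the one trained above; using the fileid prefix ('pos/' or 'neg/') as the gold label and counting a zero score as positive are assumptions, since the comparison and threshold are not shown in this hunk.

# Python 3 sketch of the adjective-counting evaluation in GlossCount.py.
# Assumes `classifier` is a word-level NaiveBayesClassifier as in the sketches above;
# requires the NLTK movie_reviews corpus, punkt tokenizer, and POS tagger data.
import nltk
from nltk.corpus import movie_reviews

def score_review(fileid, classifier):
    # POS-tag the raw review and let only adjectives vote on its polarity.
    tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileid)))
    score = 0
    for word, tag in tokens:
        if tag in ("JJ", "JJR", "JJS"):
            if classifier.classify({word: True}) == "pos":
                score += 1
            else:
                score -= 1
    return score

def evaluate(classifier):
    correct, total = 0, 0
    for fileid in movie_reviews.fileids():
        # Assumed decision rule: non-negative score counts as a positive prediction.
        predicted = "pos" if score_review(fileid, classifier) >= 0 else "neg"
        gold = fileid.split("/")[0]   # 'pos' or 'neg' from the corpus path
        correct += (predicted == gold)
        total += 1
    return correct / total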
