diff --git a/GlossCount.py b/GlossCount.py index 2ff4471..6180ae7 100644 --- a/GlossCount.py +++ b/GlossCount.py @@ -57,23 +57,25 @@ def expand_sets(positive,negative,neutral): neutral = set(newsets[2]) # Learn Classifier + print len(negative) + print len(positive) trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative] classifier = NaiveBayesClassifier.train(trainfeats) - print "cat" #print classifier.classify(dict([(word, True) for word in words])) - print classifier.classify(dict([("bad",True),("bad",True)])) + #print classifier.classify(dict([("bad",True),("bad",True)])) # Iterate through all of the reviews and find sentiment count = 0.00 correct = 0.00 - for reviews in movie_reviews.fileids(): + for reviews in movie_reviews.fileids(): #For every review score = 0; - for words in movie_reviews.words(fileids=[reviews]): - if() - sent_value = classifier.classify(dict([(word, True)])) - if(sent_value is 'neg'): - score = score - 1 - elif(sent_value is 'pos'): - score = score + 1 + tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words + for token in tokens: + if (token[1]== "JJ" or token[1] == "JJR" or token[1] == "JJS"): # If adjective, check value + sent_value = classifier.classify(dict([(token[0], True)])) + if(sent_value is 'neg'): + score = score - 1 + elif(sent_value is 'pos'): + score = score + 1 if (score < 0): print "Negative at %d" % (score) sentiment = 'neg' @@ -81,6 +83,7 @@ def expand_sets(positive,negative,neutral): sentiment = 'pos' print "Positive at %d" % (score) if (sentiment == movie_reviews.categories(fileids=[reviews])[0]): + print "Correct" correct = correct + 1.00 count = count + 1.00 print correct/count