# GlossCount.py

import math
import nltk
from nltk.corpus import wordnet as wn
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from sets import Set

class GlossCount:
    """Sentiment demo driven by a WordNet-expanded adjective lexicon.

    Seed adjectives ("good" / "bad") are grown through WordNet synonyms, an
    NLTK Naive Bayes classifier is trained on the resulting word sets, and
    every review of the NLTK ``movie_reviews`` corpus is labelled from the
    adjectives it contains.
    """

    # Part-of-speech tags treated as adjectives when scoring a review.
    ADJECTIVE_TAGS = frozenset(["JJ", "JJR", "JJS"])
    # How many times the seed sets are expanded with synonyms.
    EXPANSION_ROUNDS = 2

    @staticmethod
    def _value_of(sentiment):
        """Return 1 for 'pos', -1 for 'neg' and 0 for any other label."""
        if sentiment == 'pos':
            return 1
        if sentiment == 'neg':
            return -1
        return 0

    @staticmethod
    def _adjective_lemmas(word):
        """Return the lemma names of all WordNet adjective synsets of ``word``.

        Each name is cut at its first '.', exactly as the original code did.
        """
        return [lemma.name().split('.')[0]
                for syn in wn.synsets(word, pos=wn.ADJ)
                for lemma in syn.lemmas()]

    @staticmethod
    def _absorb(words, own, opposite, neutral, lemmas_of):
        """Merge the synonyms of ``words`` into ``own`` (sets edited in place).

        A synonym already claimed by ``opposite`` is ambiguous, so it is moved
        to ``neutral``; a synonym that is already neutral is left alone.
        """
        for word in words:
            for curr in lemmas_of(word):
                if curr not in opposite and curr not in neutral:
                    own.add(curr)
                elif curr in opposite:
                    opposite.discard(curr)
                    neutral.add(curr)

    @classmethod
    def _expand_sets(cls, positive, negative, neutral, lemmas_of=None):
        """Run one synonym-expansion round and return the three new sets.

        Args:
            positive, negative, neutral: current word sets; not modified.
            lemmas_of: callable mapping a word to an iterable of synonyms.
                Defaults to the WordNet adjective lookup.

        Returns:
            A ``(positive, negative, neutral)`` tuple of fresh sets.
        """
        if lemmas_of is None:
            lemmas_of = cls._adjective_lemmas
        new_positive = set(positive)
        new_negative = set(negative)
        new_neutral = set(neutral)
        # Positive words are processed first, then negative ones; both passes
        # iterate the *input* sets so words added this round are not expanded.
        cls._absorb(positive, new_positive, new_negative, new_neutral, lemmas_of)
        cls._absorb(negative, new_negative, new_positive, new_neutral, lemmas_of)
        return (new_positive, new_negative, new_neutral)

    @classmethod
    def _score_tokens(cls, tagged_tokens, classifier, vocabulary):
        """Sum the sentiment values of the known adjectives in a review.

        Args:
            tagged_tokens: iterable of ``(word, pos_tag)`` pairs.
            classifier: object whose ``classify(featureset)`` returns a label.
            vocabulary: set of words the classifier was trained on.

        Returns:
            Number of positive adjectives minus number of negative ones.
        """
        score = 0
        for word, tag in tagged_tokens:
            if tag not in cls.ADJECTIVE_TAGS:
                continue
            word = word.lower()
            # NLTK's Naive Bayes classifier discards features it never saw in
            # training and then answers with the label prior, so an unknown
            # adjective would always vote for the majority class. Skip it.
            if word not in vocabulary:
                continue
            # Labels are compared by value (==) inside _value_of, not with
            # `is`, which only works when the strings happen to be interned.
            score += cls._value_of(classifier.classify({word: True}))
        return score

    def demo(self):
        """Train on the expanded lexicon and print the accuracy on movie_reviews.

        For every review the method prints "Negative at <score>" or
        "Positive at <score>", then "Correct" when the guess matches the
        corpus category, and finally the fraction of correct guesses.
        """
        # Seeds are lower-case so they match the lower-cased tokens that are
        # looked up during scoring; the neutral set starts out empty.
        positive = set(['good'])
        negative = set(['bad'])
        neutral = set()

        for _ in range(self.EXPANSION_ROUNDS):
            positive, negative, neutral = self._expand_sets(positive, negative, neutral)

        trainfeats = ([({word: True}, "pos") for word in positive] +
                      [({word: True}, "neg") for word in negative])
        classifier = NaiveBayesClassifier.train(trainfeats)
        # Marker line and one-word smoke test, printed exactly as before.
        print("cat")
        print(classifier.classify({"bad": True}))

        vocabulary = positive | negative
        count = 0.0
        correct = 0.0
        for fileid in movie_reviews.fileids():
            tagged = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[fileid])))
            score = self._score_tokens(tagged, classifier, vocabulary)
            if score < 0:
                print("Negative at %d" % score)
                sentiment = 'neg'
            else:
                # A tie (score 0) counts as positive.
                sentiment = 'pos'
                print("Positive at %d" % score)
            if sentiment == movie_reviews.categories(fileids=[fileid])[0]:
                print("Correct")
                correct += 1.0
            count += 1.0
        # Guard against an empty corpus instead of dividing by zero.
        print(correct / count if count else 0.0)


# Module-level call: instantiate GlossCount and run its demo whenever this
# file is executed (or imported).
GlossCount().demo()
	import math
	import nltk
	from nltk.corpus import wordnet as wn
	import nltk.classify.util
	from nltk.classify import NaiveBayesClassifier
	from nltk.corpus import movie_reviews
	from sets import Set

	class GlossCount:
	    """Sentiment demo driven by a WordNet-expanded adjective lexicon.

	    Seed adjectives ("good" / "bad") are grown through WordNet synonyms, an
	    NLTK Naive Bayes classifier is trained on the resulting word sets, and
	    every review of the NLTK ``movie_reviews`` corpus is labelled from the
	    adjectives it contains.
	    """

	    # Part-of-speech tags treated as adjectives when scoring a review.
	    ADJECTIVE_TAGS = frozenset(["JJ", "JJR", "JJS"])
	    # How many times the seed sets are expanded with synonyms.
	    EXPANSION_ROUNDS = 2

	    @staticmethod
	    def _value_of(sentiment):
	        """Return 1 for 'pos', -1 for 'neg' and 0 for any other label."""
	        if sentiment == 'pos':
	            return 1
	        if sentiment == 'neg':
	            return -1
	        return 0

	    @staticmethod
	    def _adjective_lemmas(word):
	        """Return the lemma names of all WordNet adjective synsets of ``word``.

	        Each name is cut at its first '.', exactly as the original code did.
	        """
	        return [lemma.name().split('.')[0]
	                for syn in wn.synsets(word, pos=wn.ADJ)
	                for lemma in syn.lemmas()]

	    @staticmethod
	    def _absorb(words, own, opposite, neutral, lemmas_of):
	        """Merge the synonyms of ``words`` into ``own`` (sets edited in place).

	        A synonym already claimed by ``opposite`` is ambiguous, so it is moved
	        to ``neutral``; a synonym that is already neutral is left alone.
	        """
	        for word in words:
	            for curr in lemmas_of(word):
	                if curr not in opposite and curr not in neutral:
	                    own.add(curr)
	                elif curr in opposite:
	                    opposite.discard(curr)
	                    neutral.add(curr)

	    @classmethod
	    def _expand_sets(cls, positive, negative, neutral, lemmas_of=None):
	        """Run one synonym-expansion round and return the three new sets.

	        Args:
	            positive, negative, neutral: current word sets; not modified.
	            lemmas_of: callable mapping a word to an iterable of synonyms.
	                Defaults to the WordNet adjective lookup.

	        Returns:
	            A ``(positive, negative, neutral)`` tuple of fresh sets.
	        """
	        if lemmas_of is None:
	            lemmas_of = cls._adjective_lemmas
	        new_positive = set(positive)
	        new_negative = set(negative)
	        new_neutral = set(neutral)
	        # Positive words are processed first, then negative ones; both passes
	        # iterate the *input* sets so words added this round are not expanded.
	        cls._absorb(positive, new_positive, new_negative, new_neutral, lemmas_of)
	        cls._absorb(negative, new_negative, new_positive, new_neutral, lemmas_of)
	        return (new_positive, new_negative, new_neutral)

	    @classmethod
	    def _score_tokens(cls, tagged_tokens, classifier, vocabulary):
	        """Sum the sentiment values of the known adjectives in a review.

	        Args:
	            tagged_tokens: iterable of ``(word, pos_tag)`` pairs.
	            classifier: object whose ``classify(featureset)`` returns a label.
	            vocabulary: set of words the classifier was trained on.

	        Returns:
	            Number of positive adjectives minus number of negative ones.
	        """
	        score = 0
	        for word, tag in tagged_tokens:
	            if tag not in cls.ADJECTIVE_TAGS:
	                continue
	            word = word.lower()
	            # NLTK's Naive Bayes classifier discards features it never saw in
	            # training and then answers with the label prior, so an unknown
	            # adjective would always vote for the majority class. Skip it.
	            if word not in vocabulary:
	                continue
	            # Labels are compared by value (==) inside _value_of, not with
	            # `is`, which only works when the strings happen to be interned.
	            score += cls._value_of(classifier.classify({word: True}))
	        return score

	    def demo(self):
	        """Train on the expanded lexicon and print the accuracy on movie_reviews.

	        For every review the method prints "Negative at <score>" or
	        "Positive at <score>", then "Correct" when the guess matches the
	        corpus category, and finally the fraction of correct guesses.
	        """
	        # Seeds are lower-case so they match the lower-cased tokens that are
	        # looked up during scoring; the neutral set starts out empty.
	        positive = set(['good'])
	        negative = set(['bad'])
	        neutral = set()

	        for _ in range(self.EXPANSION_ROUNDS):
	            positive, negative, neutral = self._expand_sets(positive, negative, neutral)

	        trainfeats = ([({word: True}, "pos") for word in positive] +
	                      [({word: True}, "neg") for word in negative])
	        classifier = NaiveBayesClassifier.train(trainfeats)
	        # Marker line and one-word smoke test, printed exactly as before.
	        print("cat")
	        print(classifier.classify({"bad": True}))

	        vocabulary = positive | negative
	        count = 0.0
	        correct = 0.0
	        for fileid in movie_reviews.fileids():
	            tagged = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[fileid])))
	            score = self._score_tokens(tagged, classifier, vocabulary)
	            if score < 0:
	                print("Negative at %d" % score)
	                sentiment = 'neg'
	            else:
	                # A tie (score 0) counts as positive.
	                sentiment = 'pos'
	                print("Positive at %d" % score)
	            if sentiment == movie_reviews.categories(fileids=[fileid])[0]:
	                print("Correct")
	                correct += 1.0
	            count += 1.0
	        # Guard against an empty corpus instead of dividing by zero.
	        print(correct / count if count else 0.0)




	# Instantiate GlossCount and run its demo.
	GlossCount().demo()