Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
job13011 committed Mar 31, 2016
2 parents 2b97b41 + 70bbe1f commit 8e9a44c
Show file tree
Hide file tree
Showing 4 changed files with 553 additions and 4 deletions.
20 changes: 16 additions & 4 deletions GlossCount.py
Expand Up @@ -57,19 +57,23 @@ class GlossCount:
neutral = set(newsets[2])

# Learn Classifier
print len(negative)
print len(positive)
trainfeats = [({word : True},"pos") for word in positive] + [({word : True},"neg") for word in negative]
classifier = NaiveBayesClassifier.train(trainfeats)
print "cat"
#print classifier.classify(dict([(word, True) for word in words]))
<<<<<<< HEAD
#print classifier.classify(dict([("bad",True),("bad",True)]))


=======
print classifier.classify(dict([("bad",True),("bad",True)]))
>>>>>>> parent of 47c6a2a... Bugfix
# Iterate through all of the reviews and find sentiment
count = 0.00
correct = 0.00
for reviews in movie_reviews.fileids(): #For every review
for reviews in movie_reviews.fileids():
score = 0;
<<<<<<< HEAD
tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[reviews]))) #Tokenize all words with POS
for token in tokens:
if (token[1]== "JJ" or token[1] == "JJR" or token[1] == "JJS"): # If adjective, check value
Expand All @@ -78,14 +82,22 @@ class GlossCount:
score = score - 1
elif(sent_value is 'pos'):
score = score + 1
=======
for words in movie_reviews.words(fileids=[reviews]):
if()
sent_value = classifier.classify(dict([(word, True)]))
if(sent_value is 'neg'):
score = score - 1
elif(sent_value is 'pos'):
score = score + 1
>>>>>>> parent of 47c6a2a... Bugfix
if (score < 0):
print "Negative at %d" % (score)
sentiment = 'neg'
else:
sentiment = 'pos'
print "Positive at %d" % (score)
if (sentiment == movie_reviews.categories(fileids=[reviews])[0]):
print "Correct"
correct = correct + 1.00
count = count + 1.00
print correct/count
Expand Down
26 changes: 26 additions & 0 deletions cblexicon.py
@@ -0,0 +1,26 @@
import math
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import brown as sc
from collections import Counter
from sets import Set

class cblexicon:
    """Lexicon builder that sorts the labelled words in ``words.txt`` by polarity."""

    def genSets(self):
        """Read ``words.txt`` and split its words into positive and negative sets.

        Each usable line has the form ``<word> <label>``. A label whose first
        character is ``p`` puts the word in the positive set; a first
        character of ``n`` puts it in the negative set. Blank lines, lines
        without a label, and lines with any other label are ignored.

        The size of the positive set and then of the negative set is printed,
        one per line.

        Returns:
            A ``(positive, negative)`` tuple of sets of words.

        Raises:
            IOError: if ``words.txt`` cannot be opened for reading.
        """
        positive = set()
        negative = set()

        # The file is only read, so plain 'r' is enough; the ``with`` block
        # guarantees the handle is closed even if parsing fails.
        with open('words.txt', 'r') as f:
            for pair in f:
                # Whitespace split tolerates repeated spaces, tabs and the
                # trailing newline.
                current = pair.split()
                if len(current) < 2:
                    # Blank line or a word with no label yet: nothing to sort.
                    continue
                word, label = current[0], current[1]
                if label[0] == 'p':
                    positive.add(word)
                elif label[0] == 'n':
                    negative.add(word)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(len(positive))
        print(len(negative))
        return positive, negative

# Module-level call: build the positive/negative sets from words.txt and
# print their sizes as soon as this file is executed.
cblexicon().genSets()
18 changes: 18 additions & 0 deletions getAdjectives.py
@@ -0,0 +1,18 @@
import math
import nltk
from nltk.corpus import brown as sc
from collections import Counter


# Collect every adjective token in the tagged Brown corpus and write the ones
# that occur at least MIN_COUNT times to words.txt, one per line.

# Part-of-speech tags treated as adjectives when scanning the corpus.
ADJECTIVE_TAGS = ("JJ", "JJR", "JJS", "JJT")
# Minimum number of occurrences a word needs in order to be written out.
MIN_COUNT = 20

# Count adjective tokens directly; no intermediate list or dict copy needed.
counts = Counter(
    token
    for sentence in sc.tagged_sents()
    for token, tag in sentence
    if tag in ADJECTIVE_TAGS
)

# 'w' creates the file when it is missing and truncates any previous content,
# so a shorter run cannot leave stale lines at the end of the file. The
# ``with`` block flushes and closes the handle.
with open('words.txt', 'w') as f:
    for word, count in counts.items():
        if count >= MIN_COUNT:
            # The trailing space is kept as in the original output format --
            # presumably so a polarity label can be appended later; confirm.
            f.write(word + " \n")

0 comments on commit 8e9a44c

Please sign in to comment.