Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
For pulling conjunctions
  • Loading branch information
adl13006 committed Apr 17, 2016
1 parent d834a06 commit e307576
Showing 1 changed file with 31 additions and 23 deletions.
54 changes: 31 additions & 23 deletions getAdjectives.py
Expand Up @@ -2,49 +2,57 @@ import math
import nltk
from nltk.corpus import brown as sc
from collections import Counter
from nltk.corpus import movie_reviews



def _conjoined_adjectives(tokens):
    """Yield (first_adj, second_adj, conjunction) triples from POS-tagged tokens.

    `tokens` is a sequence of (word, tag) pairs. A triple is produced whenever
    two words tagged JJ/JJR/JJS are separated by exactly one of the
    conjunction tokens listed below.
    """
    adjective_tags = ("JJ", "JJR", "JJS")
    conjunctions = ("and", "or", "but", "either-or", "neither-nor")
    # A triple starting at i reads tokens[i + 2], so the last valid start
    # index is len(tokens) - 3.
    for i in range(len(tokens) - 2):
        first, link, second = tokens[i], tokens[i + 1], tokens[i + 2]
        if (first[1] in adjective_tags
                and second[1] in adjective_tags
                and link[0] in conjunctions):
            yield first[0], second[0], link[0]


def genConj(training=None):
    """Write conjoined adjective pairs from movie reviews to movieconj.txt.

    Each review is tokenized and POS-tagged with nltk; for every
    "<adjective> <conjunction> <adjective>" sequence one line of the form
    "adj1 adj2 conjunction" is written to movieconj.txt.

    Args:
        training: optional iterable of movie_reviews file ids to process.
            When omitted, every file id of the corpus is processed in sorted
            order. NOTE(review): an earlier signature required `training`;
            confirm that remaining callers pass file ids.

    Raises:
        IOError: if movieconj.txt does not exist (it is opened in 'r+' mode).
    """
    if training is None:
        training = sorted(movie_reviews.fileids())

    # 'r+' requires the file to exist and overwrites from the start without
    # truncating; kept so existing on-disk behaviour does not change.
    with open('movieconj.txt', 'r+') as conj:
        for j, review in enumerate(training):
            tokens = nltk.pos_tag(
                nltk.word_tokenize(movie_reviews.raw(fileids=[review])))
            print(j)  # progress: index of the review being processed
            for first, second, link in _conjoined_adjectives(tokens):
                if link == "and":
                    print(first)
                conj.write(first + " " + second + " " + link + "\n")

# Collect every word of the Brown corpus tagged as an adjective
# (JJ, JJR, JJS, JJT) and write those occurring at least 15 times to
# words2.txt, one per line.
list1 = []
for word in sc.tagged_sents():
    for w in word:
        if w[1] in ("JJ", "JJR", "JJS", "JJT"):
            list1.append(w[0])
counts = Counter(list1)
d = dict(counts)

# Open the output only once the counts exist, and let the context manager
# close it even if a write fails.
with open('words2.txt', 'w') as f:
    for n in d:
        if d[n] >= 15:
            f.write(n + " \n")
#nor.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n")




def doBrown(min_count=15):
    """Write frequently used Brown-corpus adjectives to movieconj.txt.

    Counts every word in the Brown corpus tagged JJ, JJR, JJS or JJT and
    writes each word whose count is at least `min_count`, followed by a
    space and a newline.

    Args:
        min_count: minimum number of occurrences a word needs in order to
            be written. Defaults to 15.
    """
    adjective_tags = ("JJ", "JJR", "JJS", "JJT")
    counts = Counter(
        w[0]
        for sent in sc.tagged_sents()
        for w in sent
        if w[1] in adjective_tags
    )

    # NOTE(review): 'w' truncates movieconj.txt, the same file genConj
    # writes conjunction triples to -- confirm this is the intended target.
    with open('movieconj.txt', 'w') as f:
        for word in counts:
            if counts[word] >= min_count:
                f.write(word + " \n")

# Runs at module load: generate the adjective-conjunction file.
genConj()

0 comments on commit e307576

Please sign in to comment.