From e3075760a61d0c8303320e346276dc32f61e38c1 Mon Sep 17 00:00:00 2001 From: Antonia Lewis Date: Sun, 17 Apr 2016 15:33:13 -0400 Subject: [PATCH] For pulling conjunctions --- getAdjectives.py | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/getAdjectives.py b/getAdjectives.py index 7106326..ae19e02 100644 --- a/getAdjectives.py +++ b/getAdjectives.py @@ -2,49 +2,57 @@ import math import nltk from nltk.corpus import brown as sc from collections import Counter +from nltk.corpus import movie_reviews -def genConj(training): - conj = open('conj.txt', 'r+') +def genConj(): + conj = open('movieconj.txt', 'r+') ands = open('ands.txt', 'r+') ors = open('ors.txt', 'r+') buts = open('buts.txt', 'r+') nor = open('nor.txt', 'r+') eor = open('eor.txt', 'r+') - j = 0; - for review in training: #For every review + j = 0 + for review in sorted(movie_reviews.fileids()): #For every review tokens = nltk.pos_tag(nltk.word_tokenize(movie_reviews.raw(fileids=[review]))) print j j = j+1 for i in range(0,len(tokens)-3): if ((tokens[i][1]== "JJ" or tokens[i][1] == "JJR" or tokens[i][1] == "JJS") and (tokens[i+2][1]== "JJ" or tokens[i+2][1] == "JJR" or tokens[i+2][1] == "JJS")): if (tokens[i+1][0] == "and"): + print tokens[i][0] conj.write(tokens[i][0]+ " " + tokens[i+2][0] + " " + tokens[i+1][0] + "\n") - ands.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") + #ands.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") elif (tokens[i+1][0] == "or"): conj.write(tokens[i][0]+ " " + tokens[i+2][0] + " " + tokens[i+1][0] + "\n") - ors.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") + #ors.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") elif (tokens[i+1][0] == "but"+ "\n"): conj.write(tokens[i][0]+ " " + tokens[i+2][0] + " " + tokens[i+1][0] + "\n") - buts.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") + #buts.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") elif (tokens[i+1][0] == "either-or"): conj.write(tokens[i][0]+ " " + tokens[i+2][0] + " " + tokens[i+1][0] + "\n") - eor.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") + #eor.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") elif (tokens[i+1][0] == "neither-nor"): conj.write(tokens[i][0]+ " " + tokens[i+2][0] + " " + tokens[i+1][0] + "\n") - nor.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") - -f = open('words2.txt', 'w') -list1 = [] -for word in sc.tagged_sents(): - for w in word: - if(w[1] == "JJ" or w[1] == "JJR" or w[1] == "JJS" or w[1] == "JJT"): - list1.append(w[0]) -counts = Counter(list1) -d = dict(counts) - -for n in d: - if( d[n] >= 15): - f.write(n+" \n") -f.close() \ No newline at end of file + #nor.write(tokens[i][0]+ " " + tokens[i+2][0]+ "\n") + + + + +def doBrown(): + f = open('movieconj.txt', 'w') + list1 = [] + for word in sc.tagged_sents(): + for w in word: + if(w[1] == "JJ" or w[1] == "JJR" or w[1] == "JJS" or w[1] == "JJT"): + list1.append(w[0]) + counts = Counter(list1) + d = dict(counts) + + for n in d: + if( d[n] >= 15): + f.write(n+" \n") + f.close() + +genConj() \ No newline at end of file