Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Conjunction based started
Hand-tagged the Brown dataset for the conjunction approach. My two gloss
approaches seem to be in git hell; I will hopefully get to optimize the
jwb edits.
  • Loading branch information
adl13006 committed Mar 30, 2016
1 parent 7b2c04e commit 70bbe1f
Show file tree
Hide file tree
Showing 3 changed files with 537 additions and 0 deletions.
26 changes: 26 additions & 0 deletions cblexicon.py
@@ -0,0 +1,26 @@
import math
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import brown as sc
from collections import Counter
from sets import Set

class cblexicon:
    """Groups the words of a tagged word list into positive and negative sets."""

    def genSets(self, path='words.txt'):
        """Read a tagged word list and split it into positive/negative sets.

        Each line is expected to look like ``<word> <tag>``. A tag whose
        first character is ``p`` marks the word as positive, one starting
        with ``n`` marks it as negative. Lines with no tag, or with any
        other tag, are ignored.

        The sizes of both sets are printed (positive first), matching the
        original behaviour, and the sets are also returned so callers can
        use them.

        Args:
            path: Location of the word list. Defaults to ``'words.txt'`` in
                the current working directory.

        Returns:
            A ``(positive, negative)`` tuple of sets of words.

        Raises:
            IOError/OSError: If the word list cannot be opened.
        """
        # Built-in ``set`` replaces the deprecated ``sets.Set`` class.
        positive = set()
        negative = set()

        # The file is only read, so plain read mode is enough; ``with``
        # guarantees the handle is closed even if parsing fails.
        with open(path, 'r') as f:
            for pair in f:
                # Whitespace split tolerates repeated spaces and drops the
                # trailing newline.
                current = pair.split()
                if len(current) < 2:
                    # Blank line or a word that has not been tagged yet.
                    continue
                word, tag = current[0], current[1]
                if tag[0] == 'p':
                    positive.add(word)
                elif tag[0] == 'n':
                    negative.add(word)

        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print(len(positive))
        print(len(negative))
        return positive, negative

# Runs at module load: builds a throwaway cblexicon instance and calls
# genSets(), which reads 'words.txt' and prints the size of each word set.
cblexicon().genSets()
18 changes: 18 additions & 0 deletions getAdjectives.py
@@ -0,0 +1,18 @@
import math
import nltk
from nltk.corpus import brown as sc
from collections import Counter


# Count how often each adjective-tagged token occurs in the Brown corpus and
# write every token seen at least MIN_COUNT times to 'words.txt', one per line.

# Only these exact tag strings are counted as adjectives.
ADJECTIVE_TAGS = ("JJ", "JJR", "JJS", "JJT")
# Minimum number of occurrences for a word to be written out.
MIN_COUNT = 20

# Counter consumes the generator directly, so no intermediate list is built.
counts = Counter(
    token
    for sentence in sc.tagged_sents()
    for token, tag in sentence
    if tag in ADJECTIVE_TAGS
)

# 'w' creates the file when missing and truncates it otherwise; the previous
# 'r+' mode required the file to exist and left stale trailing content behind
# whenever the new output was shorter. ``with`` closes (and flushes) the file.
with open('words.txt', 'w') as f:
    for word, count in counts.items():
        if count >= MIN_COUNT:
            # Trailing space is kept as in the original output format;
            # presumably it separates the word from a tag added later.
            f.write(word + " \n")

0 comments on commit 70bbe1f

Please sign in to comment.