From 70bbe1f4b230cb7907220a6f807dc5d9466f1359 Mon Sep 17 00:00:00 2001 From: Antonia Lewis Date: Tue, 29 Mar 2016 22:28:10 -0400 Subject: [PATCH] Conjunction based started Hand tagged the brown dataset for the conjunction approach. My two gloss approaches seem to be in git hell, will hopefully get to optimize the jwb edits. --- cblexicon.py | 26 +++ getAdjectives.py | 18 ++ words.txt | 493 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 537 insertions(+) create mode 100644 cblexicon.py create mode 100644 getAdjectives.py create mode 100644 words.txt diff --git a/cblexicon.py b/cblexicon.py new file mode 100644 index 0000000..74dba34 --- /dev/null +++ b/cblexicon.py @@ -0,0 +1,26 @@ +import math +import nltk +from nltk.corpus import wordnet as wn +from nltk.corpus import brown as sc +from collections import Counter +from sets import Set + +class cblexicon: + + def genSets(self): + f = open('words.txt', 'r+') + content = f.readlines() + positive = Set([]) + negative = Set([]) + + for pair in content: + current = pair.split(' ') + if (current[1][0] == 'p'): + positive.add(current[0]) + elif (current[1][0] == 'n'): + negative.add(current[0]) + + print len(positive) + print len(negative) + +cblexicon().genSets() \ No newline at end of file diff --git a/getAdjectives.py b/getAdjectives.py new file mode 100644 index 0000000..af79093 --- /dev/null +++ b/getAdjectives.py @@ -0,0 +1,18 @@ +import math +import nltk +from nltk.corpus import brown as sc +from collections import Counter + + +f = open('words.txt', 'r+') +list1 = [] +for word in sc.tagged_sents(): + for w in word: + if(w[1] == "JJ" or w[1] == "JJR" or w[1] == "JJS" or w[1] == "JJT"): + list1.append(w[0]) +counts = Counter(list1) +d = dict(counts) + +for n in d: + if( d[n] >= 20): + f.write(n+" \n") \ No newline at end of file diff --git a/words.txt b/words.txt new file mode 100644 index 0000000..a394af3 --- /dev/null +++ b/words.txt @@ -0,0 +1,493 @@ +reliable p +elaborate p +military p +golden p +successful p +absolute p +hot p +wrong n +effective n +enormous p +bitter n +responsible p +busy n +rich p +fair p +best p +unique p +right p +old n +creative p +respectable p +easy p +happy p +later n +exact p +brilliant p +logical p +positive p +angry n +apparent n +optimal p +conscious p +modest n +open p +sad n +opposite n +voluntary p +average n +bright p +slow n +prime p +easier p +quiet n +tiny n +dark n +vague n +satisfactory n +intimate p +favorite p +mean n +lively p +complete p +handsome p +dirty n +certain p +tight n +original p +careful n +large n +small n +past n +full n +beautiful p +huge p +authentic p +fundamental p +major p +immediate p +artistic p +dependent n +whole p +drunk n +firm p +uncertain n +straight n +funny p +spectacular p +alert p +recent p +early n +concrete p +eager p +private n +formal n +curious p +pale n +appropriate n +considerable p +mature p +occasional n +fresh p +conservative n +young p +magic p +anxious n +odd n +great p +technical p +exciting p +thin n +high p +educational p +doubtful n +perfect p +intelligent p +delightful p +ethical n +painful n +vigorous n +wet n +false n +gentle p +hypothalamic n +so-called n +rough n +principal p +familiar p +lucky p +willing p +generous p +mere n +evident p +official p +smooth p +desirable p +worth p +above p +total p +negative n +ordinary n +meaningful p +safe p +reasonable p +standard p +genuine p +convenient p +left n +subtle p +adequate n +unable n +ill n +tough n +flexible p +independent p +evil n +musical p +dead n +absent n +maximum p +emotional p +stronger p +abstract p +grateful p +upper p +helpless n +uniform n +extra p +live p +productive p +explicit p +typical n +cold n +still n +inevitable n +honest p +blind n +striking p +impossible n +silent n +tragic n +objective n +steady p +professional p +poetic p +noble p +severe n +valuable p +real p +rear n +equivalent n +uneasy n +central n +greater p +low n +valid p +poor n +fiscal n +casual p +loose n +strong p +marginal n +conventional +mysterious p +heavy n +earliest p +heroic p +terrible n +substantial p +clear p +clean p +latest p +pretty p +famous p +close n +probable p +unconscious n +sensitive p +experimental p +empty n +ready p +grand p +temporary n +older n +popular p +minimal n +newer p +dull n +similar n +acceptable n +friendly p +commercial n +worthy p +charming p +wonderful p +dominant p +front p +special p +prospective n +powerful p +endless p +gray n +desperate n +Good p +comparable n +precious p +competent p +strange n +accurate p +distant n +prominent p +necessary n +broad p +final n +additional p +highest p +universal p +dynamic p +sorry n +vast p +likely p +new p +active p +dry n +suitable n +warm p +amazing p +massive p +frequent p +better p +crucial n +linear n +peculiar n +modern p +regular n +colorful p +fascinating p +basic n +lovely p +ugly n +neat p +just n +royal p +dear p +bold p +intellectual p +classic p +unexpected p +tangent n +sympathetic p +rigid n +crazy n +pure p +mad n +destructive n +tall n +interesting p +rational p +correct p +worse n +long n +little n +efficient p +potential n +normal n +precise p +contemporary p +black n +secondary n +stupid n +relative n +capable p +dramatic n +promising p +middle n +sudden n +different n +extensive p +critical p +moderate n +unusual n +practical p +skilled p +sophisticated p +romantic p +deep p +important p +remote n +further n +narrow n +distinct p +particular n +minimum n +sacred p +late n +good p +hard n +thick n +pleasant p +extraordinary p +favorable p +contrary n +bad n +significant p +fine n +nervous n +unhappy n +common n +individual p +foreign n +simple n +expensive n +secret n +gay n +solid p +atomic n +big p +magnificent p +back n +strongest p +constant p +literary p +entire p +extreme p +obvious n +native n +ambiguous n +fortunate p +sexual p +ancient n +comfortable n +unlikely n +preliminary n +enthusiastic p +equal p +relevant p +tremendous p +general n +plain n +usual n +domestic n +smart p +soft p +classical p +alive p +proper p +slight n +actual p +functional p +biggest p +wise p +dangerous n +consistent p +direct p +visible p +excellent p +widespread p +ultimate p +complex p +eternal p +free p +primary n +philosophical p +top p +raw n +thorough p +greatest p +random n +automatic n +weak n +afraid n +able p +arbitrary n +specific p +primitive n +civil p +younger p +serious n +remarkable p +cool p +impressive p +quick p +healthy p +guilty n +vivid p +sharp p +vital p +exclusive p +lonely n +realistic p +wide n +ideal p +sure p +proud p +hungry n +senior n +true p +naked n +subsequent n +definite p +cheap n +key p +peaceful p +delicate p +violent n +longer n +separate n +present p +far n +fat n +worst n +brief n +wild p +faint n +minor n +flat n +glad p +short n +bigger p +inherent p +radical n +double p +outstanding p +traditional p +bare n +distinctive p +various n +numerous p +sweet p +due n +higher p +lower n +competitive p +essential p +intense p +fast p +junior n +characteristic p +nice p +smaller n +light n +superior p +permanent p +profound p +organic p +unknown n +operational p +available p +decent n +sufficient n +largest p +difficult n +excessive n +rapid n +attractive p +identical n +helpful p +rare n +own n +larger p +inadequate n +mutual n +incredible p +sick n \ No newline at end of file