diff --git a/asdf.py b/asdf.py
deleted file mode 100644
index 9c8868d..0000000
--- a/asdf.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import nltk
-from nltk.corpus import wordnet
-from nltk.corpus import movie_reviews
-from nltk.classify import NaiveBayesClassifier
-
-word = "good"
-syns = wordnet.synsets(word)
-for syn in syns:
-    lemmas = syn.lemmas()
-    for lemma in lemmas:
-        if lemma.antonyms() != []: print lemma.antonyms()
\ No newline at end of file
diff --git a/pos.py b/pos.py
deleted file mode 100644
index f6311c5..0000000
--- a/pos.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import nltk
-import os
-import string
-
-"""
-POS tagging is really slow compared to SVM training and prediction.
-This script processes the reviews beforehand, applies the NLTK POS tagger,
-and saves them in a new folder.
-"""
-
-POS_FOLDER = os.path.join("review_polarity","txt_sentoken","pos")
-NEG_FOLDER = os.path.join("review_polarity","txt_sentoken","neg")
-POS_TAGGED_FOLDER = os.path.join("review_polarity","txt_sentoken","pos_tagged")
-NEG_TAGGED_FOLDER = os.path.join("review_polarity","txt_sentoken","neg_tagged")
-
-for (folder_name, tagged_folder_name) in [(POS_FOLDER, POS_TAGGED_FOLDER), (NEG_FOLDER, NEG_TAGGED_FOLDER)]:
-    filenames = []
-    for (folder, x, folder_filenames) in os.walk(folder_name):
-        for filename in folder_filenames:
-            if filename.endswith(".txt"):
-                filenames.append(os.path.join(folder, filename))
-    for filename in filenames:
-        f = open(filename)
-        lines = f.readlines()
-        f.close()
-        text = string.join(lines, " ")
-
-        tokens = nltk.word_tokenize(text)
-        tagged = nltk.pos_tag(tokens)
-        tagged = [string.join(t, "_") for t in tagged]
-        tagged = string.join(tagged, " ")
-        tagged_filename = os.path.join(tagged_folder_name, os.path.split(filename)[-1])
-        f = open(tagged_filename, "w")
-        f.write(tagged)
-        f.close()
-        print "Tagged & saved file", tagged_filename
\ No newline at end of file