Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
30 lines (28 sloc) 678 Bytes
import nltk
import string
import random
def load(sample=True, path="Sentiment Analysis Dataset.csv", sample_size=10000):
    """Load tweet ids, tokenized tweets and sentiment labels from a CSV file.

    The first line of the file is treated as a header and skipped. Every
    remaining line has its double quotes removed, is stripped of surrounding
    whitespace and has all non-ASCII characters dropped. It is then split on
    commas into at most four fields: field 0 is the id, field 1 the integer
    label, field 2 is ignored and field 3 is the tweet text.

    Args:
        sample: If true, parse only a random subset of the data rows;
            otherwise parse every data row.
        path: Path of the CSV file to read.
        sample_size: Maximum number of data rows drawn when ``sample`` is
            true. Fewer rows are returned if the file is smaller.

    Returns:
        A tuple ``(ids, tweets, labels)`` of three parallel lists: ids as
        strings, tweets as the token lists produced by
        ``nltk.word_tokenize``, and labels as ints where a label of 0 is
        mapped to -1 and any other value is kept unchanged.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If a label field is not an integer, or ``sample_size``
            is negative.
        IndexError: If a non-blank data row has fewer than four fields.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as f:
        lines = f.readlines()

    # Drop the header BEFORE sampling, so the header can never end up among
    # the sampled rows and no real data row is discarded in its place.
    rows = lines[1:]
    if sample:
        # Clamp so that files shorter than sample_size do not raise.
        rows = random.sample(rows, min(sample_size, len(rows)))

    ids = []
    tweets = []
    labels = []
    for row in rows:
        stripped = row.replace('"', "").strip()
        # Keep ASCII characters only.
        cleaned = "".join(c for c in stripped if ord(c) < 128)
        if not cleaned:
            # Blank line (e.g. trailing newline at end of file): nothing to parse.
            continue

        # Split at most three times so commas inside the tweet text stay part
        # of the fourth field instead of truncating the tweet.
        terms = cleaned.split(",", 3)
        tweet_id = terms[0]
        label = int(terms[1])
        if label == 0:
            label = -1
        tweet = terms[3]

        ids.append(tweet_id)
        tweets.append(nltk.word_tokenize(tweet))
        labels.append(label)
    return (ids, tweets, labels)
You can’t perform that action at this time.