Skip to content
Permalink
7b5edc3acd
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
1 contributor

Users who have contributed to this file

30 lines (28 sloc) 678 Bytes
import nltk
import string
import random
def load(sample=True, path="Sentiment Analysis Dataset.csv",
         sample_size=10000, tokenize=None):
    """Load ids, tokenized tweets and sentiment labels from a dataset file.

    The first line of the file is treated as a header and skipped.  Every
    other non-blank line is expected to hold at least four comma-separated
    fields: field 0 is the id, field 1 is an integer label, and everything
    after the third comma is the tweet text (field 2 is ignored).

    Double quotes and non-ASCII characters are removed from each line
    before it is split.

    Args:
        sample: If true, only a random subset of the data rows is loaded.
        path: Location of the dataset file.
        sample_size: Maximum number of rows drawn when ``sample`` is true.
            If the file has fewer data rows, all of them are used.
        tokenize: Callable turning the tweet text into tokens.  Defaults to
            ``nltk.word_tokenize``.

    Returns:
        A tuple ``(ids, tweets, labels)`` of three parallel lists: ids as
        strings, tweets as the tokenizer's output, and labels as ints with
        a label of 0 remapped to -1.

    Raises:
        IndexError: If a non-blank data row has fewer than four fields.
        ValueError: If the label field of a row is not an integer.
    """
    if tokenize is None:
        tokenize = nltk.word_tokenize

    # The context manager closes the file even if reading fails.
    with open(path) as f:
        lines = f.readlines()

    # Drop the header *before* sampling so that it can never be drawn as a
    # data row, and ignore blank lines (e.g. a trailing newline at EOF).
    rows = [line for line in lines[1:] if line.strip()]
    if sample:
        # Clamp the size: random.sample raises if asked for more items
        # than the population holds.
        rows = random.sample(rows, min(sample_size, len(rows)))

    ids = []
    tweets = []
    labels = []
    for row in rows:
        cleaned = row.replace("\"", "").strip()
        # Keep only 7-bit ASCII characters.
        cleaned = "".join(c for c in cleaned if ord(c) < 128)
        # Split on the first three commas only: the tweet text is the last
        # field and may itself contain commas, which must be preserved.
        terms = cleaned.split(",", 3)
        label = int(terms[1])
        if label == 0:
            label = -1
        ids.append(terms[0])
        tweets.append(tokenize(terms[3]))
        labels.append(label)
    return (ids, tweets, labels)