Skip to content

Commit

Permalink
Update main.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Qinqing Liu committed Apr 6, 2020
1 parent 94a43de commit 99c4a9e
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,29 @@
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# NOTE(review): K is not referenced anywhere in the visible lines; its purpose
# cannot be confirmed from here.
K = 5
# Seed handed to train_test_split as random_state.
random_seed = 0
# Fraction handed to train_test_split as test_size.
test_ratio = 0.2

def read_data(path='./breast-cancer-wisconsin.data', missing_value='11'):
    """Load a comma-separated integer data file into features and labels.

    Every non-blank line is one record. Each ``?`` in the line is replaced
    with ``missing_value``, the line is split on commas, the first field is
    discarded, and the remaining fields are parsed as integers. The last
    parsed field is the label; the fields before it are the features.

    Args:
        path: Location of the data file. Defaults to the path that was
            previously hard-coded, so ``read_data()`` behaves as before.
        missing_value: Text substituted for every ``?`` before parsing.
            Defaults to ``'11'``, the placeholder that was previously
            hard-coded.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` contains every parsed
        column except the last, ``y`` contains the last column.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a retained field is not an integer after
            substitution, or if the file contains no records.
    """
    records = []
    with open(path, 'r') as data_fid:
        # Stream the file instead of materialising every line up front.
        for raw_line in data_fid:
            line = raw_line.strip()
            if not line:
                # A blank or trailing empty line would otherwise crash int('').
                continue
            fields = line.replace('?', missing_value).split(',')
            # Drop the leading field before parsing; only the rest is used.
            records.append([int(item) for item in fields[1:]])

    if not records:
        # Slicing an empty 1-D array with [:, :-1] raises an opaque IndexError.
        raise ValueError('no records found in {!r}'.format(path))

    records = np.array(records)
    X = records[:, :-1]
    y = records[:, -1]
    return X, y

# Inline loading of the data file: read all lines, replace each '?' with '11',
# split on commas, parse every field as an int and drop the first field.
# NOTE(review): these statements duplicate the body of read_data(); in this
# diff view they look like the pre-change side of the hunk -- confirm whether
# they are still live before relying on them.
with open('./breast-cancer-wisconsin.data', 'r') as data_fid:
lines = data_fid.readlines()
records = []
for line in lines:
if '?' in line:
line = line.replace('?', '11')
line = line.split(',')
line = [int(item) for item in line][1:]
records.append(line)
records = np.array(records)
# Features are every column but the last; the last column is the label.
X = records[:,:-1]
y = records[:,-1]
# Split with a literal 20% test fraction and a fixed seed of 0.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Load features and labels through read_data(), then split them into train and
# test sets using the module-level test_ratio and random_seed.
X, y = read_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=random_seed)

# Naive Bayes
# Fit CategoricalNB on the training split and predict labels for the test split.
clf = CategoricalNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
Expand All @@ -29,3 +36,5 @@
# X_2 = X[Y==2] #benign
# X_4 = X[Y==4] #cancer



0 comments on commit 99c4a9e

Please sign in to comment.