Skip to content
Permalink
075c36a25c
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
37 lines (27 sloc) 992 Bytes
import sklearn
import numpy as np
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from utils import *
import statsmodels.graphics.api as smg
# --- Experiment configuration -------------------------------------------------
K = 5  # NOTE(review): not referenced anywhere in this script -- confirm before removing
random_seed = 0  # fixed seed so the train/test split is reproducible
test_ratio = 0.2  # fraction of the samples held out for evaluation

# Axis labels for the correlation plot.
# NOTE(review): assumed to follow the column order of X returned by read_data() -- confirm.
FEATURE_NAMES = [
    "Clump_Thickness",
    "Cell_Size",
    "Cell_Shape",
    "Marginal_Adhesion",
    "Single_Epithelial_Cell_Size",
    "Bare_Nuclei",
    "Bland_Chromatin",
    "Normal_Nucleoli",
    "Mitoses",
]

# --- Data ---------------------------------------------------------------------
X, y = read_data()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_ratio, random_state=random_seed
)

# --- Correlation analysis -----------------------------------------------------
# np.corrcoef treats each ROW as one variable, so the training matrix is
# transposed to obtain a feature-by-feature correlation matrix.
corr_matrix = np.corrcoef(X_train.T)
smg.plot_corr(corr_matrix, xnames=FEATURE_NAMES)
# NOTE(review): `plt` is not imported explicitly in this file; presumably it is
# re-exported by `from utils import *` -- confirm.
plt.show()

# --- Naive Bayes --------------------------------------------------------------
# CategoricalNB sizes its per-feature probability tables from the categories
# seen during fit(). If the held-out split contains a category index larger
# than any seen in training, predict() fails with an IndexError. Declaring the
# category count from the full data set avoids that; when the training split
# already covers every value, the fitted model is unchanged.
n_categories = np.asarray(X).max(axis=0).astype(int) + 1
clf = CategoricalNB(min_categories=n_categories)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print(f'accuracy is: {acc}')
save_cm_figs(y_test, y_pred, 'NB_ori')  # confusion matrix

# NOTE(review): per the original author's notes, label 2 appears to denote
# benign samples and label 4 cancer samples -- confirm against read_data().