# --- GitHub page residue captured with the file (not part of the program) ---
# Skip to content
# Permalink
# 075c36a25c
# Switch branches/tags
#
# Name already in use
#
# A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
# Go to file
#
#
# Cannot retrieve contributors at this time
# 130 lines (115 sloc) 4.25 KB
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
# Load the data set; missing measurements are encoded as the string '?'.
df = pd.read_csv('breast-cancer-wisconsin.csv')
features = [
    'clumpthickness', 'cellsize', 'cellshape', 'marginaladhesion',
    'singleepithelialcellsize', 'barenuclei', 'blandchromatin',
    'normalnucleoli', 'mitoses',
]

# Impute missing values with the mean of their own column.  The gaps are
# expected to be confined to 'barenuclei'; computing the mean here replaces the
# previously hard-coded constant (3.54465593) and keeps every feature column
# numeric instead of a mix of strings and floats.  Any entry that cannot be
# parsed as a number (such as '?') becomes NaN first and is then filled.
# (Alternative that was considered earlier: dropping the incomplete rows.)
df[features] = df[features].apply(pd.to_numeric, errors='coerce')
df[features] = df[features].fillna(df[features].mean())

# Feature matrix: one row per sample, one column per entry of `features`.
x = df.loc[:, features].values
# Target matrix: column 0 is the sample id (kept so predictions can be traced
# back to a sample), column 1 is the class label.
y = df.loc[:, ['samplecodenumber', 'class']].values

# Standardise the features.
# NOTE(review): the scaler is fitted on all rows before classify() performs
# its train/test split, so test-set statistics influence the training data;
# consider fitting on the training split only.
x = StandardScaler().fit_transform(x)
def _confusion_counts(y_true, y_pred, positive=2):
    """Return (tp, fp, tn, fn) counts, treating `positive` as the positive label."""
    tp = fp = tn = fn = 0
    for actual, predicted in zip(y_true, y_pred):
        if actual == predicted:
            if actual == positive:
                tp += 1
            else:
                tn += 1
        elif actual == positive:
            fn += 1
        else:
            fp += 1
    return tp, fp, tn, fn


def classify(hlsize, out=None):
    """Train an MLP on a 70/30 split of the module-level data and log the results.

    The module-level arrays `x` (features) and `y` (sample id, class label) are
    split with a fixed random state, an `MLPClassifier` is fitted on the
    training part, and one CSV row per test sample
    (sample id, true class, predicted class) is written to `out`.  When the
    accuracy exceeds 0.965, a summary with the true/false positive rates,
    precision and accuracy is appended.  Class label 2 (benign) is treated as
    the positive class.

    Args:
        hlsize: passed unchanged as `hidden_layer_sizes` to `MLPClassifier`.
        out: writable text stream for the results.  Defaults to the
            module-level `file` handle, which must be open when this
            function is called.

    Returns:
        None.  All results are written to `out`.
    """
    if out is None:
        # Backward-compatible default: the script opens the results file as
        # the module-level name `file` before calling classify().
        out = file

    X_train, X_test, y_trainid, y_testid = train_test_split(
        x, y, test_size=0.3, random_state=1)
    # Column 1 holds the class label; column 0 (sample id) is only reported.
    y_train = y_trainid[:, 1]
    y_test = y_testid[:, 1]

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=hlsize, random_state=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Per-sample predictions as CSV rows.
    out.write("samplecodenumber,trueclass,predclass \n")
    out.writelines(
        ",".join((str(sample_id), str(actual), str(predicted))) + "\n"
        for (sample_id, actual), predicted in zip(y_testid, y_pred)
    )

    tp, fp, tn, fn = _confusion_counts(y_test, y_pred, positive=2)

    # Every ratio below needs a non-zero denominator; if any is zero the
    # summary is skipped, exactly as before.
    if tp + fn == 0 or fp + tn == 0 or tp + fp == 0:
        return

    tprate = tp / (tp + fn)
    fprate = fp / (fp + tn)
    precision = tp / (tp + fp)
    acc = (tp + tn) / (tp + fp + tn + fn)

    # Only configurations that beat the accuracy threshold get a summary.
    if acc > .965:
        out.write("hlsize =" + str(hlsize) + '\n')
        out.write("true pos rate=" + str(tprate) + '\n')
        out.write("false pos rate=" + str(fprate) + '\n')
        out.write("precision=" + str(precision) + '\n')
        out.write("accuracy=" + str(acc) + '\n')
# Run a single experiment and store its output in res.txt.  The context
# manager guarantees the file is flushed and closed even if classify() raises.
# The handle must be bound to the module-level name `file`, because
# classify() writes to that name.
with open('res.txt', 'w') as file:
    classify((13, 8, 18))
#
# for a in range(20):
# if a>0:
# for b in range(20):
# if b>0:
# for c in range(20):
# if c>0:
# classify((a,b,c))
# tsne = TSNE(n_components=2, verbose=1, perplexity=60, n_iter=500)
# tsne_results = tsne.fit_transform(df_x)
# #print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
#
# df_x['tsne-2d-one'] = tsne_results[:,0]
# df_x['tsne-2d-two'] = tsne_results[:,1]
# df_x['target'] = targ['class']
# plt.figure(figsize=(16,10))
# sns.scatterplot(
# x="tsne-2d-one", y="tsne-2d-two",
# hue="target",
# palette=sns.color_palette("hls", 2),
# data=df_x,
# legend="full",
# alpha=0.3
# )
# pca = PCA(n_components=3)
# principalComponents = pca.fit_transform(x)
# principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2','principal component 3'])
# finalDf = pd.concat([principalDf, df[['class']]], axis = 1)
#
# fig = plt.figure(figsize = (8,8))
# ax = fig.add_subplot(1,1,1)
# ax.set_xlabel('Principal Component 1', fontsize = 15)
# ax.set_ylabel('Principal Component 2', fontsize = 15)
# ax.set_title('2 component PCA', fontsize = 20)
# targets = [2,4]
# colors = ['r', 'g']
# for target, color in zip(targets,colors):
# indicesToKeep = finalDf['class'] == target
# ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
# , finalDf.loc[indicesToKeep, 'principal component 2']
# , c = color
# , s = 10)
# ax.legend(targets)
# ax.grid()
# Display any open matplotlib figures.  All plotting code above is currently
# commented out, so presumably there is nothing to show unless it is re-enabled.
plt.show()