Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
MLP classifier
  • Loading branch information
nim18004 committed Apr 24, 2020
1 parent 4890408 commit a565fa2
Show file tree
Hide file tree
Showing 3 changed files with 823 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
123 changes: 123 additions & 0 deletions bcw.py
@@ -0,0 +1,123 @@
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

# Load the Wisconsin breast-cancer data. '?' marks a missing value in the CSV;
# parsing it as NaN keeps every feature column numeric instead of leaving a
# column that mixes numeric strings with floats.
df = pd.read_csv('breast-cancer-wisconsin.csv', na_values='?')
features = [
    'clumpthickness',
    'cellsize',
    'cellshape',
    'marginaladhesion',
    'singleepithelialcellsize',
    'barenuclei',
    'blandchromatin',
    'normalnucleoli',
    'mitoses',
]

# Impute missing values (all of which are in barenuclei) with the mean of the
# observed barenuclei values. The mean is computed from the data rather than
# hard-coded (this script previously hard-coded it as 3.54465593).
df['barenuclei'] = df['barenuclei'].fillna(df['barenuclei'].mean())

# Alternative to imputation (disabled): drop the rows with missing barenuclei.
# miss=[617,411,321,315,297,294,292,275,249,235,164,158,145,139,40,23]
# for ind in miss:
#     df=df.drop(df.index[ind])

# Separating out the features
x = df.loc[:, features].values
# Separating out the target; kept as an (n, 1) column array
y = df.loc[:, ['class']].values
# Standardizing the features
x = StandardScaler().fit_transform(x)


def classify(hlsize):
    """Train an MLP on the module-level data and log its metrics if accurate.

    The module-level ``x``/``y`` arrays are split 70/30 with a fixed seed, an
    ``MLPClassifier`` with the requested hidden layers is fitted on the
    training part, and a confusion matrix is counted on the held-out part
    with class ``2`` (benign) treated as the positive class.

    When accuracy exceeds 0.965, the hidden-layer sizes, true-positive rate,
    false-positive rate, precision and accuracy are written to the
    module-level ``file`` object, which the caller must have opened for
    writing before calling this function. Nothing is written when any of the
    rates would have a zero denominator.

    Args:
        hlsize: Value passed to ``MLPClassifier`` as ``hidden_layer_sizes``,
            e.g. ``(5, 3, 2)``.

    Returns:
        None.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=2)
    # The module builds ``y`` as an (n, 1) column. Flatten it so that fit()
    # receives the 1-D target it expects (avoids a DataConversionWarning)
    # and so that each element below is a plain scalar label.
    y_train = y_train.ravel()
    y_test = y_test.ravel()

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=hlsize, random_state=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Confusion-matrix counts; positive = benign = 2.
    tp = fp = tn = fn = 0
    for actual, predicted in zip(y_test, y_pred):
        if actual == predicted:
            if actual == 2:
                tp += 1
            else:
                tn += 1
        elif actual == 2:
            fn += 1
        else:
            fp += 1

    actual_pos = tp + fn
    actual_neg = fp + tn
    predicted_pos = tp + fp
    # Skip degenerate splits/models for which a rate is undefined.
    if actual_pos == 0 or actual_neg == 0 or predicted_pos == 0:
        return

    tprate = tp / actual_pos  # true-positive rate (identical to recall)
    fprate = fp / actual_neg
    precision = tp / predicted_pos
    acc = (tp + tn) / (actual_pos + actual_neg)

    # Only architectures that beat the accuracy threshold are recorded.
    if acc > .965:
        file.write("hlsize =" + str(hlsize) + '\n')
        file.write("true pos rate=" + str(tprate) + '\n')
        file.write("false pos rate=" + str(fprate) + '\n')
        file.write("precision=" + str(precision) + '\n')
        file.write("accuracy=" + str(acc) + '\n')

# --- Grid search over 3-hidden-layer architectures (disabled) --------------
# Calls classify() for every (a, b, c) with 1 <= a, b, c <= 19; classify()
# writes the configurations that pass its accuracy threshold to '3layers.txt'.
# NOTE: classify() writes to the module-level name `file`, so the open() below
# must run before classify() is called. The file is never closed here.
#
# file = open('3layers.txt','w')
# for a in range(20):
#     if a>0:
#         for b in range(20):
#             if b>0:
#                 for c in range(20):
#                     if c>0:
#                         classify((a,b,c))

# --- 2-D t-SNE scatter plot coloured by class (disabled) --------------------
# NOTE(review): `df_x`, `targ`, `time` and `time_start` are not defined or
# imported in the code visible here; define them before re-enabling this.
#
# tsne = TSNE(n_components=2, verbose=1, perplexity=60, n_iter=500)
# tsne_results = tsne.fit_transform(df_x)
# #print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
#
# df_x['tsne-2d-one'] = tsne_results[:,0]
# df_x['tsne-2d-two'] = tsne_results[:,1]
# df_x['target'] = targ['class']
# plt.figure(figsize=(16,10))
# sns.scatterplot(
#     x="tsne-2d-one", y="tsne-2d-two",
#     hue="target",
#     palette=sns.color_palette("hls", 2),
#     data=df_x,
#     legend="full",
#     alpha=0.3
# )

# --- PCA scatter plot (disabled) --------------------------------------------
# Fits three principal components on the standardized features but plots only
# the first two, one colour per class label (2 and 4).
#
# pca = PCA(n_components=3)
# principalComponents = pca.fit_transform(x)
# principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2','principal component 3'])
# finalDf = pd.concat([principalDf, df[['class']]], axis = 1)
#
# fig = plt.figure(figsize = (8,8))
# ax = fig.add_subplot(1,1,1)
# ax.set_xlabel('Principal Component 1', fontsize = 15)
# ax.set_ylabel('Principal Component 2', fontsize = 15)
# ax.set_title('2 component PCA', fontsize = 20)
# targets = [2,4]
# colors = ['r', 'g']
# for target, color in zip(targets,colors):
#     indicesToKeep = finalDf['class'] == target
#     ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
#                , finalDf.loc[indicesToKeep, 'principal component 2']
#                , c = color
#                , s = 10)
# ax.legend(targets)
# ax.grid()

# Show any figures created above. While both plotting sections stay commented
# out, the code visible here creates no figure to display.
plt.show()

0 comments on commit a565fa2

Please sign in to comment.