MLP classifier

qil15006 · Apr 24, 2020 · a565fa2 · a565fa2
1 parent 4890408
commit a565fa2
Show file tree

Hide file tree

Showing 3 changed files with 823 additions and 0 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/bcw.py b/bcw.py
@@ -0,0 +1,123 @@
+import pandas as pd
+import seaborn as sn
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+from mpl_toolkits.mplot3d import Axes3D
+import seaborn as sns
+from sklearn.neural_network import MLPClassifier
+from sklearn.model_selection import train_test_split
+
+# Load the data set; `features` names the nine predictor columns used below.
+df = pd.read_csv('breast-cancer-wisconsin.csv')
+features = [
+    'clumpthickness',
+    'cellsize',
+    'cellshape',
+    'marginaladhesion',
+    'singleepithelialcellsize',
+    'barenuclei',
+    'blandchromatin',
+    'normalnucleoli',
+    'mitoses',
+]
+
+# Missing values are encoded as '?' (all of them are in barenuclei); impute
+# them with the mean of barenuclei, which is hard-coded here.
+df = df.replace('?', 3.54465593)
+
+# Disabled alternative: drop rows by position instead of imputing
+# (presumably the rows that hold the missing values -- verify before use).
+# miss=[617,411,321,315,297,294,292,275,249,235,164,158,145,139,40,23]
+# for ind in miss:
+#     df=df.drop(df.index[ind])
+
+# Separate the features from the target; y stays a single-column 2-D array.
+x = df[features].to_numpy()
+y = df[['class']].to_numpy()
+
+# Standardize the features.
+x = StandardScaler().fit_transform(x)
+
+
+def classify(hlsize, min_accuracy=.965):
+    """Train an MLP with the given hidden layers and log it if accurate enough.
+
+    Splits the module-level ``x``/``y`` arrays 70/30 with a fixed
+    ``random_state``, fits an ``MLPClassifier`` on the training part and
+    scores it on the held-out part.  Class 2 (benign) is the positive
+    class; every other label counts as negative.  When the test accuracy
+    exceeds ``min_accuracy`` the architecture and its metrics are written
+    to the module-level ``file`` object, which must already be open for
+    writing when this function is called.
+
+    Args:
+        hlsize: Value forwarded to ``hidden_layer_sizes``, e.g. ``(5, 3)``.
+        min_accuracy: Accuracy the model has to exceed to be written out.
+
+    Returns:
+        None.  Nothing is written when the test split contains no positive
+        or no negative samples, or when nothing was predicted positive,
+        because one of the rates would then divide by zero.
+    """
+    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2)
+    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=hlsize, random_state=1)
+    # y is a single-column 2-D array; flatten it so fit() receives a 1-D target.
+    clf.fit(X_train, y_train.ravel())
+    y_pred = clf.predict(X_test)
+
+    # Confusion-matrix counts, positive = benign = 2.
+    actual = y_test.ravel()
+    is_positive = actual == 2
+    is_correct = actual == y_pred
+    true_pos = int((is_correct & is_positive).sum())
+    true_neg = int((is_correct & ~is_positive).sum())
+    false_neg = int((~is_correct & is_positive).sum())
+    false_pos = int((~is_correct & ~is_positive).sum())
+
+    actual_pos = true_pos + false_neg
+    actual_neg = false_pos + true_neg
+    predicted_pos = true_pos + false_pos
+    # Each of these is a denominator below; skip the model if any is empty.
+    if actual_pos == 0 or actual_neg == 0 or predicted_pos == 0:
+        return
+
+    tprate = true_pos / actual_pos
+    fprate = false_pos / actual_neg
+    precision = true_pos / predicted_pos
+    acc = (true_pos + true_neg) / (actual_pos + actual_neg)
+
+    if acc > min_accuracy:
+        file.writelines([
+            "hlsize =" + str(hlsize) + '\n',
+            "true pos rate=" + str(tprate) + '\n',
+            "false pos rate=" + str(fprate) + '\n',
+            "precision=" + str(precision) + '\n',
+            "accuracy=" + str(acc) + '\n',
+        ])
+
+# --- Disabled: grid search over three-hidden-layer architectures ---------
+# Opens the results file that classify() writes to through the global
+# `file`, then tries every (a, b, c) with each size in 1..19.
+# Note: `file` is never closed in this snippet.
+# file = open('3layers.txt','w')
+# for a in range(20):
+#      if a>0:
+#          for b in range(20):
+#              if b>0:
+#                  for c in range(20):
+#                      if c>0:
+#                         classify((a,b,c))
+
+# --- Disabled: 2-D t-SNE scatter plot coloured by class ------------------
+# Note: `df_x` and `targ` are not defined anywhere in this file, so this
+# section cannot run as written.
+# tsne = TSNE(n_components=2, verbose=1, perplexity=60, n_iter=500)
+# tsne_results = tsne.fit_transform(df_x)
+# #print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
+#
+# df_x['tsne-2d-one'] = tsne_results[:,0]
+# df_x['tsne-2d-two'] = tsne_results[:,1]
+# df_x['target'] = targ['class']
+# plt.figure(figsize=(16,10))
+# sns.scatterplot(
+#     x="tsne-2d-one", y="tsne-2d-two",
+#     hue="target",
+#     palette=sns.color_palette("hls", 2),
+#     data=df_x,
+#     legend="full",
+#     alpha=0.3
+# )
+
+# --- Disabled: PCA scatter plot of the standardized features -------------
+# Fits three principal components but plots only the first two, one colour
+# per class label (2 and 4).
+# pca = PCA(n_components=3)
+# principalComponents = pca.fit_transform(x)
+# principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2','principal component 3'])
+# finalDf = pd.concat([principalDf, df[['class']]], axis = 1)
+#
+# fig = plt.figure(figsize = (8,8))
+# ax = fig.add_subplot(1,1,1)
+# ax.set_xlabel('Principal Component 1', fontsize = 15)
+# ax.set_ylabel('Principal Component 2', fontsize = 15)
+# ax.set_title('2 component PCA', fontsize = 20)
+# targets = [2,4]
+# colors = ['r', 'g']
+# for target, color in zip(targets,colors):
+#     indicesToKeep = finalDf['class'] == target
+#     ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
+#                , finalDf.loc[indicesToKeep, 'principal component 2']
+#                , c = color
+#                , s = 10)
+# ax.legend(targets)
+# ax.grid()
+# Displays any open figures; with both plotting sections above disabled,
+# no figure is created in this file before this call.
+plt.show()