Permalink
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
CSE5713_DataMiningProject/utils.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
77 lines (66 sloc)
2.64 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
def read_data(path='./breast-cancer-wisconsin.data', missing_value=11):
    """Load a comma-separated integer data file into features and labels.

    Each non-blank line is split on commas and every field is parsed as an
    integer. The first column of every record is discarded, the last column
    is returned as the label vector and the columns in between form the
    feature matrix.

    Args:
        path: Location of the data file. Defaults to the path that was
            previously hard-coded, so existing ``read_data()`` calls are
            unaffected.
        missing_value: Integer substituted for every ``'?'`` character found
            in a line. Defaults to 11, the value that was previously
            hard-coded.
            NOTE(review): the reason for choosing 11 is not visible here --
            presumably a sentinel outside the normal feature range; confirm.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds all columns except
        the first and the last, ``y`` holds the last column.

    Raises:
        ValueError: If a field (after ``'?'`` substitution) is not an integer.
    """
    placeholder = str(missing_value)
    records = []
    with open(path, 'r') as data_fid:
        # Iterate the file lazily instead of materialising every line first.
        for line in data_fid:
            line = line.strip()
            if not line:
                # A blank or trailing empty line used to crash int('').
                continue
            fields = line.replace('?', placeholder).split(',')
            # Parse every column, then drop the first one.
            records.append([int(item) for item in fields][1:])

    records = np.array(records)
    X = records[:, :-1]
    y = records[:, -1]
    return X, y
def plot_confusion_matrix(y_true, y_pred, classes=None,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a classification report and draw the confusion matrix.

    The confusion matrix and the classification report are computed with
    scikit-learn from ``y_true`` and ``y_pred`` and printed to stdout. The
    matrix is then drawn onto the current pyplot figure; this function
    neither creates, saves nor shows a figure.

    Args:
        y_true: Ground-truth labels, passed to ``confusion_matrix`` and
            ``classification_report``.
        y_pred: Predicted labels, passed to the same two functions.
        classes: Display names of the classes, used for the axis tick labels
            and for the classification report. Defaults to
            ``['benign', 'cancer']``.
        normalize: When true, each row of the matrix is divided by its row
            sum before printing and plotting.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used by ``imshow``.
    """
    # Resolve the default here rather than in the signature so that no
    # mutable list is shared between calls.
    if classes is None:
        classes = ['benign', 'cancer']
    else:
        classes = list(classes)

    print('Confusion Matrix')
    cm = confusion_matrix(y_true, y_pred)

    print('Classification Report')
    # Use the same names as the plot axes; the report previously used a
    # separate hard-coded (and misspelled) list and ignored ``classes``.
    print(classification_report(y_true, y_pred, target_names=classes))

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has a zero row sum; dividing by 1
        # keeps that row at 0 instead of producing NaN.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of it, switching to white text on cells
    # darker than half of the maximum so the number stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
def save_cm_figs(y_true, y_pred, arc, target_names=None):
    """Plot, save and show raw and normalized confusion matrices.

    Two figures are created with ``plot_confusion_matrix`` and written to
    ``./result/conf_no_norm_<arc>.png`` and ``./result/conf_norm_<arc>.png``.
    ``plt.show()`` is called at the end.

    Side effect: NumPy's print precision is set to 2 via
    ``np.set_printoptions`` and is not restored afterwards.

    Args:
        y_true: Ground-truth labels, forwarded to ``plot_confusion_matrix``.
        y_pred: Predicted labels, forwarded to ``plot_confusion_matrix``.
        arc: Value formatted into the output file names.
        target_names: Class display names forwarded as ``classes``. Defaults
            to ``['benign', 'cancer']``.
    """
    # Resolve the default here rather than in the signature so that no
    # mutable list is shared between calls.
    if target_names is None:
        target_names = ['benign', 'cancer']
    else:
        target_names = list(target_names)

    # Limit the decimals shown when the matrices are printed.
    np.set_printoptions(precision=2)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs('./result', exist_ok=True)

    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(y_true, y_pred, classes=target_names,
                          title='Confusion matrix, without normalization')
    plt.savefig('./result/conf_no_norm_{}.png'.format(arc), bbox_inches='tight')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(y_true, y_pred, classes=target_names, normalize=True,
                          title='Normalized confusion matrix')
    plt.savefig('./result/conf_norm_{}.png'.format(arc), bbox_inches='tight')

    plt.show()