Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# -*- coding: utf-8 -*-
"""GKN_5Fold0112_Function_Training.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Ifye3m1wWbWtGWAK6TOnMLe8uNkVI3MB
"""
# mount for google colab
from google.colab import drive
drive.mount('grad')
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from sklearn.model_selection import KFold, StratifiedKFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from keras.layers import Flatten
from keras.models import Model
import datetime
import numpy as np
import csv
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
# Functions:
def ResNet50_Model(IMG_SIZE, FreezeLayer):
    """Build an ImageNet-pretrained ResNet50 with a new softmax head.

    The backbone is created without its original classification top and
    with global average pooling. A ``Flatten`` layer and a ``Dense``
    softmax layer are stacked on the backbone output; the width of the
    softmax layer is ``len(label_names)``, where ``label_names`` is a
    module-level global that must be defined before this function is
    called.

    Args:
        IMG_SIZE: Side length (pixels) of the square, 3-channel input.
        FreezeLayer: Number of leading backbone layers to mark as
            non-trainable. ``0`` leaves the whole backbone trainable.

    Returns:
        The assembled, not yet compiled, Keras ``Model``.
    """
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    MODEL = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
                                           include_top=False,
                                           weights='imagenet',
                                           pooling='avg')
    if FreezeLayer == 0:
        MODEL.trainable = True
    else:
        # Freeze only the first `FreezeLayer` layers; the rest stay trainable.
        for layer in MODEL.layers[:FreezeLayer]:
            layer.trainable = False
            print(layer, layer.trainable)
    flat1 = Flatten()(MODEL.layers[-1].output)
    # One softmax unit per entry of the global label mapping.
    output = tf.keras.layers.Dense(len(label_names),
                                   activation='softmax')(flat1)
    # The original also instantiated a second, never-connected Dense layer
    # (`prediction_layer`); it had no effect on the model and was removed.
    return Model(inputs=MODEL.inputs, outputs=output)
def Training_Option(Optimizer,LearningRate):
    """Compile the module-level ``model`` with the requested optimizer.

    The model is compiled with categorical cross-entropy loss and the
    ``accuracy`` metric.

    Args:
        Optimizer: ``'adam'`` for Adam or ``'sgdm'`` for SGD with
            momentum 0.9.
        LearningRate: Learning rate passed to the optimizer.

    Returns:
        The compiled module-level ``model``.

    Raises:
        ValueError: If ``Optimizer`` is not one of the supported names.
            Previously an unknown name silently returned an uncompiled
            model, which only failed later inside training.
    """
    # Validate first so a typo fails fast, before any optimizer is built.
    if Optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=LearningRate)
    elif Optimizer == 'sgdm':
        optimizer = tf.keras.optimizers.SGD(learning_rate=LearningRate,
                                            momentum=0.9)
    else:
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'adam' or 'sgdm'".format(
                Optimizer))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def Model_Training(train_data_generator,valid_data_generator,epoch_size):
    """Train the module-level ``model`` and return its Keras history.

    Args:
        train_data_generator: Batch iterator for training data; must expose
            ``samples`` and ``batch_size``.
        valid_data_generator: Batch iterator for validation data; must
            expose ``samples`` and ``batch_size``.
        epoch_size: Number of epochs to train for.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Step counts must be whole numbers; np.ceil alone yields a float.
    train_steps = int(np.ceil(
        train_data_generator.samples / train_data_generator.batch_size))
    valid_steps = int(np.ceil(
        valid_data_generator.samples / valid_data_generator.batch_size))
    # `fit` accepts generators directly; `fit_generator` is deprecated, and
    # the rest of this file already passes generators to evaluate/predict.
    history = model.fit(train_data_generator,
                        epochs=epoch_size,
                        steps_per_epoch=train_steps,
                        validation_steps=valid_steps,
                        validation_data=valid_data_generator)
    return history
def Plot_loss_accruacy(Show,history):
    """Show the loss and accuracy curves recorded during training.

    Two figures are displayed one after the other: loss first, then
    accuracy. In each, the validation curve is red and the training curve
    is blue.

    Args:
        Show: Plots are drawn only when this compares equal to ``True``.
        history: Object whose ``history`` mapping holds the per-epoch
            series ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
    """
    # One entry per figure:
    # (validation key, validation legend, training key, training legend,
    #  title, y-axis label)
    figures = (
        ('val_loss', 'Validation loss',
         'loss', 'Training loss',
         'Loss', 'Loss value'),
        ('val_accuracy', 'Validation accuracy',
         'accuracy', 'Training accuracy',
         'Accuracy', 'Accuracy value (%)'),
    )
    if Show == True:
        for val_key, val_label, train_key, train_label, title, y_label in figures:
            plt.plot(history.history[val_key], color='r', label=val_label)
            plt.plot(history.history[train_key], color='b', label=train_label)
            plt.legend()
            plt.title(title)
            plt.ylabel(y_label)
            plt.xlabel('No. epoch')
            plt.show()
def Write_CSVresult(Show,OutputDir,valid_data_generator,predictions,pred,DATADIR,loss0,history,conf_matrix):
    """Write per-image predictions and a run summary into ``OutputDir``.

    Two files are produced: ``training_output_.csv`` (one row per
    validation image) and ``Notes.txt`` (final metrics, recent-epoch
    averages, confusion-matrix cells and free-text notes).

    Besides its parameters, this function reads the module-level globals
    ``accuracy0`` and ``NotesString``; both must be defined by the caller's
    script before it runs with ``Show`` enabled.

    Args:
        Show: Nothing is written when this is falsy.
        OutputDir: Existing directory that receives both files.
        valid_data_generator: Object exposing ``filenames`` and ``labels``
            sequences for the validation images.
        predictions: Predicted class index per validation image.
        pred: Per-image class scores; columns 0 and 1 are written out.
        DATADIR: Input data directory, recorded in the notes file.
        loss0: Final validation loss.
        history: Object whose ``history`` mapping holds ``val_loss`` and
            ``val_accuracy`` per-epoch series.
        conf_matrix: 2x2 matrix; cells ``[0][0]``, ``[0][1]``, ``[1][0]``
            and ``[1][1]`` are reported as TP, FN, FP and TN respectively.
    """
    if not Show:
        return

    # newline='' hands line-ending control to the csv writer, so the
    # explicit '\n' terminator is not translated by the platform.
    with open(os.path.join(OutputDir, 'training_output_.csv'), mode='w',
              newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
        csv_writer.writerow(['Image File', 'Label', 'Predicted Label',
                             'Pred0', 'Pred1'])
        rows = zip(valid_data_generator.filenames,
                   valid_data_generator.labels, predictions, pred)
        for filename, label, predicted, scores in rows:
            csv_writer.writerow([filename, label, predicted,
                                 scores[0], scores[1]])

    # [-25:] covers the last 25 epochs including the final one; the former
    # [-25:-1] slice dropped the final epoch and averaged only 24 values.
    recent_val_loss = np.mean(history.history['val_loss'][-25:])
    recent_val_accuracy = np.mean(history.history['val_accuracy'][-25:])

    # Build every line first so a missing global or bad value raises
    # before Notes.txt is created.
    summary = [
        'Input Data Directory:\n', DATADIR, "\n",
        "Final Validation loss: {:.2f}".format(loss0), "\n",
        " Final Validation Accuracy: {:.2f}".format(accuracy0), "\n",
        "Validation Loss over last 25 epochs: {:.2f}".format(recent_val_loss), "\n",
        "Validation Accuracy over last 25 epochs: {:.2f}".format(recent_val_accuracy), "\n",
        "Confusion Matrix: \n",
        "True Positives: {:.0f}".format(conf_matrix[0][0]),
        " | False Negatives: {:.0f}".format(conf_matrix[0][1]), "\n",
        "False Positives: {:.0f}".format(conf_matrix[1][0]),
        " | True Negatives: {:.0f}".format(conf_matrix[1][1]), "\n",
        'Notes:\n', NotesString,
    ]
    with open(os.path.join(OutputDir, 'Notes.txt'), mode='w') as txt_file:
        txt_file.writelines(summary)
# folder path of images
DATADIR = '/content/grad/My Drive/Colab Notebooks/GKN456_0119_EN'
# file path of csv file
csv_path = '/content/grad/My Drive/Colab Notebooks/GKN456_EN.csv'
# Load the image list; the "filename" and "label" columns are consumed below.
train_data = pd.read_csv(csv_path, dtype=str)
Y = train_data[['label']]
kf = KFold(n_splits=5, shuffle=True)
# NOTE(review): skf is created but the loop below splits with the
# non-stratified kf, so class balance per fold is not enforced - confirm.
skf = StratifiedKFold(n_splits=5, shuffle=True)
# Read in images with augmentation (flip, shift and scale)
image_generator = ImageDataGenerator(horizontal_flip=True,
                                     width_shift_range=(-10, 10),
                                     height_shift_range=(-10, 10),
                                     zoom_range=[0.9, 1.1])
save_dir = '/content/grad/My Drive/Colab Notebooks'
conf_matrix_list_of_arrays = []

# Settings that do not change between folds (hoisted out of the loop).
# Notes to be saved with training results
NotesString = """Visible Defect Dataset (all edge cases excluded) Training,
With Alignment and CLAHE enhancement"""
# Only the number of entries is used (to size the softmax layer).
label_names = {'1': 1, '2': 2}
# select model: ResNet50(default), Vgg16 and InceptionV3
IMG_SIZE = 224
FreezeLayer = 10

for fold_var, (train_index, val_index) in enumerate(kf.split(Y), start=1):
    # One output folder per start time (minute resolution).
    OutputDir = './Training_' + datetime.datetime.now().strftime("%m-%d-%Y_%H-%M")
    os.makedirs(OutputDir, exist_ok=True)

    # read training images and testing images based on k-fold index
    training_data = train_data.iloc[train_index]
    validation_data = train_data.iloc[val_index]
    print(training_data)
    # target_size follows IMG_SIZE so the generators always match the
    # model's input layer.
    train_data_generator = image_generator.flow_from_dataframe(
        training_data,
        directory=DATADIR,
        x_col="filename",
        y_col="label",
        batch_size=64,
        class_mode="categorical",
        target_size=(IMG_SIZE, IMG_SIZE),
        shuffle=True)
    # NOTE(review): validation images go through the same augmenting
    # generator as training images, so evaluate() and predict() below may
    # see different random transforms - confirm this is intended.
    valid_data_generator = image_generator.flow_from_dataframe(
        validation_data,
        directory=DATADIR,
        x_col="filename",
        y_col="label",
        class_mode="categorical",
        batch_size=64,
        target_size=(IMG_SIZE, IMG_SIZE),
        shuffle=False)

    model = ResNet50_Model(IMG_SIZE, FreezeLayer)
    for l in model.layers:
        print(l.name, l.trainable)
    # optimizer with same momentum default values as Matlab
    Training_Option('sgdm', 0.0001)
    # model training
    history = Model_Training(train_data_generator, valid_data_generator, 150)

    # model validation (step count must be an int, np.ceil returns a float)
    validation_steps = int(np.ceil(
        valid_data_generator.samples / valid_data_generator.batch_size))
    loss0, accuracy0 = model.evaluate(valid_data_generator,
                                      steps=validation_steps)
    print("loss: {:.2f}".format(loss0))
    print("accuracy: {:.2f}".format(accuracy0))

    # prediction probability
    pred = model.predict(valid_data_generator, verbose=1)
    # Predicted class = highest score. Rounding first (as before) is
    # redundant for two classes and can zero out every column when there
    # are more, so argmax is taken on the raw scores.
    predictions = pred.argmax(axis=1)
    # confusion matrix
    conf_matrix = confusion_matrix(valid_data_generator.labels, predictions)
    conf_matrix_list_of_arrays.append(conf_matrix)

    # serialize model to JSON
    model_json = model.to_json()
    with open(os.path.join(OutputDir, 'model.json'), mode='w') as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    # NOTE(review): the weights path is relative, so the file lands in the
    # current working directory rather than in OutputDir - confirm.
    Weight_name = "model_weight" + str(fold_var) + ".h5"
    model.save_weights(Weight_name)
    print("Saved model to disk")

    # write result in .csv, .txt file and plot training/validation progress
    Write_CSVresult(False, OutputDir, valid_data_generator, predictions, pred,
                    DATADIR, loss0, history, conf_matrix)
    Plot_loss_accruacy(True, history)

    # release the Keras graph/state before building the next fold's model
    tf.keras.backend.clear_session()

# complete k-fold cross validation confusion matrix
print(conf_matrix_list_of_arrays)