Commit
Created a function to build the real-data dataset from Tham's data. Essentially, it takes Tham's data and makes a 3D version of it by creating new layers that resample (with replacement) within each column.
rjm11010 committed Dec 17, 2017
1 parent 6142923 commit 134901f
Showing 1 changed file with 138 additions and 79 deletions.
217 changes: 138 additions & 79 deletions gan/gan.py
@@ -4,6 +4,7 @@ import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split  # sklearn.cross_validation is deprecated
from sklearn.utils import resample
# import matplotlib.pyplot as plt
# Neural Net Building
from keras import layers
@@ -15,97 +16,155 @@ from keras.utils.generic_utils import Progbar
from keras_adversarial import AdversarialModel, simple_gan, gan_targets
from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling

# Load data
base_path = "../mkdataset/datasets/gan_datasets/"
# label_file_name = "tham_human_and_mouse_dataset.csv"
file_name = "new.csv"
all_data = pd.read_csv(os.path.join(base_path, file_name))
# labeled_data = pd.read_csv(os.path.join(label_file_name, file_name))



# Output
output_base_path = './'

# Prepare data
x_train = all_data.iloc[:, np.arange(20)]

# column_start_index_of_genes = 2
# class_label_column_index = 1
# features = all_data.iloc[:, np.arange(column_start_index_of_genes, df.shape[1])]
# labels = all_data.iloc[:, class_label_column_index]
#################################################
# Constants
#################################################

# column_start_index_of_genes = 2
# features = all_data.iloc[:, np.arange(20)]
# labels = labeled_data.iloc[:, class_label_column_index]

# x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.33, random_state=42)

#################################################
# Functions
#################################################

# Data Variables
input_dimension = x_train.shape[1] # Number of features (e.g. genes)
def permute_sample(dataset, new_dataset_num_rows, as_data_frame=False):
    """
    Given a 2-D pandas dataframe, build a new dataset by resampling
    each column independently (with replacement, despite the name).
    """
    # .values is needed because sklearn's resample returns the input type,
    # and a pandas Series has no .reshape method.
    new_dataset = resample(dataset.iloc[:, 0].values,
                           n_samples=new_dataset_num_rows).reshape(new_dataset_num_rows, 1)
    for col_index in range(1, dataset.shape[1]):
        new_col = resample(dataset.iloc[:, col_index].values,
                           n_samples=new_dataset_num_rows).reshape(new_dataset_num_rows, 1)
        new_dataset = np.append(new_dataset, new_col, 1)
    return pd.DataFrame(new_dataset) if as_data_frame else new_dataset
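
# For illustration, a minimal usage sketch of permute_sample; the toy
# dataframe is hypothetical, not part of the repository's data.
toy = pd.DataFrame(np.arange(12).reshape(4, 3), columns=['g1', 'g2', 'g3'])
# Each output column only contains values drawn (with replacement) from the
# same input column, so per-column value ranges are preserved.
toy_layer = permute_sample(toy, 6)
print(toy_layer.shape)  # (6, 3)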

gen_input_shape = (input_dimension,)
discr_input_shape = (input_dimension,)

epochs = 10
batch_size = x_train.shape[0]
def make_3d_dataset(dataset, new_dataset_num_rows, depth):
    """
    Takes a 2-dimensional dataframe and builds a 3-dimensional numpy
    array by stacking `depth` new 2-D layers together. Each layer is
    made by resampling (with replacement) the columns of the original
    dataset.
    """
    _, width = dataset.shape
    new_dataset = permute_sample(dataset, new_dataset_num_rows).reshape(new_dataset_num_rows, width, 1)
    for _ in range(depth - 1):
        new_layer = permute_sample(dataset, new_dataset_num_rows).reshape(new_dataset_num_rows, width, 1)
        new_dataset = np.append(new_dataset, new_layer, 2)
    return new_dataset
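
# Similarly, a hypothetical sketch of make_3d_dataset stacking two
# independently resampled layers built from the same toy dataframe:
toy_cube = make_3d_dataset(toy, 6, depth=2)
print(toy_cube.shape)  # (6, 3, 2): rows x columns x layers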

# Build Generative model
generative_model = Sequential()
# generative_model.add(InputLayer(input_shape=gen_input_shape))
generative_model.add(Dense(units=int(1.2*input_dimension), activation='relu', input_dim=input_dimension))
generative_model.add(Dropout(rate=0.2, noise_shape=None, seed=15))
generative_model.add(Dense(units=int(0.2*input_dimension), activation='relu'))
generative_model.add(Dense(units=input_dimension, activation='relu'))
generative_model.add(Reshape(discr_input_shape))

# Build Discriminator model
discriminator_model = Sequential()
discriminator_model.add(InputLayer(input_shape=discr_input_shape))
discriminator_model.add(Dense(units=int(1.2*input_dimension), activation='relu'))
discriminator_model.add(Dropout(rate=0.2, noise_shape=None, seed=75))
discriminator_model.add(Dense(units=int(0.2*input_dimension), activation='relu'))
discriminator_model.add(Dense(units=1, activation='sigmoid'))

# Build GAN
gan = simple_gan(generative_model, discriminator_model, normal_latent_sampling((input_dimension, )))
model = AdversarialModel(base_model=gan,
                         player_params=[generative_model.trainable_weights,
                                        discriminator_model.trainable_weights],
                         player_names=['generator', 'discriminator'])
# Other optimizer to try AdversarialOptimizerAlternating
model.adversarial_compile(adversarial_optimizer=AdversarialOptimizerSimultaneous(),
                          player_optimizers=['adam', 'adam'], loss='binary_crossentropy')

# Print Summary of Models
generative_model.summary()
discriminator_model.summary()
gan.summary()

# Train
# gan_targets takes the number of samples as input
training_record = model.fit(x=x_train, y=gan_targets(x_train.shape[0]), epochs=epochs,
                            batch_size=batch_size)
#################################################
# Load data
#################################################

# Display plot of loss over training
# plt.plot(history.history['player_0_loss'])
# plt.plot(history.history['player_1_loss'])
# plt.plot(history.history['loss'])
base_path = "../mkdataset/datasets/gan_datasets/"
# label_file_name = "tham_human_and_mouse_dataset.csv"
file_name = "tham_lasso_dataset.csv"
all_data = pd.read_csv(os.path.join(base_path, file_name))

# Predict (i.e. produce new samples)
zsamples = np.random.normal(size=(1, input_dimension))
pred = generative_model.predict(zsamples)
print(pred)
#---------------------------------------
# Prepare Data
#---------------------------------------

# Save new samples to file
# new_samples = pd.DataFrame(pred)
# new_samples.to_csv(os.path.join(output_base_path, 'new_samples.csv'))
just_values = all_data.iloc[:, 3:]  # keep only the value columns, skipping the leading non-value columns
new_sample = permute_sample(just_values, 5)        # one resampled 2-D layer
real_dataset = make_3d_dataset(just_values, 5, 2)  # 3-D stack of two resampled layers
print(real_dataset.shape)
print(real_dataset)

# # save training_record
# df = pd.DataFrame(training_record.history)
# df.to_csv(os.path.join(output_base_path, 'training_record.csv'))
# # Prepare data
# x_train = all_data.iloc[:, np.arange(20)]
#
# # column_start_index_of_genes = 2
# # class_label_column_index = 1
# # features = all_data.iloc[:, np.arange(column_start_index_of_genes, df.shape[1])]
# # labels = all_data.iloc[:, class_label_column_index]
#
# # column_start_index_of_genes = 2
# # features = all_data.iloc[:, np.arange(20)]
# # labels = labeled_data.iloc[:, class_label_column_index]
#
# # x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.33, random_state=42)
#
#
# #################################################
# # Variables
# #################################################
#
# # Output
# output_base_path = './'
#
# #---------------------------------------
# # Network Variables
# #---------------------------------------
#
# input_dimension = x_train.shape[1] # Number of features (e.g. genes)
#
# gen_input_shape = (input_dimension,)
# discr_input_shape = (input_dimension,)
#
# epochs = 10
# batch_size = x_train.shape[0]
#
# # Build Generative model
# generative_model = Sequential()
# # generative_model.add(InputLayer(input_shape=gen_input_shape))
# generative_model.add(Dense(units=int(1.2*input_dimension), activation='relu', input_dim=input_dimension))
# generative_model.add(Dropout(rate=0.2, noise_shape=None, seed=15))
# generative_model.add(Dense(units=int(0.2*input_dimension), activation='relu'))
# generative_model.add(Dense(units=input_dimension, activation='relu'))
# generative_model.add(Reshape(discr_input_shape))
#
# # Build Discriminator model
# discriminator_model = Sequential()
# discriminator_model.add(InputLayer(input_shape=discr_input_shape))
# discriminator_model.add(Dense(units=int(1.2*input_dimension), activation='relu'))
# discriminator_model.add(Dropout(rate=0.2, noise_shape=None, seed=75))
# discriminator_model.add(Dense(units=int(0.2*input_dimension), activation='relu'))
# discriminator_model.add(Dense(units=1, activation='sigmoid'))
#
# # Build GAN
# gan = simple_gan(generative_model, discriminator_model, normal_latent_sampling((input_dimension, )))
# model = AdversarialModel(base_model=gan,
#                          player_params=[generative_model.trainable_weights,
#                                         discriminator_model.trainable_weights],
#                          player_names=['generator', 'discriminator'])
# # Other optimizer to try AdversarialOptimizerAlternating
# model.adversarial_compile(adversarial_optimizer=AdversarialOptimizerSimultaneous(),
#                           player_optimizers=['adam', 'adam'], loss='binary_crossentropy')
#
# # Print Summary of Models
# generative_model.summary()
# discriminator_model.summary()
# gan.summary()
#
# # Train
# # gan_targets takes as inputs the # of samples
# training_record = model.fit(x=x_train, y=gan_targets(x_train.shape[0]), epochs=epochs,
#                             batch_size=batch_size)
#
# # Display plot of loss over training
# # plt.plot(history.history['player_0_loss'])
# # plt.plot(history.history['player_1_loss'])
# # plt.plot(history.history['loss'])
#
# # Predict (i.e. produce new samples)
# zsamples = np.random.normal(size=(1, input_dimension))
# pred = generative_model.predict(zsamples)
# print(pred)
#
# # Save new samples to file
# # new_samples = pd.DataFrame(pred)
# # new_samples.to_csv(os.path.join(output_base_path, 'new_samples.csv'))
#
# # save models
# generative_model.save(os.path.join(output_base_path, 'generator.h5'))
# discriminator_model.save(os.path.join(output_base_path, "discriminator.h5"))
# # # save training_record
# # df = pd.DataFrame(training_record.history)
# # df.to_csv(os.path.join(output_base_path, 'training_record.csv'))
# #
# # # save models
# # generative_model.save(os.path.join(output_base_path, 'generator.h5'))
# # discriminator_model.save(os.path.join(output_base_path, "discriminator.h5"))
