
Commit

Merge pull request #21 from lrm22005/Luis
Luis
lrm22005 committed Apr 18, 2024
2 parents 3881e71 + f8fcd3e commit 3936b55
Showing 8 changed files with 96 additions and 3 deletions.
Binary file not shown.
28 changes: 27 additions & 1 deletion BML_project/active_learning/ss_active_learning.py
@@ -57,6 +57,30 @@ def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):

return minibatch_kmeans

""" from sklearn.cluster import MiniBatchKMeans
import numpy as np
def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100, n_init='auto'):
# Initialize MiniBatchKMeans with explicit n_init to suppress the FutureWarning
minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, n_init=n_init, random_state=0, batch_size=batch_size)
# Prepare an empty list to collect all data for fitting
all_data = []
# Iterate through data_loader and collect data
for batch in data_loader:
# Assuming 'data' is a key in your batch dict that contains the features
data = batch['data'].view(batch['data'].size(0), -1).cpu().numpy() # Adjust as necessary
all_data.append(data)
# Concatenate all data collected from the batches
all_data_np = np.concatenate(all_data, axis=0)
# Fit MiniBatchKMeans with all collected data at once
minibatch_kmeans.fit(all_data_np)
return minibatch_kmeans """
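
Editor's note: the quoted-out variant above fits once over the concatenated batches rather than incrementally. A minimal sketch of the batch layout it expects; the dict-style batches and random features here are illustrative assumptions, not the project's real loader:

import torch
from torch.utils.data import DataLoader, Dataset

class _FakeDataset(Dataset):  # hypothetical stand-in for the project's dataset
    def __init__(self, n=512, dim=40):
        self.x = torch.randn(n, dim)  # fake features, shaped (n_samples, n_features)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, i):
        # Each item is a dict with a 'data' key, matching the batch['data'] access above
        return {'data': self.x[i]}

loader = DataLoader(_FakeDataset(), batch_size=100)
# kmeans = run_minibatch_kmeans(loader, n_clusters=4, device='cpu')  # once un-commented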

# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device):
# # Compare K-Means with GP model predictions
# all_data, all_labels = [], []
@@ -80,7 +104,9 @@ def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader
        kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
        all_labels.append(labels.cpu().numpy())
        all_data.append((gp_predictions, kmeans_predictions))

        print(f"Processed batch size: {len(current_batch_labels)}, Cumulative original_labels size: {len(original_labels)}, Cumulative gp_predictions size: {len(gp_predictions)}")
        if len(current_batch_labels) < expected_batch_size:
            print(f"Last batch processed with size: {len(current_batch_labels)}")
    return all_data, np.concatenate(all_labels)
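
Editor's note: the function returns per-batch (gp_predictions, kmeans_predictions) pairs plus the concatenated labels. A hedged sketch of one way to consume that output, assuming both arrays in each pair are equal-length integer id vectors; raw agreement is only meaningful after aligning cluster ids to classes, which this sketch skips:

import numpy as np

def summarize_agreement(gp_vs_kmeans_data):
    # Flatten the per-batch pairs back into two long prediction vectors
    gp = np.concatenate([g for g, _ in gp_vs_kmeans_data])
    km = np.concatenate([k for _, k in gp_vs_kmeans_data])
    # Fraction of samples on which the GP class id equals the K-Means cluster id
    return float(np.mean(gp == km))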

import random
64 changes: 64 additions & 0 deletions BML_project/main_checkpoints_updated.py
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 7 15:34:31 2024
@author: lrm22005
"""
import os
import tqdm
import torch
from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples
from models.ss_gp_model import MultitaskGPModel, train_gp_model
from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data
from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions, label_samples
from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def main():
    n_classes = 4
    batch_size = 1024
    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
    data_format = 'pt'

    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)

    # Initialize result storage
    results = {
        'train_loss': [],
        'validation_metrics': {'precision': [], 'recall': [], 'f1_score': [], 'auc_roc': []},
        'active_learning': {'validation_metrics': []},  # Store validation metrics for each active learning iteration
        'test_metrics': None
    }

    # Initial model training
    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_full.pt')

    # Save initial training metrics
    results['train_loss'].extend(training_metrics['train_loss'])
    for metric in ['precision', 'recall', 'f1_score']:
        results['validation_metrics'][metric].extend(training_metrics[metric])

    active_learning_iterations = 10
    for iteration in tqdm.tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration'):
        uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=batch_size, device=device)
        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)

        # Re-train the model with updated training data
        model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')

        # Store validation metrics for each active learning iteration
        results['active_learning']['validation_metrics'].append(val_metrics)

    # Final evaluations
    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
    results['test_metrics'] = test_metrics

    # Visualization of results
    plot_training_performance(results['train_loss'], results['validation_metrics'])
    plot_results(results['test_metrics'])  # Adjust this function to handle the structure of test_metrics

    print("Final Test Metrics:", results['test_metrics'])

if __name__ == "__main__":
    main()
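
Editor's note: train_gp_model is imported, not defined in this diff; its patience and checkpoint_path arguments suggest an early-stopping save/restore loop roughly like the sketch below. This is an assumption about its shape, not the repository's actual implementation; step_fn and eval_fn are hypothetical callables.

import torch

def train_with_checkpointing(model, likelihood, step_fn, eval_fn,
                             num_iterations, patience, checkpoint_path):
    # step_fn(): runs one training iteration; eval_fn(): returns validation loss
    best_val, stale = float('inf'), 0
    for _ in range(num_iterations):
        step_fn()
        val = eval_fn()
        if val < best_val:
            # Validation improved: reset the patience counter and snapshot the model
            best_val, stale = val, 0
            torch.save({'model': model.state_dict(),
                        'likelihood': likelihood.state_dict()}, checkpoint_path)
        else:
            stale += 1
            if stale >= patience:  # stop once validation stops improving
                break
    # Restore the best snapshot before returning
    ckpt = torch.load(checkpoint_path)
    model.load_state_dict(ckpt['model'])
    likelihood.load_state_dict(ckpt['likelihood'])
    return model, likelihood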
Binary file modified BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc
Binary file not shown.
7 changes: 5 additions & 2 deletions BML_project/ss_main.py
@@ -6,11 +6,11 @@
"""
import tqdm
import torch
-from utils.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples
from models.ss_gp_model import MultitaskGPModel, train_gp_model
from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data
from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions
-from utils.visualization import plot_comparative_results, plot_training_performance, plot_results
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -50,6 +50,7 @@ def main():

        # Update the training loader with uncertain samples
        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
        print(f"Updated training data size: {len(train_loader.dataset)}")

        # Re-train the model with the updated training data
        model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
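
Editor's note: update_train_loader_with_uncertain_samples is also imported rather than shown. One common way to realize that signature is to append the selected samples to the training set and rebuild the loader; a sketch assuming map-style datasets, where the pool_dataset argument is hypothetical:

from torch.utils.data import ConcatDataset, DataLoader, Subset

def update_train_loader_with_uncertain_samples(train_loader, indices, batch_size, pool_dataset=None):
    # pool_dataset: the dataset the uncertainty indices refer to (hypothetical here;
    # in this sketch it defaults to the training set itself)
    pool = pool_dataset if pool_dataset is not None else train_loader.dataset
    picked = Subset(pool, list(indices))                    # the uncertain samples
    grown = ConcatDataset([train_loader.dataset, picked])   # append to training data
    return DataLoader(grown, batch_size=batch_size, shuffle=True)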
@@ -71,6 +72,8 @@

    results['test_metrics'] = test_metrics
    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)

    print(f"Length of test_original_labels: {len(test_original_labels)}, Number of (gp, kmeans) prediction batches: {len(test_gp_vs_kmeans_data)}")
    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)

    # Visualization of results
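
Editor's note: plot_comparative_results is likewise defined elsewhere in the repository. A minimal matplotlib sketch of the kind of figure the call implies, showing K-Means cluster composition per true label as stacked bars; the layout and names are assumptions:

import numpy as np
import matplotlib.pyplot as plt

def plot_comparative_results(gp_vs_kmeans_data, original_labels):
    # Stack per-batch K-Means predictions back into one flat array
    km = np.concatenate([k for _, k in gp_vs_kmeans_data])
    # Contingency counts: rows = true labels, columns = cluster ids
    labels, clusters = np.unique(original_labels), np.unique(km)
    counts = np.array([[np.sum((original_labels == l) & (km == c))
                        for c in clusters] for l in labels])
    fig, ax = plt.subplots()
    bottom = np.zeros(len(labels))
    for j, c in enumerate(clusters):  # stacked bars: one segment per cluster
        ax.bar([str(l) for l in labels], counts[:, j], bottom=bottom, label=f"cluster {c}")
        bottom += counts[:, j]
    ax.set_xlabel("true label")
    ax.set_ylabel("count")
    ax.legend()
    plt.show()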
Binary file modified BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc
Binary file not shown.
Binary file modified BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc
Binary file not shown.
Binary file modified BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc
Binary file not shown.
