diff --git a/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc new file mode 100644 index 0000000..45c9875 Binary files /dev/null and b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc differ diff --git a/BML_project/active_learning/ss_active_learning.py b/BML_project/active_learning/ss_active_learning.py new file mode 100644 index 0000000..546c4ad --- /dev/null +++ b/BML_project/active_learning/ss_active_learning.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:23:23 2023 + +@author: lrm22005 +""" +import numpy as np +import random +import torch +from torch.utils.data import DataLoader +from sklearn.cluster import MiniBatchKMeans + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def label_samples(uncertain_samples, validation_data): + labels = [validation_data[sample_id]['label'] for sample_id in uncertain_samples] + return uncertain_samples, labels + +def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2): + gp_model.eval() + gp_likelihood.eval() + uncertain_sample_indices = [] + sampled_batches = random.sample(list(val_loader), n_batches) # Randomly sample n_batches from val_loader + + with torch.no_grad(): + for batch in sampled_batches: + # reduced_data = apply_tsne(batch['data'].reshape(batch['data'].size(0), -1), n_components=n_components) + # reduced_data_tensor = torch.Tensor(reduced_data).to(device) + reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device) + predictions = gp_likelihood(gp_model(reduced_data_tensor)) + var = predictions.variance + top_indices = torch.argsort(-var.flatten())[:n_samples] + uncertain_sample_indices.extend(top_indices.cpu().numpy()) + + return uncertain_sample_indices[:n_samples] + +# def uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_components=2): +# gp_model.eval() +# gp_likelihood.eval() +# uncertain_sample_indices = [] +# with torch.no_grad(): +# for batch_idx, batch in tqdm(enumerate(val_loader), desc='Uncertainty Sampling', unit='batch'): +# reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device) +# predictions = gp_likelihood(gp_model(reduced_data_tensor)) +# var = predictions.variance +# top_indices = torch.argsort(-var.flatten())[:n_samples] +# batch_uncertain_indices = [batch_idx * val_loader.batch_size + idx for idx in top_indices] +# uncertain_sample_indices.extend(batch_uncertain_indices) +# return uncertain_sample_indices[:n_samples] + +def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100): + # Initialize MiniBatchKMeans + minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size) + + # Iterate through data_loader and fit MiniBatchKMeans + for batch in data_loader: + data = batch['data'].view(batch['data'].size(0), -1).to(device).cpu().numpy() + minibatch_kmeans.partial_fit(data) + + return minibatch_kmeans + +# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device): +# # Compare K-Means with GP model predictions +# all_data, all_labels = [], [] +# for batch in data_loader: +# data = batch['data'].view(batch['data'].size(0), -1).to(device) +# labels = batch['label'].to(device) +# gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy() +# kmeans_predictions = kmeans_model.predict(data.cpu().numpy()) +# all_labels.append(labels.cpu().numpy()) +# 
all_data.append((gp_predictions, kmeans_predictions))
+#     return all_data, np.concatenate(all_labels)
+
+def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
+    all_data, all_labels = [], []
+    sampled_batches = random.sample(list(data_loader), n_batches) # Randomly sample n_batches from data_loader
+
+    for batch in sampled_batches:
+        data = batch['data'].view(batch['data'].size(0), -1).to(device)
+        labels = batch['label'].to(device)
+        gp_predictions = gp_model(data).mean.argmax(dim=-1).cpu().numpy()  # argmax over the class dimension, giving one label per sample
+        kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
+        all_labels.append(labels.cpu().numpy())
+        all_data.append((gp_predictions, kmeans_predictions))
+
+    return all_data, np.concatenate(all_labels)
+
+import random
+
+def refined_uncertainty_sampling(gp_model, gp_likelihood, kmeans_model, data_loader, n_samples, n_batches, uncertainty_threshold=0.2):
+    gp_model.eval()
+    gp_likelihood.eval()
+    uncertain_sample_indices = []
+
+    # Calculate the total number of batches in the DataLoader
+    total_batches = len(data_loader)
+
+    # Ensure that n_batches does not exceed total_batches
+    n_batches = min(n_batches, total_batches)
+
+    # Randomly sample n_batches from data_loader
+    sampled_batches = random.sample(list(data_loader), n_batches)
+
+    with torch.no_grad():
+        for batch in sampled_batches:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+
+            # Calculate the difference between K-means and GP predictions
+            disagreement = (gp_predictions.mean.argmax(dim=-1).cpu().numpy() != kmeans_predictions).astype(int)
+
+            # Calculate uncertainty based on variance of GP predictions
+            uncertainty = gp_predictions.variance.cpu().numpy()
+
+            # Select samples where the disagreement is high and the model is uncertain
+            uncertain_indices = np.where((disagreement > 0) & (uncertainty > uncertainty_threshold))[0]
+            uncertain_sample_indices.extend(uncertain_indices)
+
+    return uncertain_sample_indices[:n_samples]
diff --git a/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc b/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc
new file mode 100644
index 0000000..7ea0e5a
Binary files /dev/null and b/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc differ
diff --git a/BML_project/models/ss_gp_model.py b/BML_project/models/ss_gp_model.py
new file mode 100644
index 0000000..c18f06f
--- /dev/null
+++ b/BML_project/models/ss_gp_model.py
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:01:41 2023
+
+@author: lrm22005
+"""
+import numpy as np
+from tqdm import tqdm
+import torch
+import gpytorch
+from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
+from sklearn.preprocessing import label_binarize
+
+num_latents = 6 # This should match the complexity of your data or the number of tasks
+num_tasks = 4 # This should match the number of output classes or tasks
+num_inducing_points = 50 # This is independent and should be sufficient for the input space
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+class MultitaskGPModel(gpytorch.models.ApproximateGP):
+    def __init__(self):
+        # Let's use a different set of inducing points for each latent function
+        inducing_points = torch.rand(num_latents, num_inducing_points, 128 * 128) # Assuming flattened 128x128 images, so the inducing points must also have 128 * 128 features
+
+        # We have to mark the CholeskyVariationalDistribution as batch
+        # so 
that we learn a variational distribution for each task + variational_distribution = gpytorch.variational.CholeskyVariationalDistribution( + inducing_points.size(-2), batch_shape=torch.Size([num_latents]) + ) + + # We have to wrap the VariationalStrategy in a LMCVariationalStrategy + # so that the output will be a MultitaskMultivariateNormal rather than a batch output + variational_strategy = gpytorch.variational.LMCVariationalStrategy( + gpytorch.variational.VariationalStrategy( + self, inducing_points, variational_distribution, learn_inducing_locations=True + ), + num_tasks=num_tasks, + num_latents=num_latents, + latent_dim=-1 + ) + + super().__init__(variational_strategy) + + # The mean and covariance modules should be marked as batch + # so we learn a different set of hyperparameters + self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents])) + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])), + batch_shape=torch.Size([num_latents]) + ) + + def forward(self, x): + # The forward function should be written as if we were dealing with each output + # dimension in batch + # Ensure x is correctly shaped. It should have the same last dimension size as inducing_points + # x should be reshaped or sliced to have the shape [?, 1] where ? can be any size + # For example, if x originally has shape [N, D], and D != 1, you need to modify x accordingly + # print(f"Input shape: {x.shape}") + # x = x.view(x.size(0), -1) # Flattening the images + # print(f"Input shape after flattening: {x.shape}") # Debugging input shape + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + + # Debugging: Print shapes of intermediate outputs + # print(f"Mean shape: {mean_x.shape}, Covariance shape: {covar_x.shape}") + latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + # print(f"Latent prediction shape: {latent_pred.mean.shape}, {latent_pred.covariance_matrix.shape}") + + return latent_pred + + +def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt'): + model = MultitaskGPModel().to(device) + likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=4, num_classes=4).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=0.1) + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset)) + best_val_loss = float('inf') + epochs_no_improve = 0 + + metrics = { + 'precision': [], + 'recall': [], + 'f1_score': [], + 'auc_roc': [], + 'train_loss': [] # Add a list to store training losses + } + + for epoch in tqdm(range(num_iterations), desc='Training', unit='epoch', leave=False): + for train_batch in train_loader: + model.train() + likelihood.train() + optimizer.zero_grad() + train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device) # Use reshape here + train_y = train_batch['label'].to(device) + output = model(train_x) + loss = -mll(output, train_y) + metrics['train_loss'].append(loss.item()) # Store the training loss + loss.backward() + optimizer.step() + + # Stochastic validation + model.eval() + likelihood.eval() + with torch.no_grad(): + val_indices = torch.randperm(len(val_loader.dataset))[:int(1 * len(val_loader.dataset))] + val_loss = 0.0 + val_labels = [] + val_predictions = [] + for idx in val_indices: + val_batch = val_loader.dataset[idx] + val_x = val_batch['data'].reshape(-1).unsqueeze(0).to(device) # Use reshape here + val_y = 
torch.tensor([val_batch['label']], device=device)
+                val_output = model(val_x)
+                val_loss_batch = -mll(val_output, val_y).sum()
+                val_loss += val_loss_batch.item()
+                val_labels.append(val_y.item())
+                val_predictions.append(val_output.mean.argmax(dim=-1).item())
+
+            precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro')
+            # auc_roc = roc_auc_score(label_binarize(val_labels, classes=np.arange(n_classes)),
+            #                         label_binarize(val_predictions, classes=np.arange(n_classes)),
+            #                         multi_class='ovr')
+
+            metrics['precision'].append(precision)
+            metrics['recall'].append(recall)
+            metrics['f1_score'].append(f1)
+            # metrics['auc_roc'].append(auc_roc)
+            val_loss /= len(val_indices)
+
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            epochs_no_improve = 0
+            torch.save({'model_state_dict': model.state_dict(),
+                        'likelihood_state_dict': likelihood.state_dict(),
+                        'optimizer_state_dict': optimizer.state_dict()}, checkpoint_path)
+        else:
+            epochs_no_improve += 1
+            if epochs_no_improve >= patience:
+                print(f"Early stopping triggered at epoch {epoch+1}")
+                break
+
+    checkpoint = torch.load(checkpoint_path)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
+    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+
+    return model, likelihood, metrics
+
+def semi_supervised_labeling(kmeans_model, gp_model, gp_likelihood, data_loader, confidence_threshold=0.8):
+    gp_model.eval()
+    gp_likelihood.eval()
+    labeled_samples = []
+
+    with torch.no_grad():
+        for batch in data_loader:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+
+            # Use GP predictions where the model is confident. GPyTorch predictive
+            # distributions expose no .confidence() method, so the top class
+            # probability of the predictive mean serves as the confidence score.
+            confident_indices = gp_predictions.mean.max(dim=-1).values.cpu().numpy() > confidence_threshold
+            for i, confident in enumerate(confident_indices):
+                if confident:
+                    labeled_samples.append((data_tensor[i], gp_predictions.mean.argmax(dim=-1)[i].item()))
+                else:
+                    labeled_samples.append((data_tensor[i], kmeans_predictions[i]))
+
+    return labeled_samples
+
+def calculate_elbo(model, likelihood, data_loader):
+    """
+    Calculates the ELBO (Evidence Lower Bound) score for the model on the given data.
+
+    Args:
+    - model: The trained Gaussian Process model.
+    - likelihood: The likelihood associated with the GP model.
+    - data_loader: DataLoader providing the data over which to calculate ELBO.
+
+    Returns:
+    - elbo_score: The calculated ELBO score.
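+
+    Example (illustrative; names as used in ss_main_ss.py):
+        >>> current_elbo = calculate_elbo(model, likelihood, train_loader)
+        >>> use_threshold_labeling = current_elbo >= elbo_threshold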
+ """ + model.eval() + likelihood.eval() + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(data_loader.dataset)) + + with torch.no_grad(): + elbo_score = 0.0 + for batch in data_loader: + train_x = batch['data'].reshape(batch['data'].size(0), -1).to(device) + train_y = batch['label'].to(device) + output = model(train_x) + # Calculate the ELBO as the negative loss + elbo_score += -mll(output, train_y).sum().item() + + # Average the ELBO over all data samples + elbo_score /= len(data_loader.dataset) + + return elbo_score diff --git a/BML_project/ss_main.py b/BML_project/ss_main.py new file mode 100644 index 0000000..a610684 --- /dev/null +++ b/BML_project/ss_main.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:47:27 2023 + +@author: lrm22005 +""" +import tqdm +import torch +from utils.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples +from models.ss_gp_model import MultitaskGPModel, train_gp_model +from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data +from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions +from utils.visualization import plot_comparative_results, plot_training_performance, plot_results + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def main(): + # Set parameters like n_classes, batch_size, etc. + n_classes = 4 + batch_size = 1024 + clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids() + data_format = 'pt' + # Preprocess data + train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size) + + kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device) + + # Initialize result storage + results = { + 'train_loss': [], + 'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []}, + 'test_metrics': None + } + + # Initial model training + model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes) + + # Save the training metrics for future visualization + results['train_loss'].extend(training_metrics['train_loss']) + results['validation_metrics']['precision'].extend(training_metrics['precision']) + results['validation_metrics']['recall'].extend(training_metrics['recall']) + results['validation_metrics']['f1'].extend(training_metrics['f1_score']) + # results['validation_metrics']['auc_roc'].extend(training_metrics['auc_roc']) + + active_learning_iterations = 10 + # Active Learning Iterations + for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True): + # Perform uncertainty sampling to select new samples from the validation set + uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=batch_size, n_batches=5) + + # Update the training loader with uncertain samples + train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size) + + # Re-train the model with the updated training data + model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt') + + # Store the validation metrics after each active learning iteration + 
+        results['validation_metrics']['precision'].extend(val_metrics['precision'])
+        results['validation_metrics']['recall'].extend(val_metrics['recall'])
+        results['validation_metrics']['f1'].extend(val_metrics['f1_score'])  # train_gp_model reports this metric under 'f1_score'
+        # results['validation_metrics']['auc_roc'].extend(val_metrics['auc_roc'])
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)  # plot_results expects the full results dict, not just the test metrics
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/BML_project/ss_main_ss.py b/BML_project/ss_main_ss.py
new file mode 100644
index 0000000..0d0aed4
--- /dev/null
+++ b/BML_project/ss_main_ss.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 4 14:40:13 2024
+
+@author: lrm22005
+"""
+from tqdm import tqdm
+import torch
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_labeled_samples, update_train_loader_with_uncertain_samples
+from models.ss_gp_model import MultitaskGPModel, train_gp_model, semi_supervised_labeling, calculate_elbo
+from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data, threshold_based_labeling, resolve_conflicts
+from active_learning.ss_active_learning import run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions, refined_uncertainty_sampling
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def main():
+    # Set parameters like n_classes, batch_size, etc.
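+    # Note: batch_size below also serves as n_samples for refined_uncertainty_sampling,
+    # so it controls both the DataLoader batch size and how many uncertain samples
+    # are requested per active-learning iteration.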
+    n_classes = 4
+    batch_size = 1024
+    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
+    data_format = 'pt'
+
+    # Preprocess data
+    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)
+
+    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device, batch_size=batch_size)
+
+    # Initialize result storage
+    results = {
+        'train_loss': [],
+        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
+        'test_metrics': None
+    }
+
+    # Initial model training
+    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes)
+
+    # Save the training metrics for future visualization
+    results['train_loss'].extend(training_metrics['train_loss'])
+    results['validation_metrics']['precision'].extend(training_metrics['precision'])
+    results['validation_metrics']['recall'].extend(training_metrics['recall'])
+    results['validation_metrics']['f1'].extend(training_metrics['f1_score'])
+
+    elbo_threshold = -0.5 # Define a threshold for the ELBO score
+    use_threshold_labeling = False # Initially, do not use threshold-based labeling
+
+    active_learning_iterations = 10
+    # Active Learning Iterations
+    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
+        # Perform uncertainty sampling to select new samples from the validation set
+        uncertain_sample_indices = refined_uncertainty_sampling(model, likelihood, kmeans_model, val_loader, n_samples=batch_size, n_batches=5)
+
+        # Semi-supervised labeling with K-means and GP model
+        semi_supervised_samples = semi_supervised_labeling(kmeans_model, model, likelihood, val_loader)
+
+        labeled_samples = semi_supervised_samples # Initially, use only semi-supervised samples
+
+        if use_threshold_labeling:
+            # Threshold-based labeling to decide when a sample's predicted label should be trusted
+            threshold_based_samples = threshold_based_labeling(kmeans_model, model, likelihood, val_loader)
+
+            # Combine the two sets of labeled samples
+            # Implement your logic for resolving conflicts between the two methods here
+            labeled_samples = resolve_conflicts(semi_supervised_samples, threshold_based_samples)
+
+        # Update the training loader with uncertain and newly labeled samples
+        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
+        train_loader = update_train_loader_with_labeled_samples(train_loader, labeled_samples, batch_size)
+
+        # Re-train the model with the updated training data
+        model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
+
+        # Store the ELBO score after each active learning iteration
+        current_elbo = calculate_elbo(model, likelihood, train_loader)
+        results.setdefault('elbo', []).append(current_elbo)  # results has no 'elbo' key at initialization
+
+        # Determine if the threshold-based labeling should be used in the next iteration based on the ELBO score
+        if current_elbo >= elbo_threshold:
+            use_threshold_labeling = True
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = 
evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)  # plot_results expects the full results dict, not just the test metrics
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc b/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc
new file mode 100644
index 0000000..0b1a7eb
Binary files /dev/null and b/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc differ
diff --git a/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc b/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc
new file mode 100644
index 0000000..14c7dfa
Binary files /dev/null and b/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc differ
diff --git a/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc b/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc
new file mode 100644
index 0000000..a8c3afd
Binary files /dev/null and b/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc differ
diff --git a/BML_project/utils_gp/data_loader.py b/BML_project/utils_gp/data_loader.py
new file mode 100644
index 0000000..14ed5fe
--- /dev/null
+++ b/BML_project/utils_gp/data_loader.py
@@ -0,0 +1,297 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:09:02 2023
+
+@author: lrm22005
+"""
+import os
+import numpy as np
+import pandas as pd
+from PIL import Image
+import torch
+from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import StandardScaler
+from torchvision.transforms import ToTensor
+
+def split_uids():
+    # ====== Load the per subject arrythmia summary ======
+    df_summary = pd.read_csv(r'\\grove.ad.uconn.edu\research\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn\final_attemp_4_1_Dong_Ohm_summary_20231025.csv')
+    df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3)
+
+    df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT']
+    df_summary['sample_AF'] = df_summary['AF']
+
+    df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF'])
+
+    all_UIDs = df_summary['UID'].unique()
+    # ====================================================
+    # ====== AF trial separation ======
+    # R:\ENGR_Chon\Dong\Numbers\Pulsewatch_numbers\Fahimeh_CNNED_general_ExpertSystemwApplication\tbl_file_name\TrainingSet_final_segments
+    AF_trial_Fahimeh_train = ['402','410']
+    AF_trial_Fahimeh_test = ['301', '302', '305', '306', '307', '310', '311',
+                             '312', '318', '319', '320', '321', '322', '324',
+                             '325', '327', '329', '400', '406', '407', '409',
+                             '414']
+    AF_trial_Fahimeh_did_not_use = ['405', '413', '415', '416', '420', '421', '422', '423']
+    AF_trial_paroxysmal_AF = ['408','419']
+
+    AF_trial_train = AF_trial_Fahimeh_train
+    AF_trial_test = AF_trial_Fahimeh_test
+    AF_trial_unlabeled = AF_trial_Fahimeh_did_not_use + AF_trial_paroxysmal_AF
+    print(f'AF trial: {len(AF_trial_train)} training subjects 
{AF_trial_train}') + print(f'AF trial: {len(AF_trial_test)} testing subjects {AF_trial_test}') + print(f'AF trial: {len(AF_trial_unlabeled)} unlabeled subjects {AF_trial_unlabeled}') + # ================================= + # === Clinical trial AF subjects separation === + clinical_trial_AF_subjects = ['005', '017', '026', '051', '075', '082'] + + remaining_UIDs = [] + count_NSR = [] + import math + for index, row in df_summary.iterrows(): + UID = row['UID'] + this_NSR = row['sample_nonAF'] + if math.isnan(this_NSR): + # There is no segment in this subject, skip this UID. + print(f'---------UID {UID} has no segments.------------') + continue + if UID not in AF_trial_train and UID not in AF_trial_test and UID not in clinical_trial_AF_subjects \ + and not UID[0] == '3' and not UID[0] == '4': + remaining_UIDs.append(UID) + count_NSR.append(this_NSR) + + from numpy import random + random.seed(seed=42) + from numpy.random import choice + list_of_candidates = remaining_UIDs + number_of_items_to_pick = round(len(list_of_candidates) * 0.15) # 10% labeled for training, 5% for testing. + temp_sum = sum(count_NSR) + probability_distribution = [x/temp_sum for x in count_NSR] + probability_distribution = [(1-x/temp_sum)/ (len(count_NSR)-1) for x in count_NSR]# Subjects with fewer segments have higher chance to be selected. Make sure the sum is one. + draw = choice(list_of_candidates, number_of_items_to_pick, + p=probability_distribution, replace=False) + + clinical_trial_train = list(draw[:round(len(list_of_candidates) * 0.1)]) + clinical_trial_test_nonAF = list(draw[round(len(list_of_candidates) * 0.1):]) + clinical_trial_test_temp = clinical_trial_test_nonAF + clinical_trial_AF_subjects + clinical_trial_test = [] + for UID in clinical_trial_test_temp: + # UID 051 and maybe other UIDs had no segments (unknown reason). 
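+        # Keep only the UIDs that actually appear in the summary file.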
+ if UID in all_UIDs: + clinical_trial_test.append(UID) + + clinical_trial_unlabeled = [] + for UID in all_UIDs: + if UID not in clinical_trial_train and UID not in clinical_trial_test and not UID[0] == '3' and not UID[0] == '4': + clinical_trial_unlabeled.append(UID) + print(f'Clinical trial: selected {len(clinical_trial_train)} UIDs for training {clinical_trial_train}') + print(f'Clinical trial: selected {len(clinical_trial_test)} UIDs for testing {clinical_trial_test}') + print(f'Clinical trial: selected {len(clinical_trial_unlabeled)} UIDs for unlabeled {clinical_trial_unlabeled}') + + clinical_trial_train = [clinical_trial_train[0]] + clinical_trial_test = [clinical_trial_test[0]] + clinical_trial_unlabeled = clinical_trial_unlabeled[0:4] + + return clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled + +class CustomDataset(Dataset): + def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False): + self.data_path = data_path + self.labels_path = labels_path + self.UIDs = UIDs + self.standardize = standardize + self.data_format = data_format + self.read_all_labels = read_all_labels + self.transforms = ToTensor() + self.refresh_dataset() + + def refresh_dataset(self): + # Extract unique segment names and their corresponding labels + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + def add_uids(self, new_uids): + # Ensure new UIDs are unique and not already in the dataset + unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs] + + # Add unique new UIDs and refresh the dataset + self.UIDs.extend(unique_new_uids) + self.refresh_dataset() + + def __len__(self): + return len(self.segment_names) + + def __getitem__(self, idx): + segment_name = self.segment_names[idx] + label = self.labels[segment_name] + + if hasattr(self, 'all_data') and idx < len(self.all_data): + # Data is stored in memory + time_freq_tensor = self.all_data[idx] + else: + # Load data on-the-fly based on the segment_name + time_freq_tensor = self.load_data(segment_name) + + return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name} + + def add_data_label_pair(self, data, label): + # Assign a unique ID or name for the new data + new_id = len(self.segment_names) + segment_name = f"new_data_{new_id}" + + # Append the new data and label + self.segment_names.append(segment_name) + self.labels[segment_name] = label + + # Append the new data tensor to an attribute that holds all the data + if hasattr(self, 'all_data'): + self.all_data.append(data) + else: + self.all_data = [data] + + def extract_segment_names_and_labels(self): + segment_names = [] + labels = {} + + for UID in self.UIDs: + label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv") + if os.path.exists(label_file): + label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) + label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) + for idx, segment_name in enumerate(label_segment_names): + label_val = label_data['label'].values[idx] + if self.read_all_labels: + # Assign -1 if label is not in [0, 1, 2, 3] + labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1 + if segment_name not in segment_names: + segment_names.append(segment_name) + else: + # Only add segments with labels in [0, 1, 2, 3] + if label_val in [0, 1, 2, 3] and segment_name not in segment_names: + segment_names.append(segment_name) + labels[segment_name] = label_val + + return segment_names, 
labels + + def load_data(self, segment_name): + data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0]) + seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.' + self.data_format) + + try: + if self.data_format == 'csv' and seg_path.endswith('.csv'): + time_freq_plot = np.array(pd.read_csv(seg_path, header=None)) + time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128) + elif self.data_format == 'png' and seg_path.endswith('.png'): + img = Image.open(seg_path) + img_data = np.array(img) + time_freq_tensor = torch.Tensor(img_data).unsqueeze(0) + elif self.data_format == 'pt' and seg_path.endswith('.pt'): + time_freq_tensor = torch.load(seg_path) + else: + raise ValueError("Unsupported file format") + if self.standardize: + time_freq_tensor = self.standard_scaling(time_freq_tensor) # Standardize the data + + return time_freq_tensor.clone() + + except Exception as e: + print(f"Error processing segment: {segment_name}. Exception: {str(e)}") + return torch.zeros((1, 128, 128)) # Return zeros in case of an error + + def standard_scaling(self, data): + scaler = StandardScaler() + data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape) + return torch.Tensor(data) + +def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=False, drop_last=False, num_workers=4): + dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels) + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2) + return dataloader + +def get_data_paths(data_format, is_linux=False, is_hpc=False): + if is_linux: + base_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch" + labels_base_path = "/mnt/r/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn" + saving_base_path = "/mnt/r/ENGR_Chon/Luis/Research/Casseys_case/Project_1_analysis" + elif is_hpc: + base_path = "/gpfs/scratchfs1/kic14002/doh16101" + labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005" + saving_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005/Casseys_case/Project_1_analysis" + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = "R:\ENGR_Chon\Dong\MATLAB_generate_results\\NIH_PulseWatch" + labels_base_path = "R:\ENGR_Chon\\NIH_Pulsewatch_Database\Adjudication_UConn" + saving_base_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Luis\Research\Casseys_case" + if data_format == 'csv': + data_path = os.path.join(base_path, "TFS_csv") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "Project_1_analysis") + elif data_format == 'png': + data_path = os.path.join(base_path, "TFS_plots") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "Project_1_analysis") + elif data_format == 'pt': + data_path = os.path.join(base_path, "PT_format") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "Project_1_analysis") + else: + raise ValueError("Invalid data format. 
Choose 'csv' or 'png.") + return data_path, labels_path, saving_path + +# Function to extract and preprocess data +def preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size, read_all_labels=False): + # Extracts paths and loads data into train, validation, and test loaders + data_path, labels_path, saving_path = get_data_paths(data_format) + train_loader = load_data_split_batched(data_path, labels_path, clinical_trial_train, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels) + val_loader = load_data_split_batched(data_path, labels_path, clinical_trial_test, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels) + test_loader = load_data_split_batched(data_path, labels_path, clinical_trial_unlabeled, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels) + return train_loader, val_loader, test_loader + +def map_samples_to_uids(uncertain_sample_indices, dataset): + """ + Maps indices of uncertain samples back to their corresponding segment names or UIDs. + + Args: + - uncertain_sample_indices: Indices of the uncertain samples in the dataset. + - dataset: The dataset object which contains the mapping of segment names and UIDs. + + Returns: + - List of UIDs or segment names corresponding to the uncertain samples. + """ + return [dataset.segment_names[i] for i in uncertain_sample_indices] + +def update_train_loader_with_labeled_samples(current_train_loader, labeled_samples, batch_size): + """ + Updates the training DataLoader with newly labeled samples. + + Args: + - current_train_loader: The current DataLoader for the training set. + - labeled_samples: A list of tuples, each containing a data tensor and its new label. + - batch_size: Batch size for the DataLoader. + + Returns: + - DataLoader: The updated DataLoader with the new labeled samples. 
+ """ + + # Extract the current dataset from the DataLoader + current_dataset = current_train_loader.dataset + + # Update the dataset with new samples and labels + for data_tensor, label in labeled_samples: + # Assuming the CustomDataset class has a method to add new data and labels + current_dataset.add_data_label_pair(data_tensor, label) + + # Create a new DataLoader with the updated dataset + updated_train_loader = DataLoader(current_dataset, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=4, prefetch_factor=2) + + return updated_train_loader + +def update_train_loader_with_uncertain_samples(current_train_loader, new_sample_indices, batch_size, standardize=False, data_format='csv', read_all_labels=True): + # Extract current UIDs from the current_train_loader + current_dataset = current_train_loader.dataset + # Map new_samples back to their corresponding segment names or UIDs + new_uids = map_samples_to_uids(new_sample_indices, current_dataset) + # Add new UIDs to the current dataset and refresh it + current_dataset.add_uids(new_uids) + # Create new DataLoader with the updated dataset + updated_train_loader = DataLoader(current_dataset, batch_size=batch_size, shuffle=False) + return updated_train_loader \ No newline at end of file diff --git a/BML_project/utils_gp/ss_evaluation.py b/BML_project/utils_gp/ss_evaluation.py new file mode 100644 index 0000000..5a01ee4 --- /dev/null +++ b/BML_project/utils_gp/ss_evaluation.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:17:40 2023 + +@author: lrm22005 +""" +import numpy as np +import torch +import gpytorch +from sklearn.preprocessing import label_binarize +from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score +from sklearn.metrics import precision_recall_fscore_support + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def stochastic_evaluation(model, likelihood, data_loader, device, n_classes, n_batches): + model.eval() + likelihood.eval() + random_indices = torch.randperm(len(data_loader.dataset))[:n_batches * data_loader.batch_size] + + all_predicted_labels = [] + all_actual_labels = [] + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + for idx in random_indices: + batch = data_loader.dataset[idx] + test_data = batch['data'].view(-1).unsqueeze(0).to(device) + test_label = torch.tensor([batch['label']]).to(device) + + predictions = likelihood(model(test_data)).mean + predicted_label = predictions.argmax(dim=-1).item() + + all_predicted_labels.append(predicted_label) + all_actual_labels.append(test_label.item()) + + # Compute metrics + precision, recall, f1, _ = precision_recall_fscore_support(all_actual_labels, all_predicted_labels, average='macro') + # auc_roc = roc_auc_score(label_binarize(all_actual_labels, classes=np.arange(n_classes)), + # label_binarize(all_predicted_labels, classes=np.arange(n_classes)), + # multi_class='ovr') + + return { + 'precision': precision, + 'recall': recall, + 'f1': f1, + # 'auc_roc': auc_roc + } + +def evaluate_model_on_all_data(model, likelihood, data_loader, device, n_classes): + model.eval() + likelihood.eval() + + all_predicted_labels = [] + all_test_labels = [] + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + for i, batch in enumerate(data_loader): + test_data = batch['data'].view(batch['data'].size(0), -1).to(device) + test_labels = batch['label'].to(device) + # print(f"Test data shape before t-SNE: {test_data.shape}") + + predictions = likelihood(model(test_data)).mean + # 
Debugging - check shape of predictions
+            # print(f"Predictions shape: {predictions.shape}")
+            predicted_labels = predictions.argmax(dim=-1)  # argmax over the class dimension, one label per sample
+
+            # Add debugging information
+            # print(f"Batch {i}: Predicted Labels Shape: {predicted_labels.shape}, Actual Labels Shape: {test_labels.shape}")
+
+            all_predicted_labels.append(predicted_labels.cpu().numpy())
+            all_test_labels.append(test_labels.cpu().numpy())  # .cpu() so this also works when the labels live on the GPU
+
+            # Debug the accumulation of labels
+            # current_predicted = np.concatenate(all_predicted_labels, axis=0)
+            # current_actual = np.concatenate(all_test_labels, axis=0)
+            # print(f"After Batch {i}: Accumulated Predicted Labels: {current_predicted.shape[0]}, Accumulated Actual Labels: {current_actual.shape[0]}")
+
+    # Concatenate all batch results
+    all_predicted_labels = np.concatenate(all_predicted_labels, axis=0)
+    all_test_labels = np.concatenate(all_test_labels, axis=0)
+
+    # Final check
+    # print(f"Final: Total Predicted Labels: {all_predicted_labels.shape[0]}, Total Actual Labels: {all_test_labels.shape[0]}")
+
+    # Verify if the shapes match before proceeding to calculate metrics
+    if all_predicted_labels.shape[0] != all_test_labels.shape[0]:
+        raise ValueError("Mismatch in the number of samples between predicted and actual labels")
+
+    # Compute overall evaluation metrics
+    precision, recall, f1, _ = precision_recall_fscore_support(all_test_labels, all_predicted_labels, average='macro')
+    # For AUC-ROC, one-hot encode the true labels and the predicted labels accumulated
+    # over all batches so both arrays cover every test sample
+    test_labels_one_hot = label_binarize(all_test_labels, classes=np.arange(n_classes))
+    predicted_labels_one_hot = label_binarize(all_predicted_labels, classes=np.arange(n_classes))
+    auc_roc = roc_auc_score(test_labels_one_hot, predicted_labels_one_hot, multi_class='ovr')
+    return {
+        'precision': precision,
+        'recall': recall,
+        'f1': f1,
+        'auc_roc': auc_roc
+    }
+
+def threshold_based_labeling(kmeans_model, gp_model, gp_likelihood, data_loader, agreement_threshold=0.5):
+    gp_model.eval()
+    gp_likelihood.eval()
+    new_labels = []
+
+    with torch.no_grad():
+        for batch in data_loader:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+
+            # Measure agreement between K-means and GP model
+            agreement = (gp_predictions.mean.argmax(dim=-1).cpu().numpy() == kmeans_predictions).astype(int)
+
+            # Assign labels based on agreement threshold
+            for i, agree in enumerate(agreement):
+                if agree > agreement_threshold:
+                    new_labels.append(gp_predictions.mean.argmax(dim=-1)[i].item())
+                else:
+                    new_labels.append(kmeans_predictions[i])
+
+    return new_labels
+
+def resolve_conflicts(semi_supervised_samples, threshold_based_samples):
+    """
+    Resolves conflicts between two sets of labeled samples.
+
+    Args:
+    - semi_supervised_samples: Labeled samples from the semi_supervised_labeling method.
+    - threshold_based_samples: Labeled samples from the threshold_based_labeling method.
+
+    Returns:
+    - resolved_samples: The resolved set of labeled samples.
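+
+    Example (illustrative; the function expects both inputs as (segment_name, label) pairs):
+        >>> merged = resolve_conflicts([('seg_a', 1)], [('seg_a', 2), ('seg_b', 0)])
+        >>> sorted(merged)  # on conflict the semi-supervised label wins
+        [('seg_a', 1), ('seg_b', 0)]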
+ """ + resolved_samples = [] + + # Create dictionaries for quick lookup + semi_supervised_dict = {segment_name: label for segment_name, label in semi_supervised_samples} + threshold_based_dict = {segment_name: label for segment_name, label in threshold_based_samples} + + # Combine all unique segment names + all_segments = set(semi_supervised_dict.keys()).union(set(threshold_based_dict.keys())) + + for segment_name in all_segments: + if segment_name in semi_supervised_dict and segment_name in threshold_based_dict: + # If there's a conflict, resolve it here. For simplicity, we're taking the label from semi_supervised + # You can implement other strategies like majority vote, confidence weighting, or agreement only + resolved_samples.append((segment_name, semi_supervised_dict[segment_name])) + elif segment_name in semi_supervised_dict: + resolved_samples.append((segment_name, semi_supervised_dict[segment_name])) + elif segment_name in threshold_based_dict: + resolved_samples.append((segment_name, threshold_based_dict[segment_name])) + + return resolved_samples + +def parse_classification_report(report): + """Parse a classification report into a dictionary of metrics.""" + lines = report.split('\n') + main_metrics = lines[-2].split() + + # Assuming the last line is like "accuracy: x macro avg y1 y2 y3 y4" + return { + 'precision': float(main_metrics[3]), + 'recall': float(main_metrics[4]), + 'f1': float(main_metrics[5]), + 'auc_roc': None # AUC-ROC is not part of the classification report by default + } diff --git a/BML_project/utils_gp/visualization.py b/BML_project/utils_gp/visualization.py new file mode 100644 index 0000000..3ecf59b --- /dev/null +++ b/BML_project/utils_gp/visualization.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:20:55 2023 + +@author: lrm22005 +""" +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from sklearn.metrics import confusion_matrix + +def plot_training_performance(train_loss, validation_metrics): + epochs = range(1, len(train_loss) + 1) + + # Plot training loss + plt.figure(figsize=(14, 6)) + plt.subplot(1, 2, 1) + plt.plot(epochs, train_loss, 'b-', label='Training Loss') + plt.title('Training Loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + + # Plot validation metrics + plt.subplot(1, 2, 2) + plt.plot(epochs, validation_metrics['precision'], 'r-', label='Precision') + plt.plot(epochs, validation_metrics['recall'], 'g-', label='Recall') + plt.plot(epochs, validation_metrics['f1'], 'b-', label='F1 Score') + plt.plot(epochs, validation_metrics['auc_roc'], 'y-', label='AUC-ROC') + plt.title('Validation Metrics') + plt.xlabel('Epochs') + plt.ylabel('Metrics') + plt.legend() + + plt.tight_layout() + plt.show() + +def plot_results(results): + plt.figure(figsize=(12, 5)) + plt.subplot(1, 2, 1) + plt.plot(results['train_loss'], label='Train Loss') + plt.title('Training Loss Over Time') + plt.legend() + + plt.subplot(1, 2, 2) + for metric in ['precision', 'recall', 'f1']: + plt.plot(results['validation_metrics'][metric], label=metric.title()) + plt.title('Validation Metrics Over Time') + plt.legend() + plt.show() + + test_metrics = results['test_metrics'] + print("Test Metrics:") + print(f"Precision: {test_metrics['precision']}") + print(f"Recall: {test_metrics['recall']}") + print(f"F1 Score: {test_metrics['f1']}") + print(f"AUC-ROC: {test_metrics['auc_roc']}") + +def plot_comparative_results(gp_vs_kmeans_data, original_labels): + fig, axes = plt.subplots(1, 2, figsize=(14, 7)) + + # Plot 1: 
Confusion Matrix for GP Predictions vs Original Labels + gp_predictions = [pair[0] for pair in gp_vs_kmeans_data] + gp_predictions = np.concatenate(gp_predictions) + cm_gp = confusion_matrix(original_labels, gp_predictions) + sns.heatmap(cm_gp, annot=True, ax=axes[0], fmt='g') + axes[0].set_title('GP Model Predictions vs Original Labels') + axes[0].set_xlabel('Predicted Labels') + axes[0].set_ylabel('True Labels') + + # Plot 2: Confusion Matrix for K-Means Predictions vs Original Labels + kmeans_predictions = [pair[1] for pair in gp_vs_kmeans_data] + kmeans_predictions = np.concatenate(kmeans_predictions) + cm_kmeans = confusion_matrix(original_labels, kmeans_predictions) + sns.heatmap(cm_kmeans, annot=True, ax=axes[1], fmt='g') + axes[1].set_title('K-Means Predictions vs Original Labels') + axes[1].set_xlabel('Predicted Labels') + axes[1].set_ylabel('True Labels') + + plt.tight_layout() + plt.show() diff --git a/pytorch_file_generation_loader_update.py b/pytorch_file_generation_loader_update.py new file mode 100644 index 0000000..cea6242 --- /dev/null +++ b/pytorch_file_generation_loader_update.py @@ -0,0 +1,374 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 17:11:29 2023 + +@author: lrm22005 +""" + +# import os +# import pandas as pd +# import numpy as np +# import torch +# from PIL import Image +# import torch + +#### From first to the last +# def preprocess_and_save_data(data_path, output_path): +# # Make sure the output directory exists +# if not os.path.exists(output_path): +# os.makedirs(output_path) + +# # Traverse the directories for each UID +# for uid in os.listdir(data_path): +# uid_path = os.path.join(data_path, uid) +# if os.path.isdir(uid_path): +# # Make a corresponding directory in the output path +# uid_output_path = os.path.join(output_path, uid) +# if not os.path.exists(uid_output_path): +# os.makedirs(uid_output_path) + +# # Process each file within the UID directory +# for file in os.listdir(uid_path): +# if file.endswith('.csv') or file.endswith('.png'): +# file_path = os.path.join(uid_path, file) +# if file.endswith('.csv'): +# data = pd.read_csv(file_path).values +# else: # if file.endswith('.png'): +# data = np.array(Image.open(file_path)) + +# data_tensor = torch.tensor(data, dtype=torch.float32) +# output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt')) +# torch.save(data_tensor, output_file_path) + +# # Define your input and output paths +# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv' +# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format' + +# # Run the preprocessing function +# preprocess_and_save_data(input_path, output_path) + +#### From the last to the first +# def preprocess_and_save_data(data_path, output_path): +# # Make sure the output directory exists +# if not os.path.exists(output_path): +# os.makedirs(output_path) + +# # Traverse the directories for each UID +# # Get the list of directories and sort them in descending order +# uids = sorted(os.listdir(data_path), reverse=True) +# for uid in uids: +# uid_path = os.path.join(data_path, uid) +# if os.path.isdir(uid_path): +# # Make a corresponding directory in the output path +# uid_output_path = os.path.join(output_path, uid) +# if not os.path.exists(uid_output_path): +# os.makedirs(uid_output_path) + +# # Process each file within the UID directory +# for file in os.listdir(uid_path): +# if file.endswith('.csv') or 
file.endswith('.png'): +# file_path = os.path.join(uid_path, file) +# if file.endswith('.csv'): +# data = pd.read_csv(file_path).values +# else: # if file.endswith('.png'): +# data = np.array(Image.open(file_path)) + +# data_tensor = torch.tensor(data, dtype=torch.float32) +# output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt')) +# torch.save(data_tensor, output_file_path) + +# # Define your input and output paths +# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv' +# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format' + +# # Run the preprocessing function +# preprocess_and_save_data(input_path, output_path) +###################################################################################################################################################### +#### First to last +import os +import pandas as pd +import numpy as np +from PIL import Image +import torch +from concurrent.futures import ThreadPoolExecutor + +def preprocess_file(uid_path, file, uid_output_path): + file_path = os.path.join(uid_path, file) + + if file.endswith('.csv'): + data = pd.read_csv(file_path).values + elif file.endswith('.png'): + data = np.array(Image.open(file_path)) + else: + return + + data_tensor = torch.tensor(data, dtype=torch.float32) + base_name, extension = os.path.splitext(file) + output_file_path = os.path.join(uid_output_path, f'{base_name}.pt') + torch.save(data_tensor, output_file_path) + +def preprocess_and_save_data(data_path, output_path): + # Make sure the output directory exists + if not os.path.exists(output_path): + os.makedirs(output_path) + + # Traverse the directories for each UID + for uid in os.listdir(data_path): + uid_path = os.path.join(data_path, uid) + if os.path.isdir(uid_path): + # Make a corresponding directory in the output path + uid_output_path = os.path.join(output_path, uid) + if not os.path.exists(uid_output_path): + os.makedirs(uid_output_path) + + # Create a ThreadPoolExecutor for parallel processing + with ThreadPoolExecutor() as executor: + files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))] + for file in files_to_process: + executor.submit(preprocess_file, uid_path, file, uid_output_path) + +# Define your input and output paths +input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv' +output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format' + +# Run the preprocessing function +preprocess_and_save_data(input_path, output_path) +###################################################################################################################################################### +#### Last to first +import os +import pandas as pd +import numpy as np +import torch +from PIL import Image +from concurrent.futures import ThreadPoolExecutor + +def process_file(uid_path, file, uid_output_path): + if file.endswith('.csv') or file.endswith('.png'): + file_path = os.path.join(uid_path, file) + if file.endswith('.csv'): + data = pd.read_csv(file_path).values + else: # if file.endswith('.png'): + data = np.array(Image.open(file_path)) + + data_tensor = torch.tensor(data, dtype=torch.float32) + output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt')) + torch.save(data_tensor, output_file_path) + +def 
preprocess_and_save_data(data_path, output_path): + if not os.path.exists(output_path): + os.makedirs(output_path) + + uids = sorted(os.listdir(data_path), reverse=True) + for uid in uids: + uid_path = os.path.join(data_path, uid) + if os.path.isdir(uid_path): + uid_output_path = os.path.join(output_path, uid) + if not os.path.exists(uid_output_path): + os.makedirs(uid_output_path) + + # Use a ThreadPoolExecutor to process files in parallel + with ThreadPoolExecutor() as executor: + # Create a list of tasks for the executor + tasks = [executor.submit(process_file, uid_path, file, uid_output_path) for file in os.listdir(uid_path)] + # Wait for all tasks to complete + for task in tasks: + task.result() + +# Define your input and output paths +input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv' +output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format' + +# Run the preprocessing function +preprocess_and_save_data(input_path, output_path) +###################################################################################################################################################### +###################################################################################################################################################### +###################################################################################################################################################### + +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler + +class CustomDataset(Dataset): + def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False): + self.data_path = data_path + self.labels_path = labels_path + self.UIDs = UIDs + self.standardize = standardize + self.data_format = data_format + self.read_all_labels = read_all_labels + self.refresh_dataset() + + def refresh_dataset(self): + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + def add_uids(self, new_uids): + unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs] + self.UIDs.extend(unique_new_uids) + self.refresh_dataset() + + def __len__(self): + return len(self.segment_names) + + def __getitem__(self, idx): + segment_name = self.segment_names[idx] + label = self.labels[segment_name] + time_freq_tensor = self.load_data(segment_name) + return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name} + + def extract_segment_names_and_labels(self): + segment_names = [] + labels = {} + + for UID in self.UIDs: + label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv") + if os.path.exists(label_file): + label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) + label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) + for idx, segment_name in enumerate(label_segment_names): + label_val = label_data['label'].values[idx] + if self.read_all_labels: + # Assign -1 if label is not in [0, 1, 2, 3] + labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1 + if segment_name not in segment_names: + segment_names.append(segment_name) + else: + # Only add segments with labels in [0, 1, 2, 3] + if label_val in [0, 1, 2, 3] and segment_name not in segment_names: + segment_names.append(segment_name) + labels[segment_name] = label_val + + return segment_names, labels + + def load_data(self, segment_name): + data_path_UID = 
+######################################################################################################################################################
+######################################################################################################################################################
+######################################################################################################################################################
+
+import os
+import pandas as pd
+import numpy as np
+import torch
+from PIL import Image
+from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import StandardScaler
+
+class CustomDataset(Dataset):
+    def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False):
+        self.data_path = data_path
+        self.labels_path = labels_path
+        self.UIDs = UIDs
+        self.standardize = standardize
+        self.data_format = data_format
+        self.read_all_labels = read_all_labels
+        self.refresh_dataset()
+
+    def refresh_dataset(self):
+        self.segment_names, self.labels = self.extract_segment_names_and_labels()
+
+    def add_uids(self, new_uids):
+        unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
+        self.UIDs.extend(unique_new_uids)
+        self.refresh_dataset()
+
+    def __len__(self):
+        return len(self.segment_names)
+
+    def __getitem__(self, idx):
+        segment_name = self.segment_names[idx]
+        label = self.labels[segment_name]
+        time_freq_tensor = self.load_data(segment_name)
+        return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
+
+    def extract_segment_names_and_labels(self):
+        segment_names = []
+        labels = {}
+
+        for UID in self.UIDs:
+            label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv")
+            if os.path.exists(label_file):
+                label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label'])
+                label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0])
+                for idx, segment_name in enumerate(label_segment_names):
+                    label_val = label_data['label'].values[idx]
+                    if self.read_all_labels:
+                        # Assign -1 if label is not in [0, 1, 2, 3]
+                        labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1
+                        if segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                    else:
+                        # Only add segments with labels in [0, 1, 2, 3]
+                        if label_val in [0, 1, 2, 3] and segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                            labels[segment_name] = label_val
+
+        return segment_names, labels
+
+    def load_data(self, segment_name):
+        data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
+        # The file extension follows data_format so each branch below can match its own format
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.' + self.data_format)
+
+        try:
+            if self.data_format == 'csv' and seg_path.endswith('.csv'):
+                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
+                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
+            elif self.data_format == 'png' and seg_path.endswith('.png'):
+                img = Image.open(seg_path)
+                img_data = np.array(img)
+                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
+            elif self.data_format == 'pt' and seg_path.endswith('.pt'):
+                time_freq_tensor = torch.load(seg_path)
+            else:
+                raise ValueError("Unsupported file format")
+
+            if self.standardize:
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
+
+            return time_freq_tensor.clone()
+
+        except Exception as e:
+            print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
+            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+
+    def standard_scaling(self, data):
+        scaler = StandardScaler()
+        data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
+        return torch.Tensor(data)
+
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
+    return dataloader
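+
+# Editor's sketch (illustrative only): building a loader over the .pt files written
+# by the preprocessing scripts above. 'labels_path' and the UID list are hypothetical.
+# example_uids = ['001', '002']
+# loader = load_data_split_batched(output_path, labels_path, example_uids,
+#                                  batch_size=64, standardize=True,
+#                                  data_format='pt', read_all_labels=False)
+# batch = next(iter(loader))
+# print(batch['data'].shape, batch['segment_name'][:3])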
+
+
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+
+    if file.endswith('.csv'):
+        # Ensure that the CSV file is read without an index or header
+        data = pd.read_csv(file_path, header=None).values
+        # Check the shape of the data and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+        # Check the shape of the image and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
+    else:
+        return  # Skip files that are not CSV or PNG
+
+    # Convert data to a 128x128 tensor
+    data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
+    base_name, extension = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Traverse the directories for each UID
+    for uid in os.listdir(data_path):
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            # Make a corresponding directory in the output path
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Create a ThreadPoolExecutor for parallel processing
+            with ThreadPoolExecutor() as executor:
+                files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+                for file in files_to_process:
+                    executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+
+
+def preprocess_and_save_data(data_path, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    all_uids = [uid for uid in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, uid))]
+    for uid in reversed(all_uids):  # Reverse the list of directories
+        uid_path = os.path.join(data_path, uid)
+        uid_output_path = os.path.join(output_path, uid)
+        if not os.path.exists(uid_output_path):
+            os.makedirs(uid_output_path)
+        with ThreadPoolExecutor() as executor:
+            files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+            for file in files_to_process:
+                executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+    if file.endswith('.csv'):
+        data = pd.read_csv(file_path, header=None).values
+        if data.shape != (128, 128):
+            print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+        if data.shape != (128, 128):
+            print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
+    else:
+        return
+    data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
+    base_name, extension = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+# Top-level script execution:
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+preprocess_and_save_data(input_path, output_path)
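+
+# Editor's sketch (illustrative, not part of the original script): a round-trip
+# check that a converted .pt file loads back as a 128x128 float tensor. The
+# segment file name is hypothetical.
+# sample = torch.load(os.path.join(output_path, '001', '001_example_filt_STFT.pt'))
+# assert sample.shape == (128, 128), f"Unexpected shape: {sample.shape}"
+# assert sample.dtype == torch.float32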
diff --git a/semisupervised_method.py b/semisupervised_method.py
index 547ffcd..2608cd4 100644
--- a/semisupervised_method.py
+++ b/semisupervised_method.py
@@ -64,18 +64,13 @@ def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='
         self.standardize = standardize
         self.data_format = data_format
         self.read_all_labels = read_all_labels
-        self.transforms = ToTensor()
         self.refresh_dataset()
 
     def refresh_dataset(self):
-        # Extract unique segment names and their corresponding labels
         self.segment_names, self.labels = self.extract_segment_names_and_labels()
 
     def add_uids(self, new_uids):
-        # Ensure new UIDs are unique and not already in the dataset
         unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
-
-        # Add unique new UIDs and refresh the dataset
         self.UIDs.extend(unique_new_uids)
         self.refresh_dataset()
 
@@ -85,10 +80,7 @@ def __len__(self):
     def __getitem__(self, idx):
         segment_name = self.segment_names[idx]
         label = self.labels[segment_name]
-
-        # Load data on-the-fly based on the segment_name
         time_freq_tensor = self.load_data(segment_name)
-
         return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
 
     def extract_segment_names_and_labels(self):
@@ -117,36 +109,24 @@ def extract_segment_names_and_labels(self):
     def load_data(self, segment_name):
         data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
-        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.csv')
-
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.pt')
         try:
-            if self.data_format == 'csv' and seg_path.endswith('.csv'):
-                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
-                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
-            elif self.data_format == 'png' and seg_path.endswith('.png'):
-                img = Image.open(seg_path)
-                img_data = np.array(img)
-                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
-            else:
-                raise ValueError("Unsupported file format")
-
+            time_freq_tensor = torch.load(seg_path)
             if self.standardize:
-                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
-
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)
             return time_freq_tensor.clone()
-
         except Exception as e:
             print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
-            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+            return torch.zeros((1, 128, 128))
 
     def standard_scaling(self, data):
         scaler = StandardScaler()
         data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
         return torch.Tensor(data)
 
-def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
-    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
-    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers)
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, read_all_labels=True, drop_last=False, num_workers=4):
+    # Pass read_all_labels by keyword: CustomDataset still takes data_format before read_all_labels
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, read_all_labels=read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
     return dataloader
 
 # To validate the len of the dataloader
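
# Editor's sketch (illustrative, not part of the diff above): a quick check of the
# dataloader length, as the trailing comment suggests. Paths and UIDs are hypothetical.
# loader = load_data_split_batched(data_path, labels_path, ['001', '002'], batch_size=32)
# print(len(loader.dataset), 'segments ->', len(loader), 'batches')
# print(next(iter(loader))['data'].shape)  # expect [32, 128, 128] for the 2-D .pt tensors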