B_ML_Project/semisupervised_method.py
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 18 13:49:26 2023

@author: lrm22005
"""
import os
import logging

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from torch.distributions import MultivariateNormal
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision.transforms import ToTensor
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.metrics import silhouette_score, adjusted_rand_score, adjusted_mutual_info_score, davies_bouldin_score
def get_data_paths(data_format, is_linux=False, is_hpc=False):
    if is_linux:
        base_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch"
        labels_base_path = "/mnt/r/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn"
        saving_base_path = "/mnt/r/ENGR_Chon/Luis/Research/Casseys_case/Project_1_analysis"
    elif is_hpc:
        base_path = "/gpfs/scratchfs1/kic14002/doh16101"
        labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005"
        saving_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005/Casseys_case/Project_1_analysis"
    else:
        # Windows: raw strings so backslashes are not treated as escape sequences
        base_path = r"R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch"
        labels_base_path = r"R:\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn"
        saving_base_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Luis\Research\Casseys_case"
    if data_format == 'csv':
        data_path = os.path.join(base_path, "TFS_csv")
        labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm")
        saving_path = os.path.join(saving_base_path, "Project_1_analysis")
    elif data_format == 'png':
        data_path = os.path.join(base_path, "TFS_plots")
        labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm")
        saving_path = os.path.join(saving_base_path, "Project_1_analysis")
    else:
        raise ValueError("Invalid data format. Choose 'csv' or 'png'.")
    return data_path, labels_path, saving_path
class CustomDataset(Dataset):
    """Loads time-frequency segment tensors and their adjudicated labels, per subject UID."""
    def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False):
        self.data_path = data_path
        self.labels_path = labels_path
        self.UIDs = UIDs
        self.standardize = standardize
        self.data_format = data_format
        self.read_all_labels = read_all_labels
        self.refresh_dataset()

    def refresh_dataset(self):
        self.segment_names, self.labels = self.extract_segment_names_and_labels()

    def add_uids(self, new_uids):
        unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
        self.UIDs.extend(unique_new_uids)
        self.refresh_dataset()

    def __len__(self):
        return len(self.segment_names)

    def __getitem__(self, idx):
        segment_name = self.segment_names[idx]
        label = self.labels[segment_name]
        time_freq_tensor = self.load_data(segment_name)
        return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}

    def extract_segment_names_and_labels(self):
        segment_names = []
        labels = {}
        for UID in self.UIDs:
            label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv")
            if os.path.exists(label_file):
                label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label'])
                label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0])
                for idx, segment_name in enumerate(label_segment_names):
                    label_val = label_data['label'].values[idx]
                    if self.read_all_labels:
                        # Assign -1 if the label is not in [0, 1, 2, 3]
                        labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1
                        if segment_name not in segment_names:
                            segment_names.append(segment_name)
                    else:
                        # Only add segments with labels in [0, 1, 2, 3]
                        if label_val in [0, 1, 2, 3] and segment_name not in segment_names:
                            segment_names.append(segment_name)
                            labels[segment_name] = label_val
        return segment_names, labels

    def load_data(self, segment_name):
        data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.pt')
        try:
            time_freq_tensor = torch.load(seg_path)
            if self.standardize:
                time_freq_tensor = self.standard_scaling(time_freq_tensor)
            return time_freq_tensor.clone()
        except Exception as e:
            print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
            return torch.zeros((1, 128, 128))  # Fall back to a zero tensor on failure

    def standard_scaling(self, data):
        scaler = StandardScaler()
        data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
        return torch.Tensor(data)
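# A minimal usage sketch for CustomDataset (the UIDs below are hypothetical;
# it assumes the label CSVs and *_filt_STFT.pt tensors exist under the paths
# returned by get_data_paths):
# dataset = CustomDataset(data_path, labels_path, UIDs=['005', '017'])
# sample = dataset[0]
# print(sample['segment_name'], sample['label'], sample['data'].shape)  # e.g. torch.Size([1, 128, 128])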
def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
    # Pass dataset options by keyword so read_all_labels cannot be bound to
    # CustomDataset's data_format parameter by position
    dataset = CustomDataset(data_path, labels_path, UIDs, standardize=standardize, data_format=data_format, read_all_labels=read_all_labels)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
    return dataloader
# To validate the dataloader sizes:
# number of batches: len(train_loader)
# number of examples: len(train_loader.dataset)
# ====== Load the per-subject arrhythmia summary ======
df_summary = pd.read_csv(r'\\grove.ad.uconn.edu\research\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn\final_attemp_4_1_Dong_Ohm_summary_20231025.csv')
df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3)
df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT']
df_summary['sample_AF'] = df_summary['AF']
df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF'])
all_UIDs = df_summary['UID'].unique()
# ====================================================
# ====== AF trial separation ======
# R:\ENGR_Chon\Dong\Numbers\Pulsewatch_numbers\Fahimeh_CNNED_general_ExpertSystemwApplication\tbl_file_name\TrainingSet_final_segments
AF_trial_Fahimeh_train = ['402', '410']
AF_trial_Fahimeh_test = ['301', '302', '305', '306', '307', '310', '311',
                         '312', '318', '319', '320', '321', '322', '324',
                         '325', '327', '329', '400', '406', '407', '409',
                         '414']
AF_trial_Fahimeh_did_not_use = ['405', '413', '415', '416', '420', '421', '422', '423']
AF_trial_paroxysmal_AF = ['408', '419']
AF_trial_train = AF_trial_Fahimeh_train
AF_trial_test = AF_trial_Fahimeh_test
AF_trial_unlabeled = AF_trial_Fahimeh_did_not_use + AF_trial_paroxysmal_AF
print(f'AF trial: {len(AF_trial_train)} training subjects {AF_trial_train}')
print(f'AF trial: {len(AF_trial_test)} testing subjects {AF_trial_test}')
print(f'AF trial: {len(AF_trial_unlabeled)} unlabeled subjects {AF_trial_unlabeled}')
# =================================
# === Clinical trial AF subjects separation ===
clinical_trial_AF_subjects = ['005', '017', '026', '051', '075', '082']

import math
remaining_UIDs = []
count_NSR = []
for index, row in df_summary.iterrows():
    UID = row['UID']
    this_NSR = row['sample_nonAF']
    if math.isnan(this_NSR):
        # There are no segments for this subject; skip this UID.
        print(f'---------UID {UID} has no segments.------------')
        continue
    if UID not in AF_trial_train and UID not in AF_trial_test and UID not in clinical_trial_AF_subjects \
            and not UID[0] == '3' and not UID[0] == '4':
        remaining_UIDs.append(UID)
        count_NSR.append(this_NSR)

from numpy.random import choice
np.random.seed(42)
list_of_candidates = remaining_UIDs
number_of_items_to_pick = round(len(list_of_candidates) * 0.15)  # 10% labeled for training, 5% for testing.
temp_sum = sum(count_NSR)
# Subjects with fewer segments have a higher chance of being selected; the weights sum to one.
probability_distribution = [(1 - x / temp_sum) / (len(count_NSR) - 1) for x in count_NSR]
draw = choice(list_of_candidates, number_of_items_to_pick,
              p=probability_distribution, replace=False)
clinical_trial_train = list(draw[:round(len(list_of_candidates) * 0.1)])
clinical_trial_test_nonAF = list(draw[round(len(list_of_candidates) * 0.1):])
clinical_trial_test_temp = clinical_trial_test_nonAF + clinical_trial_AF_subjects
clinical_trial_test = []
for UID in clinical_trial_test_temp:
    # UID 051 and possibly other UIDs had no segments (unknown reason).
    if UID in all_UIDs:
        clinical_trial_test.append(UID)
clinical_trial_unlabeled = []
for UID in all_UIDs:
    if UID not in clinical_trial_train and UID not in clinical_trial_test and not UID[0] == '3' and not UID[0] == '4':
        clinical_trial_unlabeled.append(UID)
print(f'Clinical trial: selected {len(clinical_trial_train)} UIDs for training {clinical_trial_train}')
print(f'Clinical trial: selected {len(clinical_trial_test)} UIDs for testing {clinical_trial_test}')
print(f'Clinical trial: selected {len(clinical_trial_unlabeled)} UIDs for unlabeled {clinical_trial_unlabeled}')
is_linux = False  # Set to True if running on Linux, False if on Windows
is_hpc = False  # Set to True if running on an HPC, False if on Windows
data_format = 'csv'  # Choose 'csv' or 'png'
data_path, labels_path, saving_path = get_data_paths(data_format, is_linux=is_linux, is_hpc=is_hpc)

# clinical_trial_train = [clinical_trial_train[0]]
# clinical_trial_test = [clinical_trial_test[0]]
# clinical_trial_unlabeled = clinical_trial_unlabeled[0:4]

batch_size = 512
train_loader = load_data_split_batched(data_path, labels_path, clinical_trial_train, batch_size, standardize=True, data_format='csv', read_all_labels=False, drop_last=True)
val_loader = load_data_split_batched(data_path, labels_path, clinical_trial_test, batch_size, standardize=True, data_format='csv', read_all_labels=False, drop_last=True)
test_loader = load_data_split_batched(data_path, labels_path, clinical_trial_unlabeled, batch_size, standardize=True, data_format='csv', read_all_labels=False, drop_last=True)

# data_iter = iter(train_loader)
# data = next(data_iter)
# data["label"]
'''
Key points:
Initialization: The script starts by initializing the data loaders for training, validation, and testing.
Initial training: The model is first trained on the training set.
Active learning loop: During each iteration of active learning, the script:
    - performs uncertainty sampling on the validation set,
    - updates the training loader with the uncertain samples,
    - retrains the model with the updated training set.
Final evaluation: After the active learning iterations, the model is evaluated on the test set to assess its performance on unseen data.

Notes:
Data preprocessing: The script assumes that train_batch['data'] from the data loaders is already appropriately preprocessed for the model. If additional preprocessing is required, make sure to include it.
Active learning iterations: The number of active learning iterations (active_learning_iterations) and the number of samples per iteration (n_samples) are parameters to adjust for your scenario and dataset.
Test evaluation: The final evaluation on the test set indicates how well the model generalizes to new, unseen data.
'''
import gpytorch
from gpytorch.mlls import VariationalELBO, ExactMarginalLogLikelihood
from gpytorch.kernels import RBFKernel, PeriodicKernel, ScaleKernel
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.distributions import MultivariateNormal
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
# Assuming CustomDataset and the data loader functions are defined as provided
def map_samples_to_uids(uncertain_sample_indices, dataset):
    """
    Maps indices of uncertain samples back to the UIDs of their subjects.

    Args:
    - uncertain_sample_indices: Indices of the uncertain samples in the dataset.
    - dataset: The dataset object, which holds the mapping of segment names and UIDs.

    Returns:
    - List of UIDs corresponding to the uncertain samples.
    """
    # Segment names are prefixed with the subject UID (see CustomDataset.load_data),
    # and add_uids() expects UIDs, so strip the segment suffix here
    return [dataset.segment_names[i].split('_')[0] for i in uncertain_sample_indices]
def apply_tsne(data, n_components=2):
    n_samples = data.shape[0]
    perplexity = min(30, n_samples - 1)  # Perplexity must be less than the number of samples
    tsne = TSNE(n_components=n_components, perplexity=perplexity)
    return tsne.fit_transform(data)
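# Illustrative sketch of apply_tsne on random data standing in for flattened
# TFS segments (the shapes here are assumptions, not pipeline values):
# dummy = np.random.rand(200, 128 * 128)
# embedded = apply_tsne(dummy, n_components=2)
# print(embedded.shape)  # (200, 2)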
from tqdm import tqdm
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import label_binarize
num_latents = 6  # Should match the complexity of the data / the number of tasks
num_tasks = 4  # Should match the number of output classes or tasks
num_inducing_points = 50  # Independent of the above; should be sufficient to cover the input space
class MultitaskGPModel(gpytorch.models.ApproximateGP):
    def __init__(self):
        # Use a different set of inducing points for each latent function,
        # assuming flattened 128x128 images as input
        inducing_points = torch.rand(num_latents, num_inducing_points, 128 * 128)

        # Mark the CholeskyVariationalDistribution as batch so that a
        # variational distribution is learned for each latent function
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )

        # Wrap the VariationalStrategy in an LMCVariationalStrategy so the
        # output is a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=num_tasks,
            num_latents=num_latents,
            latent_dim=-1
        )
        super().__init__(variational_strategy)

        # The mean and covariance modules are batched so that a different set
        # of hyperparameters is learned per latent function
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents])
        )

    def forward(self, x):
        # x must already be flattened to shape [N, 128 * 128], matching the
        # last dimension of the inducing points
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
# Usage
# model = MultitaskGPModel()
# likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=num_tasks, num_classes=num_tasks, mixing_weights=False)
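# Shape sanity check for the model above, on random stand-in data (sketch
# only; the batch size of 8 is arbitrary):
# model = MultitaskGPModel()
# x = torch.rand(8, 128 * 128)  # 8 flattened 128x128 segments
# out = model(x)  # a MultitaskMultivariateNormal
# print(out.mean.shape)  # expected: torch.Size([8, 4]), one column per task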
# Initialize result storage
results = {
    'train_loss': [],
    'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
    'test_metrics': None  # Filled in with the final test metrics
}
def train_gp_model(train_x, train_y, val_loader, num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt'):
    model = MultitaskGPModel().to(device)
    likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=n_classes, num_classes=n_classes).to(device)
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    best_val_loss = float('inf')
    epochs_no_improve = 0

    for i in tqdm(range(num_iterations), desc='Training', unit='iter', leave=False):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        scalar_loss = loss.sum() if loss.numel() > 1 else loss
        scalar_loss.backward()
        optimizer.step()

        # Validation step
        model.eval()
        likelihood.eval()
        with torch.no_grad():
            val_loss = 0.0
            for val_batch in val_loader:
                val_x = val_batch['data'].view(val_batch['data'].size(0), -1).to(device)
                val_y = val_batch['label'].to(device)
                val_output = model(val_x)
                val_loss += -mll(val_output, val_y).sum().item()
        val_loss /= len(val_loader)
        model.train()
        likelihood.train()

        # Early stopping and checkpointing based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save({'model_state_dict': model.state_dict(),
                        'likelihood_state_dict': likelihood.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()}, checkpoint_path)
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                print(f"Early stopping triggered at iteration {i+1}")
                break

    # Load the best checkpoint before returning
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, likelihood
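# Sketch of a single call to train_gp_model, assuming train_loader, val_loader
# and device are defined as in the main block below:
# batch = next(iter(train_loader))
# train_x = batch['data'].view(batch['data'].size(0), -1).to(device)
# train_y = batch['label'].to(device)
# model, likelihood = train_gp_model(train_x, train_y, val_loader, num_iterations=10, n_classes=4)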
def parse_classification_report(report):
    """Parse the macro-average row of an sklearn classification report into a dict of metrics."""
    lines = [line for line in report.split('\n') if line.strip()]
    # The row has the form: "macro avg  <precision>  <recall>  <f1-score>  <support>"
    macro_row = next(line for line in lines if line.strip().startswith('macro avg')).split()
    return {
        'precision': float(macro_row[2]),
        'recall': float(macro_row[3]),
        'f1': float(macro_row[4]),
        'auc_roc': None  # AUC-ROC is not part of the classification report by default
    }
from sklearn.metrics import classification_report
def evaluate_model_on_all_data(model, likelihood, data_loader, device, n_classes):
    model.eval()
    likelihood.eval()
    all_predicted_labels = []
    all_test_labels = []
    all_class_probs = []
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        for batch in data_loader:
            test_data = batch['data'].view(batch['data'].size(0), -1).to(device)
            test_labels = batch['label'].to(device)
            # SoftmaxLikelihood returns a Categorical whose probs carry a leading
            # sample dimension; averaging it out yields per-class probabilities
            # of shape [batch, n_classes]
            class_probs = likelihood(model(test_data)).probs.mean(0)
            predicted_labels = class_probs.argmax(dim=-1)
            all_predicted_labels.append(predicted_labels.cpu().numpy())
            all_test_labels.append(test_labels.cpu().numpy())
            all_class_probs.append(class_probs.cpu().numpy())
    # Concatenate all batch results
    all_predicted_labels = np.concatenate(all_predicted_labels, axis=0)
    all_test_labels = np.concatenate(all_test_labels, axis=0)
    all_class_probs = np.concatenate(all_class_probs, axis=0)
    # Verify that the shapes match before calculating metrics
    if all_predicted_labels.shape[0] != all_test_labels.shape[0]:
        raise ValueError("Mismatch in the number of samples between predicted and actual labels")
    # Compute overall evaluation metrics
    precision, recall, f1, _ = precision_recall_fscore_support(all_test_labels, all_predicted_labels, average='macro')
    # AUC-ROC needs the predicted probabilities and one-hot encoded true labels
    test_labels_one_hot = label_binarize(all_test_labels, classes=np.arange(n_classes))
    auc_roc = roc_auc_score(test_labels_one_hot, all_class_probs, multi_class='ovr')
    return {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc_roc': auc_roc
    }
import random

def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2):
    gp_model.eval()
    gp_likelihood.eval()
    uncertain_sample_indices = []
    # Sample batch positions rather than the batches themselves, so in-batch
    # indices can be mapped back to dataset-level indices (val_loader uses
    # shuffle=False, so the batch order is deterministic)
    sampled_batch_ids = set(random.sample(range(len(val_loader)), n_batches))
    with torch.no_grad():
        for batch_idx, batch in enumerate(val_loader):
            if batch_idx not in sampled_batch_ids:
                continue
            data = batch['data'].view(batch['data'].size(0), -1).to(device)
            # Use the latent GP's predictive variance, summed over tasks,
            # as the per-sample uncertainty score
            var = gp_model(data).variance.sum(dim=-1)
            top_indices = torch.argsort(-var.flatten())[:n_samples]
            offset = batch_idx * val_loader.batch_size
            uncertain_sample_indices.extend((offset + top_indices).cpu().numpy())
    return uncertain_sample_indices[:n_samples]
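# Sketch of one sampling round, assuming a trained model/likelihood and the
# val_loader from the main block below; the returned indices are positions
# in val_loader.dataset:
# uncertain = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=32, n_batches=5)
# print(len(uncertain), uncertain[:5])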
def label_samples(uncertain_samples, validation_data):
    labels = [validation_data[sample_id]['label'] for sample_id in uncertain_samples]
    return uncertain_samples, labels

def update_train_loader_with_uncertain_samples(current_train_loader, new_sample_indices, source_dataset, batch_size, standardize=False, data_format='csv', read_all_labels=True):
    # Extract the current dataset from current_train_loader
    current_dataset = current_train_loader.dataset
    # The indices come from uncertainty sampling on the validation set, so map
    # them through the validation (source) dataset to recover subject UIDs
    new_uids = map_samples_to_uids(new_sample_indices, source_dataset)
    # Add the new UIDs to the current dataset and refresh it
    current_dataset.add_uids(new_uids)
    # Create a new DataLoader with the updated dataset
    updated_train_loader = DataLoader(current_dataset, batch_size=batch_size, shuffle=False)
    return updated_train_loader
def plot_training_performance(train_loss, validation_metrics):
    plt.figure(figsize=(14, 6))
    # Plot training loss (the list may be empty if losses were not recorded)
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(train_loss) + 1), train_loss, 'b-', label='Training Loss')
    plt.title('Training Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    # Plot validation metrics, each against its own number of recorded epochs
    plt.subplot(1, 2, 2)
    for metric, style, name in [('precision', 'r-', 'Precision'), ('recall', 'g-', 'Recall'),
                                ('f1', 'b-', 'F1 Score'), ('auc_roc', 'y-', 'AUC-ROC')]:
        values = validation_metrics[metric]
        plt.plot(range(1, len(values) + 1), values, style, label=name)
    plt.title('Validation Metrics')
    plt.xlabel('Epochs')
    plt.ylabel('Metrics')
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_results(results):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(results['train_loss'], label='Train Loss')
    plt.title('Training Loss Over Time')
    plt.legend()
    plt.subplot(1, 2, 2)
    for metric in ['precision', 'recall', 'f1']:
        plt.plot(results['validation_metrics'][metric], label=metric.title())
    plt.title('Validation Metrics Over Time')
    plt.legend()
    plt.show()

    test_metrics = results['test_metrics']
    print("Test Metrics:")
    print(f"Precision: {test_metrics['precision']}")
    print(f"Recall: {test_metrics['recall']}")
    print(f"F1 Score: {test_metrics['f1']}")
    print(f"AUC-ROC: {test_metrics['auc_roc']}")
# K-Means validation function
def kmeans_validation(model, data_loader, n_clusters, device):
    model.eval()
    all_data = []
    all_predictions = []
    with torch.no_grad():
        for batch in data_loader:
            data = batch['data'].view(batch['data'].size(0), -1).to(device)
            predictions = model(data).mean.argmax(dim=-1).cpu().numpy()
            all_data.extend(data.cpu().numpy())
            all_predictions.extend(predictions)
    all_data = np.array(all_data)
    all_predictions = np.array(all_predictions)
    # Perform KMeans clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(all_data)
    cluster_labels = kmeans.labels_
    # Check label consistency within each cluster
    for i in range(n_clusters):
        cluster_indices = np.where(cluster_labels == i)[0]
        if cluster_indices.size == 0:
            continue  # Guard against empty clusters
        cluster_pred_labels = all_predictions[cluster_indices]
        most_common_label = np.bincount(cluster_pred_labels).argmax()
        # Compare most_common_label with the actual labels in the cluster
        # and adjust the model's predictions if necessary
    return kmeans
def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):
    # Initialize MiniBatchKMeans
    minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size)
    # Iterate through the data_loader and fit MiniBatchKMeans incrementally
    for batch in data_loader:
        data = batch['data'].view(batch['data'].size(0), -1).cpu().numpy()
        minibatch_kmeans.partial_fit(data)
    return minibatch_kmeans
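# Sketch: fit MiniBatchKMeans over a loader, then assign clusters to one batch
# (assumes train_loader and device are defined as in the main block below):
# km = run_minibatch_kmeans(train_loader, n_clusters=4, device=device)
# batch = next(iter(train_loader))
# print(km.predict(batch['data'].view(batch['data'].size(0), -1).numpy())[:10])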
def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
    all_data, all_labels = [], []
    sampled_batches = random.sample(list(data_loader), n_batches)  # Randomly sample n_batches from data_loader
    with torch.no_grad():
        for batch in sampled_batches:
            data = batch['data'].view(batch['data'].size(0), -1).to(device)
            labels = batch['label'].to(device)
            # argmax over the task dimension yields one predicted class per sample
            gp_predictions = gp_model(data).mean.argmax(dim=-1).cpu().numpy()
            kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            all_data.append((gp_predictions, kmeans_predictions))
    return all_data, np.concatenate(all_labels)
from sklearn.metrics import confusion_matrix

def plot_comparative_results(gp_vs_kmeans_data, original_labels):
    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
    # Plot 1: Confusion matrix for GP predictions vs original labels
    gp_predictions = np.concatenate([pair[0] for pair in gp_vs_kmeans_data])
    cm_gp = confusion_matrix(original_labels, gp_predictions)
    sns.heatmap(cm_gp, annot=True, ax=axes[0], fmt='g')
    axes[0].set_title('GP Model Predictions vs Original Labels')
    axes[0].set_xlabel('Predicted Labels')
    axes[0].set_ylabel('True Labels')
    # Plot 2: Confusion matrix for K-Means predictions vs original labels
    kmeans_predictions = np.concatenate([pair[1] for pair in gp_vs_kmeans_data])
    cm_kmeans = confusion_matrix(original_labels, kmeans_predictions)
    sns.heatmap(cm_kmeans, annot=True, ax=axes[1], fmt='g')
    axes[1].set_title('K-Means Predictions vs Original Labels')
    axes[1].set_xlabel('Predicted Labels')
    axes[1].set_ylabel('True Labels')
    plt.tight_layout()
    plt.show()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Main execution
if __name__ == "__main__":
    # Define the number of output classes first
    n_classes = 4

    # Initialize data loaders
    train_loader = load_data_split_batched(data_path, labels_path, clinical_trial_train, batch_size, standardize=False, data_format='csv', read_all_labels=False, drop_last=True)
    val_loader = load_data_split_batched(data_path, labels_path, clinical_trial_test, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=True)
    test_loader = load_data_split_batched(data_path, labels_path, clinical_trial_unlabeled, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=True)

    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device)

    # Initialize result storage
    results = {
        'train_loss': [],
        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
        'test_metrics': None  # Filled in with the final test metrics
    }

    # Initial model training. Note that train_gp_model trains from scratch on
    # each call, so after this loop the model reflects the last batch only.
    for train_batch in train_loader:
        train_x = train_batch['data'].view(train_batch['data'].size(0), -1).to(device)
        train_y = train_batch['label'].to(device)
        model, likelihood = train_gp_model(train_x, train_y, val_loader, num_iterations=10, n_classes=n_classes)

    active_learning_iterations = 10
    n_samples = batch_size  # Number of uncertain samples to accumulate per iteration
    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
        # Collect indices of uncertain validation samples
        uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples, n_batches=5, n_components=2)
        # Update the training loader with the subjects of the uncertain samples
        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, val_loader.dataset, batch_size)
        # Re-train the model with the updated train_loader
        for train_batch in tqdm(train_loader, desc='Batch Training', leave=False):
            train_x = train_batch['data'].view(train_batch['data'].size(0), -1).to(device)  # Flatten the images
            train_y = train_batch['label'].to(device)
            model, likelihood = train_gp_model(train_x, train_y, val_loader, num_iterations=10, n_classes=n_classes)
        val_metrics = evaluate_model_on_all_data(model, likelihood, val_loader, device, n_classes)
        for metric in ['precision', 'recall', 'f1', 'auc_roc']:
            results['validation_metrics'][metric].append(val_metrics[metric])
        # Compare K-Means with GP model predictions after retraining
        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
        plot_comparative_results(gp_vs_kmeans_data, original_labels)
        plot_training_performance(results['train_loss'], results['validation_metrics'])

    # Final evaluation on the test set
    classification_result = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes=n_classes)
    results['test_metrics'] = classification_result

    # The results dictionary is now ready for plotting
    plot_results(results)
    print("Final Test Metrics:", results['test_metrics'])