Skip to content

Commit

Permalink
HPC FOLDER
Browse files Browse the repository at this point in the history
Co-Authored-By: Dong Han <dong.han@uconn.edu>
  • Loading branch information
lrm22005 and doh16101 committed Jan 22, 2024
1 parent d4b0cd8 commit c9809a3
Show file tree
Hide file tree
Showing 10 changed files with 1,247 additions and 0 deletions.
86 changes: 86 additions & 0 deletions HPC/final_project/active_learning/ss_active_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 18 18:23:23 2023
@author: lrm22005
"""
import numpy as np
import random
import torch
from torch.utils.data import DataLoader
from sklearn.cluster import MiniBatchKMeans

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def label_samples(uncertain_samples, validation_data):
    """Look up the label of every selected sample.

    Args:
        uncertain_samples: Iterable of keys/indices into ``validation_data``.
        validation_data: Indexable collection whose items expose a
            ``'label'`` entry.

    Returns:
        A tuple ``(uncertain_samples, labels)`` where ``labels`` is a list
        holding ``validation_data[sample_id]['label']`` for each id, in the
        order the ids were given. ``uncertain_samples`` is returned as-is.
    """
    labels = []
    for sample_id in uncertain_samples:
        entry = validation_data[sample_id]
        labels.append(entry['label'])
    return uncertain_samples, labels

def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2):
    """Pick the most uncertain samples from a random subset of batches.

    A random subset of ``n_batches`` batches is drawn from ``val_loader``,
    the predictive variance of every sample in those batches is computed,
    and the ``n_samples`` samples with the highest variance across all
    drawn batches are returned.

    Args:
        gp_model: Model; switched to eval mode and called on the flattened
            batch data.
        gp_likelihood: Likelihood; switched to eval mode and called on the
            model output. Its result must expose ``.variance``.
        val_loader: Iterable of batches, each a mapping with a ``'data'``
            tensor whose first dimension is the batch dimension.
        n_samples: Maximum number of indices to return.
        n_batches: Number of batches to draw. Values larger than the number
            of available batches are clamped instead of raising.
        n_components: Unused; kept for backward compatibility with a
            disabled dimensionality-reduction step.

    Returns:
        List of at most ``n_samples`` integer indices, ordered by decreasing
        predictive variance. Each index is the position of the sample in the
        loader's iteration order (cumulative over batches), so it can be used
        to index the underlying dataset.
        NOTE(review): this presumes the loader walks the dataset in order
        (no shuffling) -- confirm against the caller.
    """
    gp_model.eval()
    gp_likelihood.eval()

    batches = list(val_loader)

    # Position of each batch's first sample in loader order, so that
    # batch-local positions can be turned into dataset-wide indices.
    offsets = []
    seen = 0
    for batch in batches:
        offsets.append(seen)
        seen += batch['data'].size(0)

    # random.sample raises ValueError when asked for more items than exist.
    n_batches = min(n_batches, len(batches))
    chosen = random.sample(range(len(batches)), n_batches)

    candidate_vars = []
    candidate_ids = []
    with torch.no_grad():
        for batch_idx in chosen:
            data = batches[batch_idx]['data']
            # reshape (unlike view) also accepts non-contiguous tensors.
            flat = data.reshape(data.size(0), -1).to(device)
            predictions = gp_likelihood(gp_model(flat))
            # NOTE(review): assumes one variance value per sample -- confirm
            # for multi-output / batched GP models.
            var = predictions.variance.flatten()
            # Only the n_samples best of a batch can reach the global top.
            top = torch.argsort(-var)[:n_samples]
            candidate_vars.append(var[top].cpu())
            candidate_ids.append(top.cpu() + offsets[batch_idx])

    if not candidate_ids:
        return []

    all_vars = torch.cat(candidate_vars)
    all_ids = torch.cat(candidate_ids)
    # Rank candidates from every drawn batch together instead of keeping
    # only whichever batch happened to be processed first.
    order = torch.argsort(-all_vars)[:n_samples]
    return all_ids[order].tolist()

# def uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_components=2):
# gp_model.eval()
# gp_likelihood.eval()
# uncertain_sample_indices = []
# with torch.no_grad():
# for batch_idx, batch in tqdm(enumerate(val_loader), desc='Uncertainty Sampling', unit='batch'):
# reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
# predictions = gp_likelihood(gp_model(reduced_data_tensor))
# var = predictions.variance
# top_indices = torch.argsort(-var.flatten())[:n_samples]
# batch_uncertain_indices = [batch_idx * val_loader.batch_size + idx for idx in top_indices]
# uncertain_sample_indices.extend(batch_uncertain_indices)
# return uncertain_sample_indices[:n_samples]

def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):
    """Fit a MiniBatchKMeans model incrementally over a data loader.

    Args:
        data_loader: Iterable of batches, each a mapping with a ``'data'``
            tensor whose first dimension is the batch dimension.
        n_clusters: Number of clusters for MiniBatchKMeans.
        device: Unused; kept for backward compatibility. The clustering
            runs on NumPy arrays, so batches are never moved to an
            accelerator.
        batch_size: ``batch_size`` passed to MiniBatchKMeans.

    Returns:
        The MiniBatchKMeans instance after ``partial_fit`` has been called
        once per batch (unfitted if the loader yields nothing).
    """
    minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size)

    for batch in data_loader:
        samples = batch['data']
        # Flatten each sample to a feature vector; reshape (unlike view)
        # also accepts non-contiguous tensors.
        flat = samples.reshape(samples.size(0), -1)
        # Go straight to host memory: copying to `device` first only to
        # copy back for NumPy was a wasted round trip per batch.
        minibatch_kmeans.partial_fit(flat.detach().cpu().numpy())

    return minibatch_kmeans

# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device):
# # Compare K-Means with GP model predictions
# all_data, all_labels = [], []
# for batch in data_loader:
# data = batch['data'].view(batch['data'].size(0), -1).to(device)
# labels = batch['label'].to(device)
# gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy()
# kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
# all_labels.append(labels.cpu().numpy())
# all_data.append((gp_predictions, kmeans_predictions))
# return all_data, np.concatenate(all_labels)

def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
    """Collect GP and K-Means predictions on a random subset of batches.

    Args:
        kmeans_model: Fitted clustering model exposing ``predict`` on a
            NumPy array of flattened samples.
        gp_model: Callable model; its output must expose ``.mean``. The
            predicted class is taken as ``mean.argmax(dim=0)``.
            NOTE(review): this presumes ``mean`` is laid out as
            (num_classes, n_samples) -- confirm against the model.
        data_loader: Iterable of batches, each a mapping with ``'data'`` and
            ``'label'`` tensors.
        n_batches: Number of batches to draw. Values larger than the number
            of available batches are clamped instead of raising.
        device: Device on which the GP model is evaluated.

    Returns:
        A tuple ``(all_data, labels)``. ``all_data`` is a list with one
        ``(gp_predictions, kmeans_predictions)`` pair of NumPy arrays per
        drawn batch; ``labels`` is the concatenation of the drawn batches'
        labels as one NumPy array (empty when no batch was drawn).
    """
    all_data, all_labels = [], []

    batches = list(data_loader)
    # random.sample raises ValueError when asked for more items than exist.
    n_batches = min(n_batches, len(batches))
    sampled_batches = random.sample(batches, n_batches)

    # Inference only: avoid building an autograd graph for every batch.
    with torch.no_grad():
        for batch in sampled_batches:
            samples = batch['data']
            # reshape (unlike view) also accepts non-contiguous tensors.
            flat = samples.reshape(samples.size(0), -1)
            gp_predictions = gp_model(flat.to(device)).mean.argmax(dim=0).cpu().numpy()
            # K-Means and the labels only need host arrays, so they skip
            # the trip through `device`.
            kmeans_predictions = kmeans_model.predict(flat.cpu().numpy())
            all_labels.append(batch['label'].cpu().numpy())
            all_data.append((gp_predictions, kmeans_predictions))

    # np.concatenate rejects an empty sequence.
    labels = np.concatenate(all_labels) if all_labels else np.array([])
    return all_data, labels
Loading

0 comments on commit c9809a3

Please sign in to comment.