Commit
Co-Authored-By: Dong Han <dong.han@uconn.edu>
Showing 10 changed files with 1,247 additions and 0 deletions.

# -*- coding: utf-8 -*-
"""
Created on Mon Dec 18 18:23:23 2023

@author: lrm22005
"""
import random

import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.cluster import MiniBatchKMeans

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def label_samples(uncertain_samples, validation_data):
    """Look up the ground-truth label for each selected sample id."""
    labels = [validation_data[sample_id]['label'] for sample_id in uncertain_samples]
    return uncertain_samples, labels


def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2):
    """Select the highest-variance samples from a random subset of validation batches."""
    gp_model.eval()
    gp_likelihood.eval()
    uncertain_sample_indices = []
    # Randomly sample n_batches from val_loader (this materializes the loader once).
    sampled_batches = random.sample(list(val_loader), n_batches)

    with torch.no_grad():
        for batch in sampled_batches:
            # Optional t-SNE reduction (disabled; n_components is kept for this path):
            # reduced_data = apply_tsne(batch['data'].reshape(batch['data'].size(0), -1), n_components=n_components)
            # reduced_data_tensor = torch.Tensor(reduced_data).to(device)
            reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
            predictions = gp_likelihood(gp_model(reduced_data_tensor))
            var = predictions.variance
            # Indices of the n_samples most uncertain points in this batch.
            # Note that these indices are local to the batch, not global dataset ids.
            top_indices = torch.argsort(-var.flatten())[:n_samples]
            uncertain_sample_indices.extend(top_indices.cpu().numpy())

    return uncertain_sample_indices[:n_samples]
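
# Hypothetical usage sketch, assuming `model` and `likelihood` are a trained
# GPyTorch model/likelihood pair and `val_loader` yields dicts with 'data'
# tensors, as the function above expects:
#
#   uncertain_ids = stochastic_uncertainty_sampling(model, likelihood, val_loader,
#                                                   n_samples=10, n_batches=5)
#   ids, labels = label_samples(uncertain_ids, validation_data)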


# Deterministic variant kept for reference: scans every batch and converts
# batch-local picks into global dataset indices (requires tqdm).
# def uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_components=2):
#     gp_model.eval()
#     gp_likelihood.eval()
#     uncertain_sample_indices = []
#     with torch.no_grad():
#         for batch_idx, batch in tqdm(enumerate(val_loader), desc='Uncertainty Sampling', unit='batch'):
#             reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
#             predictions = gp_likelihood(gp_model(reduced_data_tensor))
#             var = predictions.variance
#             top_indices = torch.argsort(-var.flatten())[:n_samples]
#             batch_uncertain_indices = [batch_idx * val_loader.batch_size + idx for idx in top_indices]
#             uncertain_sample_indices.extend(batch_uncertain_indices)
#     return uncertain_sample_indices[:n_samples]


def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):
    """Fit MiniBatchKMeans incrementally over a DataLoader."""
    minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size)

    # MiniBatchKMeans works on CPU NumPy arrays, so the batch does not need to be
    # moved to `device` and back; flatten each sample and fit incrementally.
    for batch in data_loader:
        data = batch['data'].view(batch['data'].size(0), -1).cpu().numpy()
        minibatch_kmeans.partial_fit(data)

    return minibatch_kmeans
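
# Hypothetical usage sketch; the cluster count is illustrative and the `device`
# argument is kept only for signature compatibility:
#
#   kmeans = run_minibatch_kmeans(train_loader, n_clusters=4, device=device)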


# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device):
#     # Compare K-Means with GP model predictions
#     all_data, all_labels = [], []
#     for batch in data_loader:
#         data = batch['data'].view(batch['data'].size(0), -1).to(device)
#         labels = batch['label'].to(device)
#         gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy()
#         kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
#         all_labels.append(labels.cpu().numpy())
#         all_data.append((gp_predictions, kmeans_predictions))
#     return all_data, np.concatenate(all_labels)


def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
    """Compare k-means and GP predictions on a random subset of batches."""
    all_data, all_labels = [], []
    # Randomly sample n_batches from data_loader
    sampled_batches = random.sample(list(data_loader), n_batches)

    for batch in sampled_batches:
        data = batch['data'].view(batch['data'].size(0), -1).to(device)
        labels = batch['label'].to(device)
        # argmax over dim 0 assumes the GP posterior mean is shaped (num_classes, batch).
        gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy()
        kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
        all_labels.append(labels.cpu().numpy())
        all_data.append((gp_predictions, kmeans_predictions))

    return all_data, np.concatenate(all_labels)
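

# A minimal sketch of how the comparison output might be summarized, assuming
# only the return structure above; `summarize_agreement` and the adjusted Rand
# index are illustrative additions rather than part of the pipeline:
from sklearn.metrics import adjusted_rand_score


def summarize_agreement(all_data):
    # Cluster ids and GP class ids live in different label spaces, so use a
    # permutation-invariant agreement score rather than raw accuracy.
    scores = [adjusted_rand_score(gp_preds, km_preds) for gp_preds, km_preds in all_data]
    return float(np.mean(scores))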