Starting the coding. I am using modular code to avoid repeating errors and to simplify debugging.
commit 4b6b50e
Showing 1 changed file with 104 additions and 0 deletions.
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 14:50:24 2023

@author: lrm22005
"""

import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns

def load_data(data_path, dataset_size=10, train=True, standardize=True):
    # Load time-frequency segments from the specified data_path.
    # The first `dataset_size` UID folders are used when train=True;
    # the folders after the first `dataset_size` are used otherwise.
    dir_list_UID = os.listdir(data_path)
    UID_list = dir_list_UID[:dataset_size] if train else dir_list_UID[dataset_size:]

    X_data = []
    segment_names = []

    for UID in UID_list:
        data_path_UID = os.path.join(data_path, UID)
        dir_list_seg = os.listdir(data_path_UID)

        for seg in dir_list_seg[:50]:  # Limiting to 50 segments per UID
            seg_path = os.path.join(data_path_UID, seg)
            # Each CSV holds one 128x128 time-frequency plot
            time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
            time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
            X_data.append(time_freq_tensor)
            segment_names.append(seg)  # Store segment names

    X_data = torch.cat(X_data, 0)

    if standardize:
        X_data = standard_scaling(X_data)

    return X_data, segment_names

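For reference, a minimal usage sketch (illustrative, not part of the commit); it assumes the path placeholder points at a directory of per-UID subfolders, each holding 128x128 CSV segments as load_data expects:

X, names = load_data("path/to/TFS_csv", dataset_size=2, train=True)
print(X.shape)    # expected: torch.Size([num_segments, 128, 128])
print(names[:3])  # first few segment file names
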
def standard_scaling(tensor):
    # Z-score normalization (standardization)
    mean = tensor.mean()
    std = tensor.std()
    tensor_standardized = (tensor - mean) / std
    return tensor_standardized

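Note that this uses a single mean and std computed over the entire tensor. If per-segment scaling were preferred instead (an alternative design choice, not what this commit does), a sketch might look like:

def standard_scaling_per_segment(tensor, eps=1e-8):
    # Z-score each (128, 128) segment independently; eps avoids division by zero
    flat = tensor.view(tensor.size(0), -1)
    mean = flat.mean(dim=1, keepdim=True)
    std = flat.std(dim=1, keepdim=True)
    return ((flat - mean) / (std + eps)).view_as(tensor)
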
def create_dataloader(data, batch_size=64, shuffle=True):
    # Pair each sample with itself so the input doubles as the target
    # (autoencoder-style dataset)
    dataset = TensorDataset(data, data)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return data_loader

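A quick iteration sketch (illustrative, using the imports above with random data in place of real segments) showing the batch shapes this loader yields:

loader = create_dataloader(torch.randn(256, 128, 128), batch_size=64)
for inputs, targets in loader:
    print(inputs.shape, targets.shape)  # torch.Size([64, 128, 128]) twice
    break
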
def visualize_trends(data, segment_names, num_plots=3):
    # Visualize randomly chosen trends/segments
    num_samples, _, _ = data.shape
    for _ in range(num_plots):
        idx = np.random.randint(0, num_samples)
        plt.imshow(data[idx].numpy())
        plt.title(f"Segment: {segment_names[idx]}")
        plt.colorbar()
        plt.show()

def perform_pca(data, num_components=2):
    # Perform PCA for dimensionality reduction
    data_flattened = data.view(data.size(0), -1)  # Flatten each segment to a vector
    pca = PCA(n_components=num_components)
    reduced_data = pca.fit_transform(data_flattened.numpy())
    return reduced_data, pca

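Since main() only prints the explained variance, here is a small follow-on sketch (illustrative, not in the commit; it assumes train_data was returned by load_data as in main) that visualizes the 2-D projection returned by perform_pca:

reduced, pca = perform_pca(train_data, num_components=2)
plt.scatter(reduced[:, 0], reduced[:, 1], s=5)
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.title("Segments projected onto the first two principal components")
plt.show()
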
def visualize_correlation_matrix(data):
    # Visualize the correlation matrix between pixel positions across segments.
    # Note: with 128x128 inputs, rowvar=False treats each of the 16384 pixels
    # as a variable, so the resulting matrix is 16384x16384 (memory-intensive).
    data_flattened = data.view(data.size(0), -1).numpy()
    correlation_matrix = np.corrcoef(data_flattened, rowvar=False)
    sns.heatmap(correlation_matrix, cmap="coolwarm", xticklabels=False, yticklabels=False)
    plt.title("Correlation Matrix")
    plt.show()

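If memory becomes a problem at that size, one possible workaround (an assumption on my part, not what the commit does) is to correlate only a random subset of pixel positions:

def visualize_correlation_matrix_subsampled(data, num_pixels=512, seed=0):
    # Correlate a random subset of pixel positions to keep the matrix small
    flat = data.view(data.size(0), -1).numpy()
    rng = np.random.default_rng(seed)
    cols = rng.choice(flat.shape[1], size=num_pixels, replace=False)
    corr = np.corrcoef(flat[:, cols], rowvar=False)  # num_pixels x num_pixels
    sns.heatmap(corr, cmap="coolwarm", xticklabels=False, yticklabels=False)
    plt.title(f"Correlation Matrix ({num_pixels} sampled pixels)")
    plt.show()
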
def main():
    is_linux = False  # Set to True if running on Linux, False if on Windows
    if is_linux:
        data_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/TFS_csv"
    else:
        data_path = r"R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv"

    # Per load_data: training uses the first 141 UID folders,
    # testing uses every folder after the first 10
    train_data, segment_names = load_data(data_path, dataset_size=141, train=True)
    test_data, _ = load_data(data_path, dataset_size=10, train=False)

    train_dataloader = create_dataloader(train_data)
    test_dataloader = create_dataloader(test_data)

    # Visualize random trends/segments
    visualize_trends(train_data, segment_names, num_plots=3)

    # Perform PCA for dimensionality reduction
    reduced_data, pca = perform_pca(train_data, num_components=2)
    print("Explained variance ratio:", pca.explained_variance_ratio_)

    # Visualize the correlation matrix
    visualize_correlation_matrix(train_data)


if __name__ == "__main__":
    main()