Initial commit
Starting the coding.

I am using modular code because it helps me avoid repeating errors and simplifies debugging.
lrm22005 committed Oct 23, 2023
commit 4b6b50e (0 parents)
Showing 1 changed file with 104 additions and 0 deletions.
project_1.py: +104 −0
@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 23 14:50:24 2023
@author: lrm22005
"""

import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns

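# load_data: reads up to 50 CSV segments (each reshaped to 128x128) from every UID
# folder under data_path. The first `dataset_size` UID folders are used when
# train=True; the remaining folders are used otherwise. Returns a stacked tensor of
# shape (num_segments, 128, 128) along with the segment file names.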
def load_data(data_path, dataset_size=10, train=True, standardize=True):
# Load data from the specified data_path
dir_list_UID = os.listdir(data_path)
UID_list = dir_list_UID[:dataset_size] if train else dir_list_UID[dataset_size:]

X_data = []
segment_names = []

for UID in UID_list:
data_path_UID = os.path.join(data_path, UID)
dir_list_seg = os.listdir(data_path_UID)

for seg in dir_list_seg[:50]: # Limiting to 50 segments
seg_path = os.path.join(data_path_UID, seg)
time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
X_data.append(time_freq_tensor)
segment_names.append(seg) # Store segment names

X_data = torch.cat(X_data, 0)

if standardize:
X_data = standard_scaling(X_data)

return X_data, segment_names

def standard_scaling(tensor):
# Z-score normalization (standardization)
mean = tensor.mean()
std = tensor.std()
tensor_standardized = (tensor - mean) / std
return tensor_standardized

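# create_dataloader: pairs the data with itself in a TensorDataset (autoencoder-style
# input/target) and wraps it in a batched, optionally shuffled DataLoader.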
def create_dataloader(data, batch_size=64, shuffle=True):
dataset = TensorDataset(data, data)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
return data_loader

def visualize_trends(data, segment_names, num_plots=3):
# Visualize random trends/segments
num_samples, _, _ = data.shape
for _ in range(num_plots):
idx = np.random.randint(0, num_samples)
plt.imshow(data[idx].numpy())
plt.title(f"Segment: {segment_names[idx]}")
plt.colorbar()
plt.show()

def perform_pca(data, num_components=2):
# Perform PCA for dimensionality reduction
data_flattened = data.view(data.size(0), -1) # Flatten the data
pca = PCA(n_components=num_components)
reduced_data = pca.fit_transform(data_flattened.numpy())
return reduced_data, pca

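# visualize_correlation_matrix: flattens each 128x128 segment into 16384 features and
# plots their pairwise Pearson correlations as a heatmap. Note: the resulting
# 16384x16384 correlation matrix is memory-intensive for large datasets.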
def visualize_correlation_matrix(data):
# Visualize the correlation matrix
data_flattened = data.view(data.size(0), -1).numpy()
correlation_matrix = np.corrcoef(data_flattened, rowvar=False)
sns.heatmap(correlation_matrix, cmap="coolwarm", xticklabels=False, yticklabels=False)
plt.title("Correlation Matrix")
plt.show()

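# main: picks the data path for Linux or Windows, loads train/test tensors, builds the
# DataLoaders, then runs the visualization and PCA steps on the training data.
# Note: with dataset_size=141 for training and dataset_size=10 for testing, the two
# UID ranges overlap (training uses folders [0:141], testing uses folders [10:]).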
def main():
is_linux = False # Set to True if running on Linux, False if on Windows
if is_linux:
data_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/TFS_csv"
else:
data_path = r"R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv"

train_data, segment_names = load_data(data_path, dataset_size=141, train=True)
test_data, _ = load_data(data_path, dataset_size=10, train=False)

train_dataloader = create_dataloader(train_data)
test_dataloader = create_dataloader(test_data)

# Visualize random trends/segments
visualize_trends(train_data, segment_names, num_plots=3)

# Perform PCA for dimensionality reduction
reduced_data, pca = perform_pca(train_data, num_components=2)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Visualize the correlation matrix
visualize_correlation_matrix(train_data)

if __name__ == "__main__":
main()
