
Resolve issue with trends visualization #3

Merged
merged 2 commits on Oct 24, 2023
75 changes: 47 additions & 28 deletions project_1.py
@@ -46,19 +46,20 @@ def get_data_paths(data_format, is_linux=False, is_hpc=False):

def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=True, data_format='csv'):
if data_format not in ['csv', 'png']:
raise ValueError("Invalid data_format. Choose 'csv' or 'png'.")
raise ValueError("Invalid data_format. Choose 'csv' or 'png.")

dir_list_UID = os.listdir(data_path)
UID_list = dir_list_UID[:dataset_size] if train else dir_list_UID[dataset_size:]

X_data = []
X_data_original = [] # Store original data without standardization
segment_names = []

for UID in UID_list:
data_path_UID = os.path.join(data_path, UID)
dir_list_seg = os.listdir(data_path_UID)

for seg in dir_list_seg[:50]: # Limiting to 50 segments
for seg in dir_list_seg: # Use all segments (previously limited to 50)
seg_path = os.path.join(data_path_UID, seg)

if data_format == 'csv' and seg.endswith('.csv'):
@@ -71,18 +72,21 @@ def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=True, data_format='csv'):
else:
continue # Skip other file formats

X_data_original.append(time_freq_tensor.clone()) # Store a copy of the original data
X_data.append(time_freq_tensor)

segment_names.append(seg) # Store segment names

X_data = torch.cat(X_data, 0)
X_data_original = torch.cat(X_data_original, 0)

if standardize:
X_data = standard_scaling(X_data)
X_data = standard_scaling(X_data) # Standardize the data

# Extract labels from CSV files
labels = extract_labels(UID_list, labels_path)

return X_data, segment_names, labels
return X_data_original, X_data, segment_names, labels
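# Reviewer note: standard_scaling is called above but defined outside this
# diff. A minimal sketch of a z-score scaler, assuming scaling over all
# elements at once (hypothetical; the real helper may use per-feature stats):
def standard_scaling_sketch(data):
    # Standardize to zero mean and unit variance; epsilon avoids division by zero
    mean = data.mean()
    std = data.std()
    return (data - mean) / (std + 1e-8)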

def extract_labels(UID_list, labels_path):
labels = {}
@@ -105,20 +109,36 @@ def create_dataloader(data, batch_size=64, shuffle=True):
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
return data_loader
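# Reviewer note: the first lines of create_dataloader fall outside this hunk,
# so the construction of `dataset` is not visible. A plausible sketch,
# assuming an unlabeled TensorDataset (hypothetical, for context only):
def create_dataloader_sketch(data, batch_size=64, shuffle=True):
    from torch.utils.data import TensorDataset, DataLoader
    dataset = TensorDataset(data)  # wrap the tensor; no labels attached here
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)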

def visualize_trends(data, segment_names, num_plots=3, save_path=None):
# Visualize random trends/segments
num_samples, _, _ = data.shape
for _ in range(num_plots):
idx = np.random.randint(0, num_samples)
plt.figure() # Create a new figure for each plot
plt.imshow(data[idx].numpy())
plt.title(f"Segment: {segment_names[idx]}")
plt.colorbar()
if save_path:
subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names}.png")
plt.savefig(subject_save_path)
plt.show()
def visualize_trends(standardized_data, original_data, segment_names, num_plots=3, data_format='csv', save_path=None):
if data_format == 'csv':
num_samples, num_rows, num_columns = original_data.shape
for _ in range(num_plots):
idx = np.random.randint(0, num_samples)
# Create a figure with two stacked subplots
fig, axes = plt.subplots(2, 1, figsize=(16, 5))

# Plot the trend matrix of the original data
axes[0].imshow(original_data[idx].numpy(), aspect='auto', cmap='viridis')
axes[0].set_title(f"Trend Matrix (Original Data): Segment {segment_names[idx]}")
axes[0].set_xlabel("Matrix Width")
axes[0].set_ylabel("Matrix Height")

# Plot the trend matrix of the standardized data
axes[1].imshow(standardized_data[idx].numpy(), aspect='auto', cmap='viridis')
axes[1].set_title(f"Trend Matrix (Standardized Data): Segment {segment_names[idx]}")
axes[1].set_xlabel("Matrix Width")
axes[1].set_ylabel("Matrix Height")
plt.tight_layout()

if save_path:
subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names[idx]}.png")
plt.savefig(subject_save_path)
plt.show()
elif data_format == 'png':
print("Trend visualization is not implemented for the PNG data format.")
else:
print(f"Trend visualization is not supported for data format '{data_format}'.")

def perform_pca(data, num_components=2):
# Perform PCA for dimensionality reduction
data_flattened = data.view(data.size(0), -1) # Flatten the data
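# Reviewer note: the remainder of perform_pca is collapsed in this diff. A
# minimal sketch of how the flattened tensor might be reduced, assuming
# scikit-learn's PCA (hypothetical; the actual implementation may differ):
def perform_pca_sketch(data, num_components=2):
    from sklearn.decomposition import PCA
    data_flattened = data.view(data.size(0), -1).numpy()  # (N, H*W)
    pca = PCA(n_components=num_components)
    reduced = pca.fit_transform(data_flattened)  # (N, num_components)
    return reduced, pca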
@@ -311,36 +331,35 @@ def main():
is_linux = False # Set to True if running on Linux, False if on Windows
is_hpc = False # Set to True if running on HPC, False otherwise

data_format = 'png' # Choose 'csv' or 'png'
data_format = 'csv' # Choose 'csv' or 'png'

data_path, labels_path, saving_path = get_data_paths(data_format, is_linux=is_linux, is_hpc=is_hpc)

train_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
original_data, standardized_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
# test_original_data, test_data, _, _ = load_data(data_path, labels_path, dataset_size=30, train=False)

train_dataloader = create_dataloader(train_data)
train_dataloader = create_dataloader(standardized_data)
# test_dataloader = create_dataloader(test_data)

# Visualize random trends/segments
visualize_trends(train_data, segment_names, num_plots=20)
visualize_trends(standardized_data, original_data, segment_names, num_plots=10, data_format=data_format, save_path=None)

# Perform PCA for dimensionality reduction
# reduced_data, pca = perform_pca(train_data, num_components=2)
# reduced_data, pca = perform_pca(standardized_data, num_components=2)
# print("Explained variance ratio:", pca.explained_variance_ratio_)

# Visualize the correlation matrix
visualize_correlation_matrix(train_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(train_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
visualize_correlation_matrix(standardized_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(standardized_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)

# Perform mean-field variational inference (MFVI) on the data
K = 4 # Number of clusters
miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(train_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)
miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(standardized_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)

# Calculate clustering metrics for MFVI
zi_mfvi = np.argmax(resp_mfvi, axis=1)
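# Reviewer note: the metric computations referenced above are not shown in
# this hunk. One common choice is the silhouette score on the flattened
# inputs (a sketch, assuming scikit-learn; hypothetical):
#
#     from sklearn.metrics import silhouette_score
#     flat = standardized_data.view(standardized_data.size(0), -1).numpy()
#     print("Silhouette score (MFVI):", silhouette_score(flat, zi_mfvi))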
# Perform PCA for dimensionality reduction
reduced_data, pca = perform_pca(train_data, num_components=2)
reduced_data, pca = perform_pca(standardized_data, num_components=2)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Create two plots: PCA results and original labels
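# Reviewer note: the plotting code is collapsed below. A minimal sketch of
# the two scatter plots (hypothetical; `label_array` stands in for whatever
# per-segment label vector the collapsed code derives from `labels`):
#
#     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
#     ax1.scatter(reduced_data[:, 0], reduced_data[:, 1], c=zi_mfvi)
#     ax1.set_title("PCA projection, colored by MFVI cluster")
#     ax2.scatter(reduced_data[:, 0], reduced_data[:, 1], c=label_array)
#     ax2.set_title("PCA projection, colored by original label")
#     plt.show()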