From 473f0bfa269582007e4e5cd948892e7516e79fa2 Mon Sep 17 00:00:00 2001
From: Luis Roberto Mercado Diaz
Date: Tue, 24 Oct 2023 16:16:14 -0400
Subject: [PATCH] Resolve issue with trend plotting and data reading

I found an error in the code that returns the plots; this patch resolves
it and updates the related lines. load_data now also returns the original
(unstandardized) data, and visualize_trends plots both the original and
standardized trend matrices for each sampled segment.
---
 project_1.py | 75 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/project_1.py b/project_1.py
index caf0e18..f06acff 100644
--- a/project_1.py
+++ b/project_1.py
@@ -46,19 +46,20 @@ def get_data_paths(data_format, is_linux=False, is_hpc=False):
 
 def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=True, data_format='csv'):
     if data_format not in ['csv', 'png']:
         raise ValueError("Invalid data_format. Choose 'csv' or 'png'.")
 
     dir_list_UID = os.listdir(data_path)
     UID_list = dir_list_UID[:dataset_size] if train else dir_list_UID[dataset_size:]
 
     X_data = []
+    X_data_original = []  # Store original data without standardization
     segment_names = []
 
     for UID in UID_list:
         data_path_UID = os.path.join(data_path, UID)
         dir_list_seg = os.listdir(data_path_UID)
 
-        for seg in dir_list_seg[:50]:  # Limiting to 50 segments
+        for seg in dir_list_seg:  # Process every segment (no longer limited to 50)
             seg_path = os.path.join(data_path_UID, seg)
 
             if data_format == 'csv' and seg.endswith('.csv'):
@@ -71,18 +72,21 @@ def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=T
             else:
                 continue  # Skip other file formats
 
+            X_data_original.append(time_freq_tensor.clone())  # Store a copy of the original data
             X_data.append(time_freq_tensor)
+            segment_names.append(seg)  # Store segment names
 
     X_data = torch.cat(X_data, 0)
+    X_data_original = torch.cat(X_data_original, 0)
 
     if standardize:
-        X_data = standard_scaling(X_data)
+        X_data = standard_scaling(X_data)  # Standardize the data
 
     # Extract labels from CSV files
     labels = extract_labels(UID_list, labels_path)
 
-    return X_data, segment_names, labels
+    return X_data_original, X_data, segment_names, labels
 
 def extract_labels(UID_list, labels_path):
     labels = {}
@@ -105,20 +109,36 @@ def create_dataloader(data, batch_size=64, shuffle=True):
     data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
     return data_loader
 
-def visualize_trends(data, segment_names, num_plots=3, save_path=None):
-    # Visualize random trends/segments
-    num_samples, _, _ = data.shape
-    for _ in range(num_plots):
-        idx = np.random.randint(0, num_samples)
-        plt.figure()  # Create a new figure for each plot
-        plt.imshow(data[idx].numpy())
-        plt.title(f"Segment: {segment_names[idx]}")
-        plt.colorbar()
-        if save_path:
-            subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names}.png")
-            plt.savefig(subject_save_path)
-        plt.show()
+def visualize_trends(standardized_data, original_data, segment_names, num_plots=3, data_format='csv', save_path=None):
+    if data_format == 'csv':
+        num_samples, _, _ = original_data.shape
+        for _ in range(num_plots):
+            idx = np.random.randint(0, num_samples)
+            # Create a figure with two stacked subplots
+            fig, axes = plt.subplots(2, 1, figsize=(16, 5))
+
+            # Plot the trend matrix of the original data
+            axes[0].imshow(original_data[idx].numpy(), aspect='auto', cmap='viridis')
+            axes[0].set_title(f"Trend Matrix (Original Data): Segment {segment_names[idx]}")
+            axes[0].set_xlabel("Matrix Width")
+            axes[0].set_ylabel("Matrix Height")
+
+            # Plot the trend matrix of the standardized data
+            axes[1].imshow(standardized_data[idx].numpy(), aspect='auto', cmap='viridis')
+            axes[1].set_title(f"Trend Matrix (Standardized Data): Segment {segment_names[idx]}")
+            axes[1].set_xlabel("Matrix Width")
+            axes[1].set_ylabel("Matrix Height")
+            plt.tight_layout()
+            if save_path:
+                subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names[idx]}.png")
+                plt.savefig(subject_save_path)
+            plt.show()
+    elif data_format == 'png':
+        print("Trend visualization is not implemented for the 'png' data format.")
+    else:
+        print("Unsupported data format for trend visualization.")
+
 
 def perform_pca(data, num_components=2):
     # Perform PCA for dimensionality reduction
     data_flattened = data.view(data.size(0), -1)  # Flatten the data
@@ -311,36 +331,35 @@ def main():
     is_linux = False  # Set to True if running on Linux, False if on Windows
     is_hpc = False  # Set to True if running on hpc, False if on Windows
 
-    data_format = 'png'  # Choose 'csv' or 'png'
+    data_format = 'csv'  # Choose 'csv' or 'png'
 
     data_path, labels_path, saving_path = get_data_paths(data_format, is_linux=is_linux, is_hpc=is_hpc)
-
-    train_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
+    original_data, standardized_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
     # test_data, _, _ = load_data(data_path, labels_path, dataset_size=30, train=False)
 
-    train_dataloader = create_dataloader(train_data)
+    train_dataloader = create_dataloader(standardized_data)
     # test_dataloader = create_dataloader(test_data)
 
     # Visualize random trends/segments
-    visualize_trends(train_data, segment_names, num_plots=20)
+    visualize_trends(standardized_data, original_data, segment_names, num_plots=10, data_format=data_format, save_path=None)
 
     # Perform PCA for dimensionality reduction
-    # reduced_data, pca = perform_pca(train_data, num_components=2)
+    # reduced_data, pca = perform_pca(standardized_data, num_components=2)
    # print("Explained variance ratio:", pca.explained_variance_ratio_)
 
     # Visualize the correlation matrix
-    visualize_correlation_matrix(train_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
-    # visualize_correlation_matrix(train_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
-    # visualize_correlation_matrix(train_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
+    visualize_correlation_matrix(standardized_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
+    # visualize_correlation_matrix(standardized_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
+    # visualize_correlation_matrix(standardized_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
 
     # Perform MFVI for your data
     K = 4  # Number of clusters
-    miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(train_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)
+    miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(standardized_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)
 
     # Calculate clustering metrics for MFVI
     zi_mfvi = np.argmax(resp_mfvi, axis=1)
 
     # Perform PCA for dimensionality reduction
-    reduced_data, pca = perform_pca(train_data, num_components=2)
+    reduced_data, pca = perform_pca(standardized_data, num_components=2)
     print("Explained variance ratio:", pca.explained_variance_ratio_)
 
     # Create two plots: PCA results and original labels
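
For reviewers: a minimal sketch of how the updated functions are called after this
patch. It is illustration only, not part of the diff; it assumes project_1.py is
importable as a module, and the two paths are placeholders to replace with real
locations.

    # Usage sketch for the new interfaces (hypothetical paths, illustration only).
    from project_1 import load_data, visualize_trends, create_dataloader

    data_path = "/path/to/segment/folders"   # placeholder
    labels_path = "/path/to/label/csvs"      # placeholder

    # load_data now returns the unstandardized tensor first, then the standardized one.
    original_data, standardized_data, segment_names, labels = load_data(
        data_path, labels_path, dataset_size=10, train=True, data_format='csv')

    # visualize_trends now takes both tensors plus the data format, and plots the
    # original and standardized trend matrix for each randomly sampled segment.
    visualize_trends(standardized_data, original_data, segment_names,
                     num_plots=3, data_format='csv', save_path=None)

    # Downstream steps (dataloader, PCA, MFVI) consume the standardized tensor.
    train_dataloader = create_dataloader(standardized_data)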