Merge pull request #3 from lrm22005/Luis
Resolve issue with trends visualization
lrm22005 committed Oct 24, 2023
2 parents 61f7a2d + 473f0bf commit 0fac61d
Showing 1 changed file with 47 additions and 28 deletions.
project_1.py: 75 changes (47 additions & 28 deletions)
@@ -46,19 +46,20 @@ def get_data_paths(data_format, is_linux=False, is_hpc=False):

def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=True, data_format='csv'):
if data_format not in ['csv', 'png']:
raise ValueError("Invalid data_format. Choose 'csv' or 'png'.")
raise ValueError("Invalid data_format. Choose 'csv' or 'png.")

dir_list_UID = os.listdir(data_path)
UID_list = dir_list_UID[:dataset_size] if train else dir_list_UID[dataset_size:]

X_data = []
X_data_original = [] # Store original data without standardization
segment_names = []

for UID in UID_list:
data_path_UID = os.path.join(data_path, UID)
dir_list_seg = os.listdir(data_path_UID)

for seg in dir_list_seg[:50]: # Limiting to 50 segments
for seg in dir_list_seg[:len(dir_list_seg)]: # Iterate over all segments (no longer limited to 50)
seg_path = os.path.join(data_path_UID, seg)

if data_format == 'csv' and seg.endswith('.csv'):
@@ -71,18 +72,21 @@ def load_data(data_path, labels_path, dataset_size=10, train=True, standardize=T
else:
continue # Skip other file formats

X_data_original.append(time_freq_tensor.clone()) # Store a copy of the original data
X_data.append(time_freq_tensor)

segment_names.append(seg) # Store segment names

X_data = torch.cat(X_data, 0)
X_data_original = torch.cat(X_data_original, 0)

if standardize:
X_data = standard_scaling(X_data)
X_data = standard_scaling(X_data) # Standardize the data

# Extract labels from CSV files
labels = extract_labels(UID_list, labels_path)

return X_data, segment_names, labels
return X_data_original, X_data, segment_names, labels

def extract_labels(UID_list, labels_path):
labels = {}
@@ -105,20 +109,36 @@ def create_dataloader(data, batch_size=64, shuffle=True):
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
return data_loader

def visualize_trends(data, segment_names, num_plots=3, save_path=None):
# Visualize random trends/segments
num_samples, _, _ = data.shape
for _ in range(num_plots):
idx = np.random.randint(0, num_samples)
plt.figure() # Create a new figure for each plot
plt.imshow(data[idx].numpy())
plt.title(f"Segment: {segment_names[idx]}")
plt.colorbar()
if save_path:
subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names}.png")
plt.savefig(subject_save_path)
plt.show()
def visualize_trends(standardized_data, original_data, segment_names, num_plots=3, data_format='csv', save_path=None):
if data_format == 'csv':
num_samples, num_columns, num_rows = original_data.shape
for _ in range(num_plots):
idx = np.random.randint(0, num_samples)
# Create a figure with two stacked subplots (original vs. standardized)
fig, axes = plt.subplots(2, 1, figsize=(16, 5))

# Plot the trend matrix of the original data
axes[0].imshow(original_data[idx], aspect='auto', cmap='viridis')
axes[0].set_title(f"Trend Matrix (Original Data): Segment {segment_names[idx]}")
axes[0].set_xlabel("Matrix Width")
axes[0].set_ylabel("Matrix Height")

# Plot the trend matrix of the standardized data
axes[1].imshow(standardized_data[idx].numpy(), aspect='auto', cmap='viridis')
axes[1].set_title(f"Trend Matrix (Standardized Data): Segment {segment_names[idx]}")
axes[1].set_xlabel("Matrix Width")
axes[1].set_ylabel("Matrix Height")
plt.tight_layout()

if save_path:
subject_save_path = os.path.join(save_path, f"trends_visualization_segment_{segment_names[idx]}.png")
plt.savefig(subject_save_path)
plt.show()
elif data_format == 'png':
print("Trend visualization is currently only implemented for the 'csv' data format; skipping PNG data.")
else:
print(f"Unsupported data_format '{data_format}'; expected 'csv' or 'png'.")

def perform_pca(data, num_components=2):
# Perform PCA for dimensionality reduction
data_flattened = data.view(data.size(0), -1) # Flatten the data
@@ -311,36 +331,35 @@ def main():
is_linux = False # Set to True if running on Linux, False if on Windows
is_hpc = False # Set to True if running on hpc, False if on Windows

data_format = 'png' # Choose 'csv' or 'png'
data_format = 'csv' # Choose 'csv' or 'png'

data_path, labels_path, saving_path = get_data_paths(data_format, is_linux=is_linux, is_hpc=is_hpc)

train_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
original_data, standardized_data, segment_names, labels = load_data(data_path, labels_path, dataset_size=10, train=True, data_format=data_format)
# test_original_data, test_data, _, _ = load_data(data_path, labels_path, dataset_size=30, train=False)

train_dataloader = create_dataloader(train_data)
train_dataloader = create_dataloader(standardized_data)
# test_dataloader = create_dataloader(test_data)

# Visualize random trends/segments
visualize_trends(train_data, segment_names, num_plots=20)
visualize_trends(standardized_data, original_data, segment_names, num_plots=10, data_format=data_format, save_path=None)

# Perform PCA for dimensionality reduction
# reduced_data, pca = perform_pca(train_data, num_components=2)
# reduced_data, pca = perform_pca(standardized_data, num_components=2)
# print("Explained variance ratio:", pca.explained_variance_ratio_)

# Visualize the correlation matrix
visualize_correlation_matrix(train_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(train_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(train_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
visualize_correlation_matrix(standardized_data, segment_names, subject_mode=True, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(standardized_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)
# visualize_correlation_matrix(standardized_data, segment_names, subject_mode=False, num_subjects_to_visualize=None, save_path=saving_path)

# Perform MFVI for your data
K = 4 # Number of clusters
miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(train_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)
miu_mfvi, pi_mfvi, resp_mfvi = perform_mfvi(standardized_data, K, n_optimization_iterations=1000, convergence_threshold=1e-5, run_until_convergence=False)

# Calculate clustering metrics for MFVI
zi_mfvi = np.argmax(resp_mfvi, axis=1)
# Perform PCA for dimensionality reduction
reduced_data, pca = perform_pca(train_data, num_components=2)
reduced_data, pca = perform_pca(standardized_data, num_components=2)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Create two plots: PCA results and original labels
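For reference, a minimal usage sketch of the interface shown in this diff. The import line, the concrete parameter values, and the assumption that project_1.py guards its script entry point are illustrative, not part of the commit:

# Hypothetical usage sketch of the revised API; values below are placeholders.
from project_1 import get_data_paths, load_data, create_dataloader, visualize_trends, perform_pca

data_format = 'csv'  # choose 'csv' or 'png'
data_path, labels_path, saving_path = get_data_paths(data_format, is_linux=False, is_hpc=False)

# load_data now returns the unscaled tensors alongside the standardized ones.
original_data, standardized_data, segment_names, labels = load_data(
    data_path, labels_path, dataset_size=10, train=True, data_format=data_format)

# Plot a few random segments, original vs. standardized, saving figures to saving_path.
visualize_trends(standardized_data, original_data, segment_names,
                 num_plots=3, data_format=data_format, save_path=saving_path)

train_dataloader = create_dataloader(standardized_data, batch_size=64, shuffle=True)

# Optional: project the standardized segments onto two principal components.
reduced_data, pca = perform_pca(standardized_data, num_components=2)
print("Explained variance ratio:", pca.explained_variance_ratio_)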
