From 1c21e2ab66f742754f1bfd55da21c789c10fe431 Mon Sep 17 00:00:00 2001 From: Luis Roberto Mercado Diaz Date: Sat, 3 Feb 2024 20:55:30 -0500 Subject: [PATCH] Update pytorch_file_generation_loader_update.py --- pytorch_file_generation_loader_update.py | 60 +++++++++++++++--------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/pytorch_file_generation_loader_update.py b/pytorch_file_generation_loader_update.py index cea6242..aa25a32 100644 --- a/pytorch_file_generation_loader_update.py +++ b/pytorch_file_generation_loader_update.py @@ -337,38 +337,54 @@ def preprocess_and_save_data(data_path, output_path): +import os +import pandas as pd +import numpy as np +from PIL import Image +import torch +from concurrent.futures import ThreadPoolExecutor + def preprocess_and_save_data(data_path, output_path): if not os.path.exists(output_path): os.makedirs(output_path) all_uids = [uid for uid in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, uid))] - for uid in reversed(all_uids): # Reverse the list of directories - uid_path = os.path.join(data_path, uid) - uid_output_path = os.path.join(output_path, uid) - if not os.path.exists(uid_output_path): - os.makedirs(uid_output_path) - with ThreadPoolExecutor() as executor: + + with ThreadPoolExecutor() as executor: + futures = [] + for uid in reversed(all_uids): # Reverse the list of directories + uid_path = os.path.join(data_path, uid) + uid_output_path = os.path.join(output_path, uid) + os.makedirs(uid_output_path, exist_ok=True) files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))] for file in files_to_process: - executor.submit(preprocess_file, uid_path, file, uid_output_path) + future = executor.submit(preprocess_file, uid_path, file, uid_output_path) + futures.append(future) + + # Optionally, check for completion and handle exceptions + for future in futures: + future.result() def preprocess_file(uid_path, file, uid_output_path): file_path = os.path.join(uid_path, file) - if file.endswith('.csv'): - data = pd.read_csv(file_path, header=None).values - if data.shape != (128, 128): - print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.") - elif file.endswith('.png'): - data = np.array(Image.open(file_path)) - if data.shape != (128, 128): - print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.") - else: - return - data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128) - base_name, extension = os.path.splitext(file) - output_file_path = os.path.join(uid_output_path, f'{base_name}.pt') - torch.save(data_tensor, output_file_path) + try: + if file.endswith('.csv'): + data = pd.read_csv(file_path, header=None).values + if data.shape != (128, 128): + print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.") + elif file.endswith('.png'): + data = np.array(Image.open(file_path)) + if data.shape != (128, 128): + print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.") + else: + return + data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128) + base_name, extension = os.path.splitext(file) + output_file_path = os.path.join(uid_output_path, f'{base_name}.pt') + torch.save(data_tensor, output_file_path) + except Exception as e: + print(f"Failed to process file {file_path} due to {e}") -# Top-level script execution: +# Top-level script execution input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv' output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format' preprocess_and_save_data(input_path, output_path)