From 1c21e2ab66f742754f1bfd55da21c789c10fe431 Mon Sep 17 00:00:00 2001
From: Luis Roberto Mercado Diaz <lrmercadod@gmail.com>
Date: Sat, 3 Feb 2024 20:55:30 -0500
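Subject: [PATCH] Update pytorch_file_generation_loader_update.py

Share a single ThreadPoolExecutor across all UID directories instead of
creating a new pool per directory, keep the submitted futures and wait on
each one before returning, and wrap preprocess_file in try/except so a
file that fails to convert is reported with its path instead of the
error being silently dropped with the discarded future.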

---
 pytorch_file_generation_loader_update.py | 60 +++++++++++++++---------
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/pytorch_file_generation_loader_update.py b/pytorch_file_generation_loader_update.py
index cea6242..aa25a32 100644
--- a/pytorch_file_generation_loader_update.py
+++ b/pytorch_file_generation_loader_update.py
@@ -337,38 +337,54 @@ def preprocess_and_save_data(data_path, output_path):
 
 
 
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
 def preprocess_and_save_data(data_path, output_path):
     if not os.path.exists(output_path):
         os.makedirs(output_path)
     all_uids = [uid for uid in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, uid))]
-    for uid in reversed(all_uids):  # Reverse the list of directories
-        uid_path = os.path.join(data_path, uid)
-        uid_output_path = os.path.join(output_path, uid)
-        if not os.path.exists(uid_output_path):
-            os.makedirs(uid_output_path)
-        with ThreadPoolExecutor() as executor:
+
+    with ThreadPoolExecutor() as executor:
+        futures = []
+        for uid in reversed(all_uids):  # Reverse the list of directories
+            uid_path = os.path.join(data_path, uid)
+            uid_output_path = os.path.join(output_path, uid)
+            os.makedirs(uid_output_path, exist_ok=True)
             files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
             for file in files_to_process:
-                executor.submit(preprocess_file, uid_path, file, uid_output_path)
+                future = executor.submit(preprocess_file, uid_path, file, uid_output_path)
+                futures.append(future)
+
+        # Wait for every task; result() re-raises anything preprocess_file did not catch
+        for future in futures:
+            future.result()
 
 def preprocess_file(uid_path, file, uid_output_path):
     file_path = os.path.join(uid_path, file)
-    if file.endswith('.csv'):
-        data = pd.read_csv(file_path, header=None).values
-        if data.shape != (128, 128):
-            print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
-    elif file.endswith('.png'):
-        data = np.array(Image.open(file_path))
-        if data.shape != (128, 128):
-            print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
-    else:
-        return
-    data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
-    base_name, extension = os.path.splitext(file)
-    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
-    torch.save(data_tensor, output_file_path)
+    try:
+        if file.endswith('.csv'):
+            data = pd.read_csv(file_path, header=None).values
+            if data.shape != (128, 128):
+                print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
+        elif file.endswith('.png'):
+            data = np.array(Image.open(file_path))
+            if data.shape != (128, 128):
+                print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
+        else:
+            return
+        data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
+        base_name, extension = os.path.splitext(file)
+        output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+        torch.save(data_tensor, output_file_path)
+    except Exception as e:
+        print(f"Failed to process file {file_path} due to {e}")
 
-# Top-level script execution:
+# Top-level script execution
 input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
 output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
 preprocess_and_save_data(input_path, output_path)