diff --git a/.gitignore b/.gitignore index 11d1435..233cecd 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ VAE.py model_checkpoint.pt GP_original_data.py Attention_network.py +*.pt diff --git a/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc deleted file mode 100644 index 8d691ba..0000000 Binary files a/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc and /dev/null differ diff --git a/BML_project/active_learning/ss_active_learning.py b/BML_project/active_learning/ss_active_learning.py index 4442a34..2758c81 100644 --- a/BML_project/active_learning/ss_active_learning.py +++ b/BML_project/active_learning/ss_active_learning.py @@ -54,6 +54,7 @@ def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100): for batch in data_loader: data = batch['data'].view(batch['data'].size(0), -1).to(device).cpu().numpy() minibatch_kmeans.partial_fit(data) + # minibatch_kmeans.fit(data) # Dong, 01/22/2024: Debug return minibatch_kmeans diff --git a/BML_project/cassey_CS330_torch.yml b/BML_project/cassey_CS330_torch.yml new file mode 100644 index 0000000..fc86d9e --- /dev/null +++ b/BML_project/cassey_CS330_torch.yml @@ -0,0 +1,272 @@ +name: CS330_torch +channels: + - pytorch + - nvidia + - anaconda + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - abseil-cpp=20211102.0=h27087fc_1 + - absl-py=2.0.0=pyhd8ed1ab_0 + - aiohttp=3.8.5=py311h5eee18b_0 + - aiosignal=1.3.1=pyhd8ed1ab_0 + - asttokens=2.4.0=pyhd8ed1ab_0 + - async-timeout=4.0.2=py311h06a4308_0 + - attrs=23.1.0=pyh71513ae_1 + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=pyhd8ed1ab_3 + - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0 + - blas=1.1=openblas + - blinker=1.6.3=pyhd8ed1ab_0 + - bottleneck=1.3.5=py311hbed6279_0 + - brotli=1.0.9=h9c3ff4c_4 + - brotlipy=0.7.0=py311h5eee18b_1002 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.1=h5eee18b_0 + - ca-certificates=2023.12.12=h06a4308_0 + - cachetools=5.3.1=pyhd8ed1ab_0 + - cairo=1.16.0=hb05425b_5 + - certifi=2023.11.17=py311h06a4308_0 + - cffi=1.15.1=py311h5eee18b_3 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.1.7=unix_pyh707e725_0 + - cloudpickle=2.2.1=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_0 + - comm=0.1.4=pyhd8ed1ab_0 + - contourpy=1.0.5=py311hdb19cb5_0 + - cryptography=41.0.3=py311hdda0065_0 + - cuda-cudart=11.8.89=0 + - cuda-cupti=11.8.87=0 + - cuda-libraries=11.8.0=0 + - cuda-nvrtc=11.8.89=0 + - cuda-nvtx=11.8.86=0 + - cuda-runtime=11.8.0=0 + - cycler=0.12.1=pyhd8ed1ab_0 + - cyrus-sasl=2.1.28=h52b45da_1 + - dbus=1.13.18=hb2f20db_0 + - debugpy=1.6.7=py311h6a678d5_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - eigen=3.4.0=h4bd325d_0 + - exceptiongroup=1.1.3=pyhd8ed1ab_0 + - executing=1.2.0=pyhd8ed1ab_0 + - expat=2.5.0=h6a678d5_0 + - ffmpeg=4.2.2=h20bf706_0 + - filelock=3.9.0=py311h06a4308_0 + - fontconfig=2.14.1=h4c34cd2_2 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=h4a9f257_0 + - frozenlist=1.3.3=py311h5eee18b_0 + - fsspec=2023.10.0=pyhca7485f_0 + - giflib=5.2.1=h5eee18b_3 + - glib=2.69.1=he621ea3_2 + - gmp=6.2.1=h295c915_3 + - gmpy2=2.1.2=py311hc9b5ff0_0 + - gnutls=3.6.15=he1e5248_0 + - google-auth=2.23.2=pyhca7485f_0 + - google-auth-oauthlib=1.0.0=pyhd8ed1ab_1 + - googledrivedownloader=0.4=pyhd3deb0d_1 + - gpytorch=1.11=pyhd8ed1ab_0 + - graphite2=1.3.14=h295c915_1 + - grpc-cpp=1.48.2=he1ff14a_1 + - grpcio=1.48.2=py311he1ff14a_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - 
gstreamer=1.14.1=h5eee18b_1 + - h5py=3.9.0=py311hdd6beaf_0 + - harfbuzz=4.3.0=hf52aaf7_1 + - hdf5=1.12.1=h2b7332f_3 + - icu=58.2=hf484d3e_1000 + - idna=3.4=py311h06a4308_0 + - imageio=2.31.5=pyh8c1a49c_0 + - importlib-metadata=6.8.0=pyha770c72_0 + - importlib_metadata=6.8.0=hd8ed1ab_0 + - iniconfig=1.1.1=pyhd3eb1b0_0 + - intel-openmp=2023.1.0=hdb19cb5_46305 + - ipykernel=6.25.2=pyh2140261_0 + - ipython=8.16.1=pyh0d859eb_0 + - jaxtyping=0.2.25=pyhd8ed1ab_0 + - jedi=0.19.1=pyhd8ed1ab_0 + - jinja2=3.1.2=py311h06a4308_0 + - joblib=1.2.0=py311h06a4308_0 + - jpeg=9e=h5eee18b_1 + - jupyter_client=8.3.1=pyhd8ed1ab_0 + - jupyter_core=4.12.0=py311h38be061_0 + - kiwisolver=1.4.4=py311h6a678d5_0 + - krb5=1.20.1=h143b758_1 + - lame=3.100=h7b6447c_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.38=h1181459_1 + - lerc=3.0=h295c915_0 + - libclang=14.0.6=default_hc6dbbc7_1 + - libclang13=14.0.6=default_he11475f_1 + - libcublas=11.11.3.6=0 + - libcufft=10.9.0.58=0 + - libcufile=1.7.2.10=0 + - libcups=2.4.2=h2d74bed_1 + - libcurand=10.3.3.141=0 + - libcurl=7.88.1=h251f7ec_2 + - libcusolver=11.4.1.48=0 + - libcusparse=11.7.5.86=0 + - libdeflate=1.17=h5eee18b_1 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libevent=2.1.12=hdbd6064_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran=3.0.0=1 + - libgfortran-ng=11.2.0=h00389a5_1 + - libgfortran5=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libiconv=1.16=h7f8727e_2 + - libidn2=2.3.4=h5eee18b_0 + - libjpeg-turbo=2.0.0=h9bf148f_0 + - libllvm14=14.0.6=hdb19cb5_3 + - libnghttp2=1.52.0=h2d74bed_1 + - libnpp=11.8.0.86=0 + - libnvjpeg=11.9.0.86=0 + - libopenblas=0.3.21=h043d6bf_0 + - libopus=1.3.1=h7f98852_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=12.15=hdbd6064_1 + - libprotobuf=3.20.3=he621ea3_0 + - libsodium=1.0.18=h36c2ea0_1 + - libssh2=1.10.0=hdbd6064_2 + - libstdcxx-ng=11.2.0=h1234567_1 + - libtasn1=4.19.0=h5eee18b_0 + - libtiff=4.5.1=h6a678d5_0 + - libunistring=0.9.10=h27cfd23_0 + - libuuid=1.41.5=h5eee18b_0 + - libvpx=1.7.0=h439df22_0 + - libwebp=1.3.2=h11a3e52_0 + - libwebp-base=1.3.2=h5eee18b_0 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.1=h5eee18b_1 + - libxml2=2.10.4=hcbfbd50_0 + - libxslt=1.1.37=h2085143_0 + - linear_operator=0.5.2=pyhd8ed1ab_0 + - llvm-openmp=14.0.6=h9e868ea_0 + - lockfile=0.12.2=py311h06a4308_0 + - lz4-c=1.9.4=h6a678d5_0 + - markdown=3.5=pyhd8ed1ab_0 + - markupsafe=2.1.1=py311h5eee18b_0 + - matplotlib=3.7.2=py311h06a4308_0 + - matplotlib-base=3.7.2=py311ha02d727_0 + - matplotlib-inline=0.1.6=pyhd8ed1ab_0 + - mkl=2023.1.0=h213fc3f_46343 + - mkl-service=2.4.0=py311h5eee18b_1 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.2=hb69a4c5_1 + - mpmath=1.3.0=py311h06a4308_0 + - multidict=6.0.2=py311h5eee18b_0 + - munkres=1.1.4=pyh9f0ad1d_0 + - mysql=5.7.24=h721c034_2 + - ncurses=6.4=h6a678d5_0 + - nest-asyncio=1.5.6=pyhd8ed1ab_0 + - nettle=3.7.3=hbbd107a_1 + - networkx=3.1=py311h06a4308_0 + - ninja=1.10.2=h06a4308_5 + - ninja-base=1.10.2=hd09550d_5 + - numexpr=2.8.7=py311h812550d_0 + - numpy=1.26.0=py311h24aa872_0 + - numpy-base=1.26.0=py311hbfb1bba_0 + - oauthlib=3.2.2=pyhd8ed1ab_0 + - openblas=0.3.3=ha44fe06_1 + - opencv=4.6.0=py311h10ae9b0_5 + - openh264=2.1.1=h4ff587b_0 + - openjpeg=2.4.0=h3ad879b_0 + - openssl=3.0.12=h7f8727e_0 + - opt_einsum=3.3.0=pyhc1e730c_2 + - packaging=23.2=pyhd8ed1ab_0 + - pandas=2.0.3=py311ha02d727_0 + - parso=0.8.3=pyhd8ed1ab_0 + - pcre=8.45=h9c3ff4c_0 + - pexpect=4.8.0=pyh1a96a4e_2 + - pickleshare=0.7.5=pyhd3eb1b0_1003 + - pillow=10.0.1=py311ha6cbd5a_0 + - 
pip=23.2.1=py311h06a4308_0 + - pixman=0.40.0=h7f8727e_1 + - pluggy=1.0.0=py311h06a4308_1 + - ply=3.11=py_1 + - pretty_errors=1.2.25=pyhd8ed1ab_0 + - prompt-toolkit=3.0.39=pyha770c72_0 + - prompt_toolkit=3.0.39=hd8ed1ab_0 + - protobuf=3.20.3=py311h6a678d5_0 + - psutil=5.9.0=py311h5eee18b_0 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pure_eval=0.2.2=pyhd8ed1ab_0 + - pyasn1=0.5.0=pyhd8ed1ab_0 + - pyasn1-modules=0.3.0=pyhd8ed1ab_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pygments=2.16.1=pyhd8ed1ab_0 + - pyjwt=2.8.0=pyhd8ed1ab_0 + - pyopenssl=23.2.0=py311h06a4308_0 + - pyparsing=3.0.9=pyhd8ed1ab_0 + - pyqt=5.15.7=py311h6a678d5_0 + - pyqt5-sip=12.11.0=py311h6a678d5_0 + - pysocks=1.7.1=py311h06a4308_0 + - pytest=7.4.0=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python-tzdata=2023.3=pyhd3eb1b0_0 + - python_abi=3.11=2_cp311 + - pytorch=2.1.0=cpu_py311h53e38e9_0 + - pytorch-cuda=11.8=h7e8668a_5 + - pytorch-model-summary=0.1.1=py_0 + - pytorch-mutex=1.0=cuda + - pytz=2023.3.post1=py311h06a4308_0 + - pyu2f=0.1.5=pyhd8ed1ab_0 + - pyyaml=6.0=py311h5eee18b_1 + - pyzmq=25.1.0=py311h6a678d5_0 + - qt-main=5.15.2=h7358343_9 + - qt-webengine=5.15.9=hbbf29b9_6 + - qtwebkit=5.212=h3fafdc1_5 + - re2=2022.04.01=h27087fc_0 + - readline=8.2=h5eee18b_0 + - requests=2.31.0=py311h06a4308_0 + - requests-oauthlib=1.3.1=pyhd8ed1ab_0 + - rsa=4.9=pyhd8ed1ab_0 + - scikit-learn=1.2.2=py311h6a678d5_1 + - scipy=1.11.3=py311h24aa872_0 + - seaborn=0.12.2=py311h06a4308_0 + - setuptools=68.0.0=py311h06a4308_0 + - sip=6.6.2=py311h6a678d5_0 + - six=1.16.0=pyh6c4a22f_0 + - sqlite=3.41.2=h5eee18b_0 + - stack_data=0.6.2=pyhd8ed1ab_0 + - sympy=1.11.1=py311h06a4308_0 + - tbb=2021.8.0=hdb19cb5_0 + - tensorboard=2.14.1=pyhd8ed1ab_0 + - tensorboard-data-server=0.7.0=py311h52d8a92_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - tk=8.6.12=h1ccaba5_0 + - toml=0.10.2=pyhd8ed1ab_0 + - torchaudio=2.1.0=py311_cu118 + - torchinfo=1.8.0=pyhd8ed1ab_0 + - torchtriton=2.1.0=py311 + - torchvision=0.15.2=cpu_py311h6e929fa_0 + - tornado=6.3.3=py311h5eee18b_0 + - tqdm=4.66.1=pyhd8ed1ab_0 + - traitlets=5.11.2=pyhd8ed1ab_0 + - typeguard=2.13.3=py311h06a4308_0 + - typing-extensions=4.7.1=py311h06a4308_0 + - typing_extensions=4.7.1=py311h06a4308_0 + - tzdata=2023c=h04d1e81_0 + - urllib3=1.26.16=py311h06a4308_0 + - wcwidth=0.2.8=pyhd8ed1ab_0 + - werkzeug=3.0.0=pyhd8ed1ab_0 + - wheel=0.41.2=py311h06a4308_0 + - x264=1!157.20191217=h7b6447c_0 + - xz=5.4.2=h5eee18b_0 + - yaml=0.2.5=h7b6447c_0 + - yarl=1.8.1=py311h5eee18b_0 + - zeromq=4.3.4=h9c3ff4c_1 + - zipp=3.17.0=pyhd8ed1ab_0 + - zlib=1.2.13=h5eee18b_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - beautifulsoup4==4.12.2 + - gdown==4.7.1 + - soupsieve==2.5 + - torchsummary==1.5.1 +prefix: /home/doh16101/anaconda3/envs/CS330_torch diff --git a/BML_project/models/Colab_example_dataloader_2024_04_04.ipynb b/BML_project/models/Colab_example_dataloader_2024_04_04.ipynb new file mode 100644 index 0000000..4495514 --- /dev/null +++ b/BML_project/models/Colab_example_dataloader_2024_04_04.ipynb @@ -0,0 +1,22 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# R:\\ENGR_Chon\\Darren\\NIH_Pulsewatch\\Poincare_pt\\128x128\n", + "# Darren created the PT files again (because UID 120 has missing files in the original csv file)\n", + "# I need to prepare for my interview, and I will tar those PT files again and test your code on Colab later." 
+ ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc b/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc deleted file mode 100644 index 75ee3d4..0000000 Binary files a/BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc and /dev/null differ diff --git a/BML_project/models/ss_gp_model.py b/BML_project/models/ss_gp_model.py index c18f06f..355a6fd 100644 --- a/BML_project/models/ss_gp_model.py +++ b/BML_project/models/ss_gp_model.py @@ -4,12 +4,15 @@ @author: lrm22005 """ +import os import numpy as np from tqdm import tqdm import torch import gpytorch from sklearn.metrics import precision_recall_fscore_support, roc_auc_score from sklearn.preprocessing import label_binarize +from utils_gp.data_loader import preprocess_data_train_val,preprocess_data_test +import time num_latents = 6 # This should match the complexity of your data or the number of tasks num_tasks = 4 # This should match the number of output classes or tasks @@ -20,7 +23,8 @@ class MultitaskGPModel(gpytorch.models.ApproximateGP): def __init__(self): # Let's use a different set of inducing points for each latent function - inducing_points = torch.rand(num_latents, num_inducing_points, 127 * 128) # Assuming flattened 128x128 images + inducing_points = torch.rand(num_latents, num_inducing_points, 128 * 128) # Assuming flattened 128x128 images + # Dong, 01/22/2024: I will use 128 * 128. # We have to mark the CholeskyVariationalDistribution as batch # so that we learn a variational distribution for each task @@ -69,11 +73,49 @@ def forward(self, x): return latent_pred -def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt'): +def train_gp_model(train_loader, val_loader, batch_size,\ + data_format, clinical_trial_train, clinical_trial_test,\ + clinical_trial_unlabeled,\ + num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt',\ + resume_training=False,\ + datackpt_name = 'dataset_checkpoint.pt',modelckpt_name = 'model_checkpoint_full.pt'): + print(f'Debug: resume_training:{resume_training}, checkpoint_path: {checkpoint_path}') model = MultitaskGPModel().to(device) likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=4, num_classes=4).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=0.1) mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset)) + + # Load checkpoint if resuming training for gp model. + start_epoch = 0 + flag_reload_dataloader = False # We do not need to reset train loader in the new epoch. + ckpt_model_file = os.path.join(checkpoint_path,modelckpt_name) + if resume_training and os.path.exists(ckpt_model_file): + print(f'Debug: loading ckpt: {ckpt_model_file}') + checkpoint = torch.load(ckpt_model_file) + model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + start_epoch = checkpoint.get('epoch', 0) # Resume from the same epoch because you did not finished it. + + # Update the dataloader if there are segments finished. + finished_seg_names = checkpoint['finished_seg_names'] + + if len(finished_seg_names) > 0: + # There were segments used in training. Only update the train loader. 
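The resume branch above relies on finished_seg_names being stored in the checkpoint as a list of per-batch lists of segment names. A minimal sketch of that bookkeeping, mirroring remove_finished_segment_names_and_labels in utils_gp/data_loader.py (segment names and labels here are hypothetical):

# Sketch only: drop the segments already trained in the interrupted epoch before rebuilding the loader.
finished_seg_names = [['seg_001', 'seg_002'], ['seg_003']]   # one inner list per finished batch
labels = {'seg_001': 0, 'seg_002': 1, 'seg_003': 2, 'seg_004': 3}
remain_labels = labels.copy()
for batch in finished_seg_names:
    for seg in batch:
        remain_labels.pop(seg)                               # remove each already-trained segment
print(list(remain_labels.keys()))                            # ['seg_004'] goes into the rebuilt train loader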
+ flag_reload_dataloader = True + print('Debug: renewing train_loader now...') + startTime_for_tictoc = time.time() + # ---- Dong, 02/15/2024: I want to test training on large dataset and resume training. ---- + # train_loader,_,_ = preprocess_data_train_val(data_format, clinical_trial_train, clinical_trial_test, batch_size, finished_seg_names,\ + # read_all_labels=False) + train_loader = preprocess_data_test(data_format = data_format, \ + clinical_trial_unlabeled=clinical_trial_unlabeled, \ + batch_size=batch_size,\ + finished_seg_names=finished_seg_names,\ + read_all_labels=False) + endTime_for_tictoc = time.time() - startTime_for_tictoc + print(f'Debug: took {endTime_for_tictoc} to renew the train_loader') + best_val_loss = float('inf') epochs_no_improve = 0 @@ -85,19 +127,69 @@ def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, pat 'train_loss': [] # Add a list to store training losses } - for epoch in tqdm(range(num_iterations), desc='Training', unit='epoch', leave=False): - for train_batch in train_loader: + for epoch in tqdm(range(start_epoch,num_iterations), desc='Training', unit='epoch', leave=False): + finished_idx = [] + finished_seg_names = [] + for batch_index, train_batch in enumerate(train_loader): + print(f'Debug: now in a new batch of data! {batch_index}/{len(train_loader)}') # train_batch is the image data. model.train() likelihood.train() optimizer.zero_grad() + train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device) # Use reshape here train_y = train_batch['label'].to(device) + # Get finished segment index in dataloader and segment name. + temp_finished_idx = train_batch['idx'] + temp_finished_seg_names = train_batch['segment_name'] + print('Debug: temp_finished_idx:',temp_finished_idx) + print('Debug: temp_finished_segment_name:',temp_finished_seg_names) + finished_idx.append(temp_finished_idx) + finished_seg_names.append(temp_finished_seg_names) output = model(train_x) loss = -mll(output, train_y) metrics['train_loss'].append(loss.item()) # Store the training loss loss.backward() optimizer.step() + save_ckpt_model_path = os.path.join(checkpoint_path,modelckpt_name) + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'likelihood_state_dict': likelihood.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'best_val_loss': best_val_loss, + 'finished_seg_names':finished_seg_names, + 'finished_idx':finished_idx + # Include other metrics as needed + }, save_ckpt_model_path) + + # Optionally, save the dataset state at intervals or after certain conditions + save_ckpt_dataset_path = os.path.join(checkpoint_path,datackpt_name) + train_loader.dataset.save_checkpoint(save_ckpt_dataset_path) # Here, manage the index as needed + + # import sys + # if epoch == 3 and batch_index == 5: + # sys.exit(f"Debug: Manually stop the program at epoch {epoch} batch {batch_index}.") + + # Reset the finished segments again because we finished one epoch. + finished_idx = [] + finished_seg_names = [] + if flag_reload_dataloader: + print('Debug: reset the train_loader now...') + # Reset the train dataloader now. 
+ startTime_for_tictoc = time.time() + # --- Dong, 02/15/2024: + # train_loader,_,_ = preprocess_data_train_val(data_format, clinical_trial_train, clinical_trial_test, batch_size, finished_seg_names,\ + # read_all_labels=False) + train_loader = preprocess_data_test(data_format = data_format, \ + clinical_trial_unlabeled=clinical_trial_unlabeled, \ + batch_size=batch_size,\ + finished_seg_names=finished_seg_names,\ + read_all_labels=False) + endTime_for_tictoc = time.time() - startTime_for_tictoc + print(f'Debug: took {endTime_for_tictoc} to reset the train_loader') + flag_reload_dataloader = False # Turn off the flag for reseting train dataloader. + # Stochastic validation model.eval() likelihood.eval() @@ -130,19 +222,46 @@ def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, pat if val_loss < best_val_loss: best_val_loss = val_loss epochs_no_improve = 0 - torch.save({'model_state_dict': model.state_dict(), - 'likelihood_state_dict': likelihood.state_dict(), - 'optimizer_state_dict': optimizer.state_dict()}, checkpoint_path) + # torch.save({'model_state_dict': model.state_dict(), + # 'likelihood_state_dict': likelihood.state_dict(), + # 'optimizer_state_dict': optimizer.state_dict(), + # 'train_loader':train_loader, + # 'val_loader':val_loader + # }, checkpoint_path) else: epochs_no_improve += 1 if epochs_no_improve >= patience: print(f"Early stopping triggered at epoch {epoch+1}") break + + # Save checkpoint at the end of each epoch + save_ckpt_model_path = os.path.join(checkpoint_path,modelckpt_name) + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'likelihood_state_dict': likelihood.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'best_val_loss': best_val_loss, + 'finished_seg_names':finished_seg_names, + 'finished_idx':finished_idx + # Include other metrics as needed + }, save_ckpt_model_path) + print('Debug: saved model checkpoint with epoch.',save_ckpt_model_path) + + # Optionally, save the dataset state at intervals or after certain conditions + save_ckpt_dataset_path = os.path.join(checkpoint_path,datackpt_name) + train_loader.dataset.save_checkpoint(save_ckpt_dataset_path) # Finished all batches, so start from zero. 
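For reference, a small sketch of what the two saves above leave on disk after every epoch; the file names are the defaults passed to train_gp_model, and the directory is a hypothetical stand-in for checkpoint_path:

import os
import torch

ckpt_dir = './Project_1_analysis'   # hypothetical checkpoint_path
model_ckpt = torch.load(os.path.join(ckpt_dir, 'model_checkpoint_full.pt'), map_location='cpu')
data_ckpt = torch.load(os.path.join(ckpt_dir, 'dataset_checkpoint.pt'), map_location='cpu')
print(sorted(model_ckpt.keys()))    # best_val_loss, epoch, finished_idx, finished_seg_names,
                                    # likelihood_state_dict, model_state_dict, optimizer_state_dict
print(sorted(data_ckpt.keys()))     # labels, segment_names (written by CustomDataset.save_checkpoint)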
+ + if epochs_no_improve >= patience: + print(f"Early stopping triggered at epoch {epoch+1}") + break - checkpoint = torch.load(checkpoint_path) - model.load_state_dict(checkpoint['model_state_dict']) - likelihood.load_state_dict(checkpoint['likelihood_state_dict']) - optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + # Optionally, load the best model at the end of training + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) return model, likelihood, metrics diff --git a/BML_project/ss_main.py b/BML_project/ss_main.py index b784ce4..326f80f 100644 --- a/BML_project/ss_main.py +++ b/BML_project/ss_main.py @@ -4,16 +4,48 @@ @author: lrm22005 """ -import tqdm +from tqdm import tqdm import torch -from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples +from utils_gp.data_loader import preprocess_data_train_val, split_uids, update_train_loader_with_uncertain_samples, preprocess_data_test from models.ss_gp_model import MultitaskGPModel, train_gp_model from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results +import os +import pickle +from datetime import datetime +now = datetime.now() # Get the time now for model checkpoint saving. + +dt_string = now.strftime("%Y_%m_%d_%H_%M_%S") # YYYY_mm_dd_HH_MM_SS, for model saving. +print("The date and time suffix of the model file is", dt_string) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +class CheckpointManager: + def __init__(self, checkpoint_dir): + self.checkpoint_dir = checkpoint_dir # Store the directory path for checkpoints + if not os.path.exists(checkpoint_dir): # Check if the directory exists + os.makedirs(checkpoint_dir) # Create the directory if it does not exist + + def save_checkpoint(self, loader_name, iteration, additional_state): + # Construct the checkpoint file path using the loader name + checkpoint_path = os.path.join(self.checkpoint_dir, f"{loader_name}_checkpoint.pkl") + checkpoint = { + 'iteration': iteration, # Store the current iteration + 'additional_state': additional_state # Store any additional state information + } + with open(checkpoint_path, 'wb') as f: # Open the file in write-binary mode + pickle.dump(checkpoint, f) # Serialize the checkpoint dictionary to the file + + def load_checkpoint(self, loader_name): + # Construct the checkpoint file path using the loader name + checkpoint_path = os.path.join(self.checkpoint_dir, f"{loader_name}_checkpoint.pkl") + try: + with open(checkpoint_path, 'rb') as f: # Open the file in read-binary mode + return pickle.load(f) # Deserialize the checkpoint file and return it + except FileNotFoundError: # Handle the case where the checkpoint file does not exist + return None # Return None if the file is not found + def main(): # Set parameters like n_classes, batch_size, etc. 
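The CheckpointManager defined above can be exercised on its own; a short usage sketch with a hypothetical directory name:

mgr = CheckpointManager('./al_checkpoints')                  # hypothetical directory
mgr.save_checkpoint('train', iteration=3, additional_state={'note': 'after AL round 3'})
ckpt = mgr.load_checkpoint('train')
if ckpt is not None:
    print(ckpt['iteration'], ckpt['additional_state'])       # 3 {'note': 'after AL round 3'}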
n_classes = 4 @@ -21,7 +53,29 @@ def main(): clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids() data_format = 'pt' # Preprocess data - train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size) + # ---- Dong, 02/15/2024: I want to test loading large amount dataset. ---- + # train_loader, val_loader, saving_path = preprocess_data_train_val(data_format = data_format, \ + _, val_loader, saving_path = preprocess_data_train_val(data_format = data_format, \ + clinical_trial_train=clinical_trial_train, \ + clinical_trial_test=clinical_trial_test, \ + batch_size=batch_size,\ + finished_seg_names = [],\ + read_all_labels=False) + # ---- Dong, 02/15/2024: I want to test loading large amount dataset. ---- + # test_loader = preprocess_data_test(data_format = data_format, \ + train_loader = preprocess_data_test(data_format = data_format, \ + clinical_trial_unlabeled=clinical_trial_unlabeled, \ + batch_size=batch_size,\ + finished_seg_names=[],\ + read_all_labels=False) + + menu_segment_names = train_loader.dataset.segment_names # All the segments to be run in the training dataset. + menu_labels = train_loader.dataset.labels # All the ground truth labels + print('Debug: len(menu_segment_names)',len(menu_segment_names)) + print('Debug: len(menu_labels)',len(menu_labels)) + + print('Debug: len(train_loader)',len(train_loader)) + print('Debug: dir(train_loader.dataset)',dir(train_loader.dataset)) kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device) @@ -33,7 +87,21 @@ def main(): } # Initial model training - model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes) + model, likelihood, training_metrics = train_gp_model( + train_loader = train_loader, + val_loader = val_loader, + num_iterations=50, + n_classes=n_classes, + patience=10, + checkpoint_path=saving_path, + resume_training=True, + datackpt_name = 'dataset_checkpoint.pt', + modelckpt_name = 'model_checkpoint_full.pt', + batch_size=batch_size, + data_format = data_format, + clinical_trial_train = clinical_trial_train, + clinical_trial_test = clinical_trial_test, + clinical_trial_unlabeled=clinical_trial_unlabeled) # Dong: remember to change this function in its code. # Save the training metrics for future visualization results['train_loss'].extend(training_metrics['train_loss']) @@ -42,38 +110,80 @@ def main(): results['validation_metrics']['f1'].extend(training_metrics['f1_score']) # results['validation_metrics']['auc_roc'].extend(training_metrics['auc_roc']) + # --- Dong: copied from GP_Original_Checkpoint.py --- + # Initialize the CheckpointManager + checkpoint_manager = CheckpointManager(saving_path) + + # Attempt to load a training checkpoint + train_checkpoint = checkpoint_manager.load_checkpoint('train') + start_iteration = train_checkpoint['iteration'] if train_checkpoint else 0 + print('Debug: start_iteration is:',start_iteration) + # Dong, 01/25/2024: save it first before entering the active learning. + additional_state = { + 'model_state': model.state_dict(), + 'likelihood':likelihood, + 'val_loader':val_loader, + 'train_loader':train_loader + # Include other states like optimizer, scheduler, etc. 
+ } + checkpoint_manager.save_checkpoint('train', start_iteration, additional_state) + # --------------------------------------------------- + active_learning_iterations = 10 # Active Learning Iterations - for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True): + for iteration in tqdm(range(start_iteration,active_learning_iterations), desc='Active Learning', unit='iteration', leave=True): + print(f"Active Learning Iteration: {iteration+1}/{active_learning_iterations}") # Perform uncertainty sampling to select new samples from the validation set - uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=batch_size, n_batches=5) - + uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=50, n_batches=5, device=device) + labeled_samples = label_samples(uncertain_sample_indices, val_loader.dataset) # Update the training loader with uncertain samples - train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size) - print(f"Updated training data size: {len(train_loader.dataset)}") + train_loader = update_train_loader_with_uncertain_samples(train_loader, labeled_samples, batch_size) + + # Optionally, save the dataset state at intervals or after certain conditions + train_loader.dataset.save_checkpoint(dataset_checkpoint_path) # Here, manage the index as needed # Re-train the model with the updated training data - model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt') + model, likelihood, val_metrics = train_gp_model( + train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, + checkpoint_path=saving_path, resume_training=True, batch_size=batch_size) # Store the validation metrics after each active learning iteration results['validation_metrics']['precision'].append(val_metrics['precision']) results['validation_metrics']['recall'].append(val_metrics['recall']) - results['validation_metrics']['f1'].append(val_metrics['f1']) + results['validation_metrics']['f1'].append(val_metrics['f1_score']) # results['validation_metrics']['auc_roc'].append(val_metrics['auc_roc']) + # Save checkpoint at the end of each iteration + additional_state = { + 'model_state': model.state_dict(), + 'likelihood':likelihood, + 'val_loader':val_loader, + 'train_loader':train_loader + # Include other states like optimizer, scheduler, etc. 
+ } + checkpoint_manager.save_checkpoint('train', iteration, additional_state) + # Compare K-Means with GP model predictions after retraining gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device) plot_comparative_results(gp_vs_kmeans_data, original_labels) # Final evaluation on test set + import subprocess + print('Start to run bash script!') + subprocess.call("./BML_project/untar_unlabeled_PT.sh") + print('End to run bash script!') + + test_loader = preprocess_data_test(data_format = data_format, \ + clinical_trial_unlabeled=clinical_trial_unlabeled, \ + batch_size=batch_size,\ + finished_seg_names=[],\ + read_all_labels=False) test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes) test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device) results['test_metrics'] = test_metrics test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device) - - print(f"Length of original_labels: {len(original_labels)}, Length of gp_predictions: {len(gp_predictions)}") plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels) # Visualization of results diff --git a/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc b/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc deleted file mode 100644 index ed2626e..0000000 Binary files a/BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc and /dev/null differ diff --git a/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc b/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc deleted file mode 100644 index 46b1836..0000000 Binary files a/BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc and /dev/null differ diff --git a/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc b/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc deleted file mode 100644 index f53ef75..0000000 Binary files a/BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc and /dev/null differ diff --git a/BML_project/utils_gp/data_loader.py b/BML_project/utils_gp/data_loader.py index fe3bc7e..bd22a79 100644 --- a/BML_project/utils_gp/data_loader.py +++ b/BML_project/utils_gp/data_loader.py @@ -5,6 +5,8 @@ @author: lrm22005 """ import os +# For saving checkpoints +from pathlib import Path import numpy as np import pandas as pd from PIL import Image @@ -12,10 +14,28 @@ from torch.utils.data import Dataset, DataLoader from sklearn.preprocessing import StandardScaler from torchvision.transforms import ToTensor +import socket +# Downsampling image +import cv2 +# import torchvision.transforms as T +# transform for rectangular resize +img_size = 32 # Dong, 01/30/2024: this is for testing the CIFAR10 models. +# transform = T.Resize((img_size,img_size)) def split_uids(): # ====== Load the per subject arrythmia summary ====== - df_summary = pd.read_csv(r'\\grove.ad.uconn.edu\research\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn\final_attemp_4_1_Dong_Ohm_summary_20231025.csv') + your_computer_name = socket.gethostname() + if your_computer_name == 'localhost.localdomain': + # Dong, 12/09/2023: I am so sick of changing the path every time on different computer. + # This is Cassey's Luis server name. 
+ df_summary = pd.read_csv(r'/mnt/r/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn/final_attemp_4_1_Dong_Ohm_summary_20231025.csv') + elif your_computer_name == 'Darren_computer_name': + # Darren, you can put your computer name in the elif condition to separate it from Luis's computer. + df_summary = pd.read_csv(r'R:\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn\final_attemp_4_1_Dong_Ohm_summary_20231025.csv') + elif your_computer_name == 'Luis_computer_name': + df_summary = pd.read_csv(r'\\grove.ad.uconn.edu\research\ENGR_Chon\NIH_Pulsewatch_Database\Adjudication_UConn\final_attemp_4_1_Dong_Ohm_summary_20231025.csv') + else: + df_summary = pd.read_csv(r'/content/drive/MyDrive/Adjudication_UConn/final_attemp_4_1_Dong_Ohm_summary_20231025.csv') df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] @@ -88,116 +108,101 @@ def split_uids(): print(f'Clinical trial: selected {len(clinical_trial_test)} UIDs for testing {clinical_trial_test}') print(f'Clinical trial: selected {len(clinical_trial_unlabeled)} UIDs for unlabeled {clinical_trial_unlabeled}') - clinical_trial_train = [clinical_trial_train[0]] - clinical_trial_test = [clinical_trial_test[0]] - clinical_trial_unlabeled = clinical_trial_unlabeled[0:4] + # clinical_trial_train = [clinical_trial_train[0]] + # clinical_trial_test = [clinical_trial_test[0]] + # clinical_trial_unlabeled = clinical_trial_unlabeled[0:4] return clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled +def extract_segment_names_and_labels(UIDs,labels_path,read_all_labels=False): + # Extract all segment names and labels when starting the main function. + # Output: + # segment_names: list of string. + # labels: dictionary, with segment_names as key and label as value. + segment_names = [] + labels = {} + + for UID in UIDs: + label_file = os.path.join(labels_path, UID + "_final_attemp_4_1_Dong.csv") + if os.path.exists(label_file): + print('Debug: this file exists',label_file) + label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) + label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) + for idx, segment_name in enumerate(label_segment_names): + label_val = label_data['label'].values[idx] + if read_all_labels: + # Assign -1 if label is not in [0, 1, 2, 3] + labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1 + if segment_name not in segment_names: + segment_names.append(segment_name) + else: + # Only add segments with labels in [0, 1, 2, 3] + if label_val in [0, 1, 2, 3] and segment_name not in segment_names: + segment_names.append(segment_name) + labels[segment_name] = label_val + print('>>> Number of segments in this dataloader:',len(segment_names)) # Dong, 01/29/2024: know the number of segments before running training epochs. + print('>>> Number of labels in this dataloader:',len(labels)) + return segment_names, labels + +def remove_finished_segment_names_and_labels(labels,finished_seg_names): + # From extract_segment_names_and_labels: + # Input: + # labels: dictionary, with segment_names as key and label as value. + # finished_seg_names: list of string. 
+ remain_labels = labels.copy() + print('Debug: type(remain_labels)',type(remain_labels)) + for batch in finished_seg_names: + for key in batch: + remain_labels.pop(key) + print('Debug: len(labels)',len(labels)) + print('Debug: len(remain_labels)',len(remain_labels)) + + return remain_labels + class CustomDataset(Dataset): - def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False, start_idx=0): + def __init__(self, data_path, labels_path, batch_size,segment_names,labels, standardize=True, data_format='csv', read_all_labels=False): self.data_path = data_path self.labels_path = labels_path - self.UIDs = UIDs self.standardize = standardize self.data_format = data_format self.read_all_labels = read_all_labels self.transforms = ToTensor() - self.start_idx = start_idx # Initial batch index to start from, useful for resuming training - self.refresh_dataset() - - # Initialize the current batch index to None, this could be used if you want to track batch progress within the dataset itself - self.current_batch_index = None - - def refresh_dataset(self): - self.segment_names, self.labels = self.extract_segment_names_and_labels() + self.segment_names = segment_names + self.labels = labels - def add_uids(self, new_uids): - unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs] - self.UIDs.extend(unique_new_uids) - self.refresh_dataset() + # Initialize the current batch index to None + self.batch_size = batch_size def __len__(self): return len(self.segment_names) def save_checkpoint(self, checkpoint_path): - # Enhanced to automatically include 'start_idx' in the checkpoint checkpoint = { 'segment_names': self.segment_names, - 'labels': self.labels, - 'UIDs': self.UIDs, - 'start_idx': self.start_idx # Now also saving start_idx + 'labels': self.labels + # Save the current batch index if provided } torch.save(checkpoint, checkpoint_path) def load_checkpoint(self, checkpoint_path): checkpoint = torch.load(checkpoint_path) + print('Debug: loaded dataset checkpoint!',checkpoint_path) self.segment_names = checkpoint['segment_names'] self.labels = checkpoint['labels'] - self.UIDs = checkpoint['UIDs'] - # Now also loading and setting start_idx from checkpoint - self.start_idx = checkpoint.get('start_idx', 0) self.refresh_dataset() + # Load the current batch index if it exists in the checkpoint def __getitem__(self, idx): - actual_idx = (idx + self.start_idx) % len(self.segment_names) # Adjust index based on start_idx and wrap around if needed - segment_name = self.segment_names[actual_idx] + segment_name = self.segment_names[idx] label = self.labels[segment_name] - if hasattr(self, 'all_data') and actual_idx < len(self.all_data): - time_freq_tensor = self.all_data[actual_idx] + if hasattr(self, 'all_data') and idx < len(self.all_data): + time_freq_tensor = self.all_data[idx] else: time_freq_tensor = self.load_data(segment_name) - - return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name} - def set_current_batch_index(self, index): - self.current_batch_index = index - - def get_current_batch_index(self): - return self.current_batch_index - - def set_start_idx(self, index): - self.start_idx = index - - def add_data_label_pair(self, data, label): - # Assign a unique ID or name for the new data - new_id = len(self.segment_names) - segment_name = f"new_data_{new_id}" - - # Append the new data and label - self.segment_names.append(segment_name) - self.labels[segment_name] = label - - # Append the new data tensor to an attribute 
that holds all the data - if hasattr(self, 'all_data'): - self.all_data.append(data) - else: - self.all_data = [data] - - def extract_segment_names_and_labels(self): - segment_names = [] - labels = {} - - for UID in self.UIDs: - label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv") - if os.path.exists(label_file): - label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) - label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) - for idx, segment_name in enumerate(label_segment_names): - label_val = label_data['label'].values[idx] - if self.read_all_labels: - # Assign -1 if label is not in [0, 1, 2, 3] - labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1 - if segment_name not in segment_names: - segment_names.append(segment_name) - else: - # Only add segments with labels in [0, 1, 2, 3] - if label_val in [0, 1, 2, 3] and segment_name not in segment_names: - segment_names.append(segment_name) - labels[segment_name] = label_val - - return segment_names, labels + + return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name, 'idx': idx} def load_data(self, segment_name): data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0]) @@ -222,32 +227,57 @@ def load_data(self, segment_name): except Exception as e: print(f"Error processing segment: {segment_name}. Exception: {str(e)}") - return torch.zeros((1, 128, 128)) # Return zeros in case of an error + return torch.zeros((1, img_size, img_size)) # Return zeros in case of an error def standard_scaling(self, data): scaler = StandardScaler() data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape) return torch.Tensor(data) -def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=False, drop_last=False, num_workers=4, start_idx=0): - dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels, start_idx=start_idx) +def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='pt', read_all_labels=False, drop_last=False, num_workers=4,\ + finished_seg_names = []): + # Run the main from the beginning. Load all data into the dataloader. + segment_names, labels = extract_segment_names_and_labels(UIDs,labels_path,read_all_labels=read_all_labels) + if len(finished_seg_names) > 0: + # If any segments have been trained. 
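The new cv2 import and img_size = 32 at the top of data_loader.py suggest the loaded 128x128 time-frequency images are downsampled before use; the resize call itself is not part of this hunk, so the following is only a sketch of that step under that assumption:

import cv2
import numpy as np
import torch

tfs = np.random.rand(128, 128).astype(np.float32)            # hypothetical 128x128 TFS image
small = cv2.resize(tfs, (img_size, img_size), interpolation=cv2.INTER_AREA)   # img_size = 32 defined above
tensor = torch.from_numpy(small).unsqueeze(0)                 # shape (1, img_size, img_size), like the zero fallback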
+ remain_labels = remove_finished_segment_names_and_labels(labels,finished_seg_names) + segment_names = list(remain_labels.keys()) + labels = remain_labels.copy() + dataset = CustomDataset(data_path=data_path, \ + labels_path=labels_path, \ + standardize=standardize, \ + data_format=data_format, \ + read_all_labels=read_all_labels, \ + batch_size=batch_size, + segment_names = segment_names, + labels = labels) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2) return dataloader -def get_data_paths(data_format, is_linux=False, is_hpc=False): - if is_linux: +def get_data_paths(data_format): + your_computer_name = socket.gethostname() + print('Debug: your_computer_name',your_computer_name) + if your_computer_name == 'localhost.localdomain': base_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch" labels_base_path = "/mnt/r/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn" saving_base_path = "/mnt/r/ENGR_Chon/Luis/Research/Casseys_case/Project_1_analysis" - elif is_hpc: + elif your_computer_name == 'HPC_computer_name': base_path = "/gpfs/scratchfs1/kic14002/doh16101" labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005" saving_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005/Casseys_case/Project_1_analysis" - else: + elif your_computer_name == 'Darren_computer_name': # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch base_path = "R:\ENGR_Chon\Dong\MATLAB_generate_results\\NIH_PulseWatch" labels_base_path = "R:\ENGR_Chon\\NIH_Pulsewatch_Database\Adjudication_UConn" saving_base_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Luis\Research\Casseys_case" + else: + print('Debug: You are in Google Colab.') + base_path = '/content' + labels_base_path = '/content/drive/MyDrive/Adjudication_UConn' + saving_base_path = '/content/drive/MyDrive/Checkpoint_Colab' + # print('ERROR! YOUR DID NOT GET THE PATH.') + # raise ValueError + if data_format == 'csv': data_path = os.path.join(base_path, "TFS_csv") labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") @@ -262,16 +292,51 @@ def get_data_paths(data_format, is_linux=False, is_hpc=False): saving_path = os.path.join(saving_base_path, "Project_1_analysis") else: raise ValueError("Invalid data format. Choose 'csv' or 'png.") + + # Create the parent path for checkpoints. + Path(saving_path).mkdir(parents=True, exist_ok=True) + return data_path, labels_path, saving_path # Function to extract and preprocess data -def preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size, read_all_labels=False, current_batch_index=0): - start_idx = current_batch_index * batch_size +def preprocess_data_train_val(data_format, clinical_trial_train, clinical_trial_test, batch_size, finished_seg_names,\ + read_all_labels=False): + # Extracts paths and loads data into train, validation, and test loaders + data_path, labels_path, saving_path = get_data_paths(data_format) + + train_loader = load_data_split_batched(data_path=data_path, \ + labels_path=labels_path, \ + UIDs=clinical_trial_train, \ + batch_size = batch_size, \ + standardize=True, \ + data_format=data_format, \ + read_all_labels=read_all_labels,\ + finished_seg_names = finished_seg_names) + # Usually the validation set will not need to resume training. 
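The loaders built from the updated CustomDataset yield dictionaries that also carry the bookkeeping fields the trainer records; a small sketch of one batch (shapes assume the default collate and the 'pt' format):

batch = next(iter(train_loader))
print(batch['data'].shape)           # [batch_size, 1, img_size, img_size] time-frequency images
print(batch['label'].shape)          # [batch_size] integer class labels
print(batch['segment_name'][:3])     # list of segment-name strings, collected into finished_seg_names
print(batch['idx'][:3])              # tensor of dataset indices, collected into finished_idx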
+ val_loader = load_data_split_batched(data_path=data_path, \ + labels_path=labels_path, \ + UIDs=clinical_trial_test, \ + batch_size=batch_size, \ + standardize=True, \ + data_format=data_format, \ + read_all_labels=read_all_labels, \ + finished_seg_names = []) + return train_loader, val_loader, saving_path + +# Function to extract and preprocess data +def preprocess_data_test(data_format, clinical_trial_unlabeled, batch_size, finished_seg_names,\ + read_all_labels=False): + # Extracts paths and loads data into train, validation, and test loaders data_path, labels_path, saving_path = get_data_paths(data_format) - train_loader = load_data_split_batched(data_path, labels_path, clinical_trial_train, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels, start_idx=start_idx) - val_loader = load_data_split_batched(data_path, labels_path, clinical_trial_test, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels, start_idx=start_idx) - test_loader = load_data_split_batched(data_path, labels_path, clinical_trial_unlabeled, batch_size, standardize=True, data_format=data_format, read_all_labels=read_all_labels, start_idx=start_idx) - return train_loader, val_loader, test_loader + test_loader = load_data_split_batched(data_path=data_path, \ + labels_path=labels_path, \ + UIDs=clinical_trial_unlabeled, \ + batch_size=batch_size, \ + standardize=True, \ + data_format=data_format, \ + read_all_labels=read_all_labels,\ + finished_seg_names=finished_seg_names) + return test_loader def map_samples_to_uids(uncertain_sample_indices, dataset): """ diff --git a/main_darren_v1-8GJQ9R3.py b/main_darren_v1-8GJQ9R3.py new file mode 100644 index 0000000..a84b9a3 --- /dev/null +++ b/main_darren_v1-8GJQ9R3.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Apr 18 12:52:53 2024 + +@author: lrmercadod +""" +import torch +import torch.nn as nn +import time +import datetime as dt +import gpytorch +from sklearn.metrics import precision_recall_fscore_support, roc_auc_score +from sklearn.preprocessing import label_binarize + +# Import my own functions and classes +from utils.pathmaster import PathMaster +from utils.dataloader import preprocess_data + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +num_latents = 6 # This should match the complexity of your data or the number of tasks +num_tasks = 4 # This should match the number of output classes or tasks +num_inducing_points = 50 # This is independent and should be sufficient for the input space + +class MultitaskGPModel(gpytorch.models.ApproximateGP): + def __init__(self): + # Let's use a different set of inducing points for each latent function + inducing_points = torch.rand(num_latents, num_inducing_points, 128 * 128) # Assuming flattened 128x128 images + + # We have to mark the CholeskyVariationalDistribution as batch + # so that we learn a variational distribution for each task + variational_distribution = gpytorch.variational.CholeskyVariationalDistribution( + inducing_points.size(-2), batch_shape=torch.Size([num_latents]) + ) + + # We have to wrap the VariationalStrategy in a LMCVariationalStrategy + # so that the output will be a MultitaskMultivariateNormal rather than a batch output + variational_strategy = gpytorch.variational.LMCVariationalStrategy( + gpytorch.variational.VariationalStrategy( + self, inducing_points, variational_distribution, learn_inducing_locations=True + ), + num_tasks=num_tasks, + num_latents=num_latents, + latent_dim=-1 + ) + + 
super().__init__(variational_strategy) + + # The mean and covariance modules should be marked as batch + # so we learn a different set of hyperparameters + self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents])) + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])), + batch_shape=torch.Size([num_latents]) + ) + + def forward(self, x): + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + return latent_pred + +def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, + checkpoint_path='model_checkpoint.pt', resume_training=False): + model = MultitaskGPModel().to(device) + likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=4, num_classes=4).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=0.1) + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset)) + + start_epoch = 0 + if resume_training and os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + start_epoch = checkpoint.get('epoch', 0) + + best_val_loss = float('inf') + epochs_no_improve = 0 + + metrics = { + 'precision': [], + 'recall': [], + 'f1_score': [], + 'auc_roc': [], + 'train_loss': [] + } + + for epoch in range(start_epoch, num_iterations): + model.train() + likelihood.train() + for train_batch in train_loader: + optimizer.zero_grad() + train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device) + train_y = train_batch['label'].to(device) + output = model(train_x) + loss = -mll(output, train_y) + metrics['train_loss'].append(loss.item()) + loss.backward() + optimizer.step() + + # Stochastic validation + model.eval() + likelihood.eval() + with torch.no_grad(): + val_indices = torch.randperm(len(val_loader.dataset))[:int(0.1 * len(val_loader.dataset))] + val_loss = 0.0 + val_labels = [] + val_predictions = [] + for idx in val_indices: + val_batch = val_loader.dataset[idx] + val_x = val_batch['data'].reshape(-1).unsqueeze(0).to(device) + val_y = torch.tensor([val_batch['label']], device=device) + val_output = model(val_x) + val_loss_batch = -mll(val_output, val_y).sum() + val_loss += val_loss_batch.item() + val_labels.append(val_y.item()) + val_predictions.append(val_output.mean.argmax(dim=-1).item()) + + precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro') + auc_roc = roc_auc_score(label_binarize(val_labels, classes=range(n_classes)), + label_binarize(val_predictions, classes=range(n_classes)), + multi_class='ovr') + + metrics['precision'].append(precision) + metrics['recall'].append(recall) + metrics['f1_score'].append(f1) + metrics['auc_roc'].append(auc_roc) + val_loss /= len(val_indices) + + if val_loss < best_val_loss: + best_val_loss = val_loss + epochs_no_improve = 0 + torch.save({'model_state_dict': model.state_dict(), + 'likelihood_state_dict': likelihood.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'epoch': epoch}, checkpoint_path) + else: + epochs_no_improve += 1 + if epochs_no_improve >= patience: + print(f"Early stopping triggered at epoch {epoch+1}") + break + + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path) + 
model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + + return model, likelihood, metrics + +def evaluate_gp_model(test_loader, model, likelihood, n_classes=4): + model.eval() + likelihood.eval() + test_labels = [] + test_predictions = [] + + with torch.no_grad(): + for test_batch in test_loader: + test_x = test_batch['data'].reshape(test_batch['data'].size(0), -1).to(device) + test_y = test_batch['label'].to(device) + test_output = model(test_x) + test_labels.extend(test_y.tolist()) + test_predictions.extend(test_output.mean.argmax(dim=-1).tolist()) + + precision, recall, f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='macro') + auc_roc = roc_auc_score(label_binarize(test_labels, classes=range(n_classes)), + label_binarize(test_predictions, classes=range(n_classes)), + multi_class='ovr') + + metrics = { + 'precision': precision, + 'recall': recall, + 'f1_score': f1, + 'auc_roc': auc_roc + } + + return metrics + +def main(): + # Device and drives + is_linux = False + is_hpc = False + is_internal = False + is_external = True + binary = False + + # Input + is_tfs = True + + # Database + database = 'mimic3' + + # Initialize the focus + focus = 'thesis_results_database_multiclass' + + # Initialize the file tag + file_tag = 'MIMIC_III' + + # Image resolution + img_res = '128x128_float16' + + # Data type: the type to convert the data into when it is loaded in + data_type = torch.float32 + + # Model type + model_type = torch.float32 + + # Create a PathMaster object + pathmaster = PathMaster(is_linux, is_hpc, is_tfs, is_internal, is_external, focus, file_tag, img_res) + + # Image dimensions + img_channels = 1 + img_size = 128 + downsample = None + standardize = True + + # Run parameters + n_epochs = 100 + if binary: + n_classes = 2 + else: + n_classes = 3 + patience = round(n_epochs / 10) if n_epochs > 50 else 5 + save = True + + # Resume checkpoint + resume_checkpoint_path = None + + # Data loading details + data_format = 'pt' + batch_size = 256 + + # Preprocess database data + test_loader = preprocess_data(database, batch_size, standardize, img_channels, img_size, + downsample, data_type, pathmaster, binary) + + # Training and validation + start_time = time.time() + model, likelihood, metrics = train_gp_model(train_loader, val_loader, n_epochs, + n_classes, patience, save, pathmaster) + end_time = time.time() + time_passed = end_time - start_time + print('\nTraining and validation took %.2f minutes' % (time_passed / 60)) + + # Evaluation + start_time = time.time() + test_metrics = evaluate_gp_model(test_loader, model, likelihood, n_classes) + end_time = time.time() + time_passed = end_time - start_time + print('\nTesting took %.2f seconds' % time_passed) + + print('Test Metrics:') + print('Precision: %.4f' % test_metrics['precision']) + print('Recall: %.4f' % test_metrics['recall']) + print('F1 Score: %.4f' % test_metrics['f1_score']) + print('AUC-ROC: %.4f' % test_metrics['auc_roc']) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/main_darren_v1.py b/main_darren_v1.py new file mode 100644 index 0000000..29ec642 --- /dev/null +++ b/main_darren_v1.py @@ -0,0 +1,265 @@ +import os +import torch +import gpytorch +from sklearn.metrics import precision_recall_fscore_support, roc_auc_score +from sklearn.preprocessing import label_binarize +from torch.utils.data import Dataset, DataLoader +import numpy as np 
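evaluate_gp_model above computes AUC-ROC by one-hot encoding both the labels and the hard argmax predictions; a self-contained sketch of that pattern with hypothetical values:

from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

y_true = [0, 2, 1, 3, 0, 1]          # hypothetical labels
y_pred = [0, 2, 1, 1, 0, 2]          # hypothetical argmax predictions
auc = roc_auc_score(label_binarize(y_true, classes=range(4)),
                    label_binarize(y_pred, classes=range(4)), multi_class='ovr')
print(round(auc, 3))

Because the scores are binarized hard predictions rather than class probabilities, the resulting AUC is coarse; that matches how both main_darren scripts compute it.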
+import random +import time + +# Seeds +torch.manual_seed(42) +np.random.seed(42) +random.seed(42) + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +num_latents = 6 # This should match the complexity of your data or the number of tasks +num_tasks = 4 # This should match the number of output classes or tasks +num_inducing_points = 50 # This is independent and should be sufficient for the input space + +class MultitaskGPModel(gpytorch.models.ApproximateGP): + def __init__(self): + # Let's use a different set of inducing points for each latent function + inducing_points = torch.rand(num_latents, num_inducing_points, 128 * 128) # Assuming flattened 128x128 images + + # We have to mark the CholeskyVariationalDistribution as batch + # so that we learn a variational distribution for each task + variational_distribution = gpytorch.variational.CholeskyVariationalDistribution( + inducing_points.size(-2), batch_shape=torch.Size([num_latents]) + ) + + # We have to wrap the VariationalStrategy in a LMCVariationalStrategy + # so that the output will be a MultitaskMultivariateNormal rather than a batch output + variational_strategy = gpytorch.variational.LMCVariationalStrategy( + gpytorch.variational.VariationalStrategy( + self, inducing_points, variational_distribution, learn_inducing_locations=True + ), + num_tasks=num_tasks, + num_latents=num_latents, + latent_dim=-1 + ) + + super().__init__(variational_strategy) + + # The mean and covariance modules should be marked as batch + # so we learn a different set of hyperparameters + self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents])) + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])), + batch_shape=torch.Size([num_latents]) + ) + + def forward(self, x): + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + return latent_pred + +class CustomDataset(Dataset): + def __init__(self, data_path, labels_path, binary=False): + self.data_path = data_path + self.labels_path = labels_path + self.binary = binary + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + def __len__(self): + return len(self.segment_names) + + def __getitem__(self, idx): + segment_name = self.segment_names[idx] + label = self.labels[segment_name] + data_tensor = torch.load(os.path.join(self.data_path, segment_name + '.pt')) + return {'data': data_tensor, 'label': label, 'segment_name': segment_name} + + def extract_segment_names_and_labels(self): + segment_names = [] + labels = {} + + with open(self.labels_path, 'r') as file: + lines = file.readlines() + for line in lines[1:]: # Skip the header line + segment_name, label = line.strip().split(',') + label = int(float(label)) # Convert the label to float first, then to int + if self.binary and label == 2: + label = 0 # Convert PAC/PVC to non-AF (0) for binary classification + segment_names.append(segment_name) + labels[segment_name] = label + + return segment_names, labels + +def load_data(data_path, labels_path, batch_size, binary=False): + dataset = CustomDataset(data_path, labels_path, binary) + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) + return dataloader + +def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, + checkpoint_path='model_checkpoint.pt', resume_training=False): + model = MultitaskGPModel().to(device) + likelihood = 
gpytorch.likelihoods.SoftmaxLikelihood(num_features=num_tasks, num_classes=n_classes).to(device)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
+    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset))
+
+    start_epoch = 0
+    if resume_training and os.path.exists(checkpoint_path):
+        checkpoint = torch.load(checkpoint_path)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
+        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+        start_epoch = checkpoint.get('epoch', 0)
+
+    best_val_loss = float('inf')
+    epochs_no_improve = 0
+
+    metrics = {
+        'precision': [],
+        'recall': [],
+        'f1_score': [],
+        'auc_roc': [],
+        'train_loss': []
+    }
+
+    for epoch in range(start_epoch, num_iterations):
+        model.train()
+        likelihood.train()
+        for train_batch in train_loader:
+            optimizer.zero_grad()
+            train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device)
+            train_y = train_batch['label'].to(device)
+            output = model(train_x)
+            loss = -mll(output, train_y)
+            metrics['train_loss'].append(loss.item())
+            loss.backward()
+            optimizer.step()
+
+        # Stochastic validation
+        model.eval()
+        likelihood.eval()
+        with torch.no_grad():
+            val_indices = torch.randperm(len(val_loader.dataset))[:int(0.1 * len(val_loader.dataset))]
+            val_loss = 0.0
+            val_labels = []
+            val_predictions = []
+            for idx in val_indices:
+                val_batch = val_loader.dataset[idx]
+                val_x = val_batch['data'].reshape(-1).unsqueeze(0).to(device)
+                val_y = torch.tensor([val_batch['label']], device=device)
+                val_output = model(val_x)
+                val_loss_batch = -mll(val_output, val_y).sum()
+                val_loss += val_loss_batch.item()
+                val_labels.append(val_y.item())
+                val_predictions.append(val_output.mean.argmax(dim=-1).item())
+
+            precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro')
+            auc_roc = roc_auc_score(label_binarize(val_labels, classes=range(n_classes)),
+                                    label_binarize(val_predictions, classes=range(n_classes)),
+                                    multi_class='ovr')
+
+            metrics['precision'].append(precision)
+            metrics['recall'].append(recall)
+            metrics['f1_score'].append(f1)
+            metrics['auc_roc'].append(auc_roc)
+            val_loss /= len(val_indices)
+
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            epochs_no_improve = 0
+            torch.save({'model_state_dict': model.state_dict(),
+                        'likelihood_state_dict': likelihood.state_dict(),
+                        'optimizer_state_dict': optimizer.state_dict(),
+                        'epoch': epoch}, checkpoint_path)
+        else:
+            epochs_no_improve += 1
+            if epochs_no_improve >= patience:
+                print(f"Early stopping triggered at epoch {epoch+1}")
+                break
+
+    if os.path.exists(checkpoint_path):
+        checkpoint = torch.load(checkpoint_path)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
+        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+
+    return model, likelihood, metrics
+
+def evaluate_gp_model(test_loader, model, likelihood, n_classes=4):
+    model.eval()
+    likelihood.eval()
+    test_labels = []
+    test_predictions = []
+
+    with torch.no_grad():
+        for test_batch in test_loader:
+            test_x = test_batch['data'].reshape(test_batch['data'].size(0), -1).to(device)
+            test_y = test_batch['label'].to(device)
+            test_output = model(test_x)
+            test_labels.extend(test_y.tolist())
+            test_predictions.extend(test_output.mean.argmax(dim=-1).tolist())
+
+    precision, recall, f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='macro')
+    auc_roc = roc_auc_score(label_binarize(test_labels, classes=range(n_classes)),
+                            label_binarize(test_predictions, classes=range(n_classes)),
+                            multi_class='ovr')
+
+    metrics = {
+        'precision': precision,
+        'recall': recall,
+        'f1_score': f1,
+        'auc_roc': auc_roc
+    }
+
+    return metrics
+
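A note on the AUC-ROC values computed above: both the stochastic validation in train_gp_model and evaluate_gp_model binarize hard argmax predictions before calling roc_auc_score, so the score reflects discrete decisions rather than how well the model ranks classes by confidence. If a probability-based multiclass AUC is preferred, one option is to softmax per-class scores (or pass the model output through the SoftmaxLikelihood to obtain class probabilities) and hand those to roc_auc_score directly. A small self-contained sketch with made-up data; in this file the latent mean has num_tasks columns, so it would either need to match the number of classes or be mapped through the likelihood first:

    import numpy as np
    import torch
    from sklearn.metrics import roc_auc_score

    n_classes = 3
    y_true = np.array([0, 1, 2, 1, 0])                 # illustrative labels, every class present
    scores = torch.randn(len(y_true), n_classes)       # stand-in for per-class latent scores
    probs = torch.softmax(scores, dim=-1).numpy()      # rows sum to 1, shape (n_samples, n_classes)
    auc = roc_auc_score(y_true, probs, multi_class='ovr', labels=list(range(n_classes)))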
+def main():
+    # Paths
+    base_path = r"\\grove.ad.uconn.edu\\research\\ENGR_Chon\Darren\\NIH_Pulsewatch"
+    smote_type = 'Cassey5k_SMOTE'
+    split = 'holdout_60_10_30'
+    data_path_train = os.path.join(base_path, "TFS_pt", smote_type, split, "train")
+    data_path_val = os.path.join(base_path, "TFS_pt", smote_type, split, "validate")
+    data_path_test = os.path.join(base_path, "TFS_pt", smote_type, split, "test")
+    labels_path_train = os.path.join(base_path, "TFS_pt", smote_type, split, "Cassey5k_SMOTE_train_names_labels.csv")
+    labels_path_val = os.path.join(base_path, "TFS_pt", smote_type, split, "Cassey5k_SMOTE_validate_names_labels.csv")
+    labels_path_test = os.path.join(base_path, "TFS_pt", smote_type, split, "Cassey5k_SMOTE_test_names_labels.csv")
+
+    # Parameters
+    binary = False
+    n_epochs = 100
+    if binary:
+        n_classes = 2
+    else:
+        n_classes = 3
+    patience = round(n_epochs / 10) if n_epochs > 50 else 5
+    save = True
+    resume_checkpoint_path = None
+    batch_size = 256
+
+    # Data loading
+    train_loader = load_data(data_path_train, labels_path_train, batch_size, binary)
+    val_loader = load_data(data_path_val, labels_path_val, batch_size, binary)
+    test_loader = load_data(data_path_test, labels_path_test, batch_size, binary)
+
+    # Training and validation
+    start_time = time.time()
+    model, likelihood, metrics = train_gp_model(train_loader, val_loader, n_epochs,
+                                                n_classes, patience)
+    end_time = time.time()
+    time_passed = end_time - start_time
+    print('\nTraining and validation took %.2f minutes' % (time_passed / 60))
+
+    # Evaluation
+    start_time = time.time()
+    test_metrics = evaluate_gp_model(test_loader, model, likelihood, n_classes)
+    end_time = time.time()
+    time_passed = end_time - start_time
+    print('\nTesting took %.2f seconds' % time_passed)
+
+    print('Test Metrics:')
+    print('Precision: %.4f' % test_metrics['precision'])
+    print('Recall: %.4f' % test_metrics['recall'])
+    print('F1 Score: %.4f' % test_metrics['f1_score'])
+    print('AUC-ROC: %.4f' % test_metrics['auc_roc'])
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/transfer_data/tar_PT_files.sh b/transfer_data/tar_PT_files.sh
new file mode 100644
index 0000000..600666d
--- /dev/null
+++ b/transfer_data/tar_PT_files.sh
@@ -0,0 +1,19 @@
+source_path="/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/PT_format"
+source_path_all="/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/PT_format/*"
+dest_path="/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/tar_PT_format"
+if [ ! -d $dest_path ]
+then
+    mkdir -p $dest_path
+    echo "Root dest_path does not exist, just created it: $dest_path"
+fi
+
+for d in $source_path_all; do
+    sub_d=${d##*/}
+    echo "$sub_d"
+    dest_tar="${dest_path}/${sub_d}.tar"
+    if [ !
-f $dest_tar ] + then + tar -C $source_path -cvf $dest_tar $sub_d + echo "Finished archive: $dest_tar" + fi +done; \ No newline at end of file diff --git a/transfer_data/tar_PT_files_single_UID.sh b/transfer_data/tar_PT_files_single_UID.sh new file mode 100644 index 0000000..dc3f3c0 --- /dev/null +++ b/transfer_data/tar_PT_files_single_UID.sh @@ -0,0 +1,5 @@ +source_path="/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/PT_format" +dest_path="/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch/tar_PT_format" +sub_d="120" +dest_tar="${dest_path}/${sub_d}.tar" +tar -C $source_path -cvf $dest_tar $sub_d \ No newline at end of file diff --git a/utils/__pycache__/dataloader.cpython-310.pyc b/utils/__pycache__/dataloader.cpython-310.pyc new file mode 100644 index 0000000..ae5efbe Binary files /dev/null and b/utils/__pycache__/dataloader.cpython-310.pyc differ diff --git a/utils/__pycache__/dataloader.cpython-311.pyc b/utils/__pycache__/dataloader.cpython-311.pyc new file mode 100644 index 0000000..063e8f7 Binary files /dev/null and b/utils/__pycache__/dataloader.cpython-311.pyc differ diff --git a/utils/__pycache__/dataloader.cpython-312.pyc b/utils/__pycache__/dataloader.cpython-312.pyc new file mode 100644 index 0000000..af61a83 Binary files /dev/null and b/utils/__pycache__/dataloader.cpython-312.pyc differ diff --git a/utils/__pycache__/dataloader.cpython-39.pyc b/utils/__pycache__/dataloader.cpython-39.pyc new file mode 100644 index 0000000..1149806 Binary files /dev/null and b/utils/__pycache__/dataloader.cpython-39.pyc differ diff --git a/utils/__pycache__/dataloader_batch.cpython-310.pyc b/utils/__pycache__/dataloader_batch.cpython-310.pyc new file mode 100644 index 0000000..6b49db6 Binary files /dev/null and b/utils/__pycache__/dataloader_batch.cpython-310.pyc differ diff --git a/utils/__pycache__/dataloader_database.cpython-310.pyc b/utils/__pycache__/dataloader_database.cpython-310.pyc new file mode 100644 index 0000000..4ccea8f Binary files /dev/null and b/utils/__pycache__/dataloader_database.cpython-310.pyc differ diff --git a/utils/__pycache__/dataloader_smote.cpython-310.pyc b/utils/__pycache__/dataloader_smote.cpython-310.pyc new file mode 100644 index 0000000..8070ab1 Binary files /dev/null and b/utils/__pycache__/dataloader_smote.cpython-310.pyc differ diff --git a/utils/__pycache__/get_paths.cpython-310.pyc b/utils/__pycache__/get_paths.cpython-310.pyc new file mode 100644 index 0000000..18b2bb2 Binary files /dev/null and b/utils/__pycache__/get_paths.cpython-310.pyc differ diff --git a/utils/__pycache__/get_paths.cpython-311.pyc b/utils/__pycache__/get_paths.cpython-311.pyc new file mode 100644 index 0000000..6e0bdbe Binary files /dev/null and b/utils/__pycache__/get_paths.cpython-311.pyc differ diff --git a/utils/__pycache__/misc_func.cpython-310.pyc b/utils/__pycache__/misc_func.cpython-310.pyc new file mode 100644 index 0000000..5959146 Binary files /dev/null and b/utils/__pycache__/misc_func.cpython-310.pyc differ diff --git a/utils/__pycache__/model_func.cpython-310.pyc b/utils/__pycache__/model_func.cpython-310.pyc new file mode 100644 index 0000000..94b284b Binary files /dev/null and b/utils/__pycache__/model_func.cpython-310.pyc differ diff --git a/utils/__pycache__/model_func.cpython-311.pyc b/utils/__pycache__/model_func.cpython-311.pyc new file mode 100644 index 0000000..f798459 Binary files /dev/null and b/utils/__pycache__/model_func.cpython-311.pyc differ diff --git a/utils/__pycache__/model_func.cpython-312.pyc 
b/utils/__pycache__/model_func.cpython-312.pyc new file mode 100644 index 0000000..8a31f82 Binary files /dev/null and b/utils/__pycache__/model_func.cpython-312.pyc differ diff --git a/utils/__pycache__/model_func_batch.cpython-310.pyc b/utils/__pycache__/model_func_batch.cpython-310.pyc new file mode 100644 index 0000000..3b0039f Binary files /dev/null and b/utils/__pycache__/model_func_batch.cpython-310.pyc differ diff --git a/utils/__pycache__/pathmaster.cpython-310.pyc b/utils/__pycache__/pathmaster.cpython-310.pyc new file mode 100644 index 0000000..5411969 Binary files /dev/null and b/utils/__pycache__/pathmaster.cpython-310.pyc differ diff --git a/utils/__pycache__/pathmaster.cpython-312.pyc b/utils/__pycache__/pathmaster.cpython-312.pyc new file mode 100644 index 0000000..e70d54b Binary files /dev/null and b/utils/__pycache__/pathmaster.cpython-312.pyc differ diff --git a/utils/__pycache__/pathmaster.cpython-39.pyc b/utils/__pycache__/pathmaster.cpython-39.pyc new file mode 100644 index 0000000..d0ab4b5 Binary files /dev/null and b/utils/__pycache__/pathmaster.cpython-39.pyc differ diff --git a/utils/__pycache__/plot_save_func.cpython-310.pyc b/utils/__pycache__/plot_save_func.cpython-310.pyc new file mode 100644 index 0000000..902fe16 Binary files /dev/null and b/utils/__pycache__/plot_save_func.cpython-310.pyc differ diff --git a/utils/__pycache__/plot_save_func.cpython-311.pyc b/utils/__pycache__/plot_save_func.cpython-311.pyc new file mode 100644 index 0000000..5de7e02 Binary files /dev/null and b/utils/__pycache__/plot_save_func.cpython-311.pyc differ diff --git a/utils/__pycache__/plot_save_func.cpython-312.pyc b/utils/__pycache__/plot_save_func.cpython-312.pyc new file mode 100644 index 0000000..a7005b4 Binary files /dev/null and b/utils/__pycache__/plot_save_func.cpython-312.pyc differ diff --git a/utils/__pycache__/plot_save_func.cpython-39.pyc b/utils/__pycache__/plot_save_func.cpython-39.pyc new file mode 100644 index 0000000..35f1877 Binary files /dev/null and b/utils/__pycache__/plot_save_func.cpython-39.pyc differ diff --git a/utils/__pycache__/train_func.cpython-310.pyc b/utils/__pycache__/train_func.cpython-310.pyc new file mode 100644 index 0000000..c11ce94 Binary files /dev/null and b/utils/__pycache__/train_func.cpython-310.pyc differ diff --git a/utils/__pycache__/train_func.cpython-311.pyc b/utils/__pycache__/train_func.cpython-311.pyc new file mode 100644 index 0000000..8790f6f Binary files /dev/null and b/utils/__pycache__/train_func.cpython-311.pyc differ diff --git a/utils/dataloader.py b/utils/dataloader.py new file mode 100644 index 0000000..4a382e7 --- /dev/null +++ b/utils/dataloader.py @@ -0,0 +1,895 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Feb 26 18:29:59 2024 + +@author: dchen +""" +import os +import numpy as np +import pandas as pd +from PIL import Image +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from torchvision.transforms import ToTensor +import math +from numpy import random +from numpy.random import choice +import cv2 +from pyarrow import csv + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Seeds +torch.manual_seed(42) +np.random.seed(42) +random.seed(42) + + +def split_uids_60_10_30(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = 
table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. + print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_train = ['011', '014', '030', '037', '044', '050', '055', '058', '074', '083', '091', '098', '101', '106', '109', '119'] + uid_nsr_val = ['041', '056', '325'] + uid_nsr_test = ['003', '012', '020', '024', '027', '035', '036', '047'] + + uid_af_train = ['017', '301', '302', '305', '306', '318', '319', '320', '321', '322', '324', '329', '402', '405', '406', '407', '416', '420', '421'] + uid_af_val = ['400', '409', '422'] + uid_af_test = ['307', '310', '311', '312', '410', '413', '414', '415', '423'] + + uid_pacpvc_train = ['005', '007', '013', '021', '022', '026', '028', '029', '042', '064', '068', '073', '080', '086', '087', '089', '093', '104', '110', '113', '120', '327', '408'] + uid_pacpvc_val = ['045', '054', '112'] + uid_pacpvc_test = ['002', '038', '039', '052', '053', '069', '070', '075', '078', '090', '100', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_train + uid_nsr_val + uid_nsr_test + total_uid_af = uid_af_train + uid_af_val + uid_af_test + total_uid_pacpvc = uid_pacpvc_train + uid_pacpvc_val + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + train_set = uid_nsr_train + uid_af_train + uid_pacpvc_train + val_set = uid_nsr_val + uid_af_val + uid_pacpvc_val + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + # Limit data set size to reduce computational load for optimization + test_set = test_set + + return train_set, val_set, test_set + + +def split_uids_60_10_30_smote(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, 
AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. + print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_train = ['003', '020', '024', '041', '044', '047', '049', '050', '058', '063', '077', '084', '088', '091', '098', '099', '106', '109', '111', '118', '325'] + uid_nsr_val = ['014', '030', '036', '074'] + uid_nsr_test = ['011', '012', '027', '035', '037', '055', '056', '057', '083', '094', '101', '119'] + + uid_af_train = ['017', '302', '306', '307', '310', '311', '319', '321', '324', '400', '402', '405', '406', '407', '409', '410', '415', '420', '421'] + uid_af_val = ['416', '422', '423'] + uid_af_test = ['301', '305', '312', '318', '320', '322', '329', '413', '414'] + + uid_pacpvc_train = ['005', '007', '013', '021', '022', '026', '028', '029', '042', '064', '068', '073', '080', '086', '087', '089', '093', '104', '110', '113', '120', '327', '408'] + uid_pacpvc_val = ['045', '054', '112'] + uid_pacpvc_test = ['002', '038', '039', '052', '053', '069', '070', '075', '078', '090', '100', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_train + uid_nsr_val + uid_nsr_test + total_uid_af = uid_af_train + uid_af_val + uid_af_test + total_uid_pacpvc = uid_pacpvc_train + uid_pacpvc_val + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + train_set = uid_nsr_train + uid_af_train + uid_pacpvc_train + val_set = uid_nsr_val + uid_af_val + uid_pacpvc_val + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + return train_set, val_set, test_set + + +def split_uids_60_10_30_noPACPVC(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. 
+ print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_train = ['003', '020', '024', '041', '044', '047', '049', '050', '058', '063', '077', '084', '088', '091', '098', '099', '106', '109', '111', '118', '325'] + uid_nsr_val = ['014', '030', '036', '074'] + uid_nsr_test = ['011', '012', '027', '035', '037', '055', '056', '057', '083', '094', '101', '119'] + + uid_af_train = ['017', '302', '306', '307', '310', '311', '319', '321', '324', '400', '402', '405', '406', '407', '409', '410', '415', '420', '421'] + uid_af_val = ['416', '422', '423'] + uid_af_test = ['301', '305', '312', '318', '320', '322', '329', '413', '414'] + + uid_pacpvc_train = [] # ['005', '007', '013', '021', '022', '026', '028', '029', '042', '064', '068', '073', '080', '086', '087', '089', '093', '104', '110', '113', '120', '327', '408'] + uid_pacpvc_val = [] # ['045', '054', '112'] + uid_pacpvc_test = [] # ['002', '038', '039', '052', '053', '069', '070', '075', '078', '090', '100', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_train + uid_nsr_val + uid_nsr_test + total_uid_af = uid_af_train + uid_af_val + uid_af_test + total_uid_pacpvc = uid_pacpvc_train + uid_pacpvc_val + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + train_set = uid_nsr_train + uid_af_train + uid_pacpvc_train + val_set = uid_nsr_val + uid_af_val + uid_pacpvc_val + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + return train_set, val_set, test_set + + +def split_uids_60_10_30_noNSR(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. 
+ print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_train = [] # ['003', '020', '024', '041', '044', '047', '049', '050', '058', '063', '077', '084', '088', '091', '098', '099', '106', '109', '111', '118', '325'] + uid_nsr_val = [] # ['014', '030', '036', '074'] + uid_nsr_test = [] # ['011', '012', '027', '035', '037', '055', '056', '057', '083', '094', '101', '119'] + + uid_af_train = ['017', '302', '306', '307', '310', '311', '319', '321', '324', '400', '402', '405', '406', '407', '409', '410', '415', '420', '421'] + uid_af_val = ['416', '422', '423'] + uid_af_test = ['301', '305', '312', '318', '320', '322', '329', '413', '414'] + + uid_pacpvc_train = ['005', '007', '013', '021', '022', '026', '028', '029', '042', '064', '068', '073', '080', '086', '087', '089', '093', '104', '110', '113', '120', '327', '408'] + uid_pacpvc_val = ['045', '054', '112'] + uid_pacpvc_test = ['002', '038', '039', '052', '053', '069', '070', '075', '078', '090', '100', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_train + uid_nsr_val + uid_nsr_test + total_uid_af = uid_af_train + uid_af_val + uid_af_test + total_uid_pacpvc = uid_pacpvc_train + uid_pacpvc_val + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + train_set = uid_nsr_train + uid_af_train + uid_pacpvc_train + val_set = uid_nsr_val + uid_af_val + uid_pacpvc_val + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + return train_set, val_set, test_set + + +def split_uids_60_10_30_balanced(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. 
+ print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_train = ['041', '044', '047', '050', '058', '063', '091', '098', '106', '111', '325'] + uid_nsr_val = ['014', '030', '036', '074'] + uid_nsr_test = ['011', '012', '027', '035', '037', '055', '056', '057', '083', '094', '101', '119'] + + uid_af_train = ['017', '302', '306', '307', '310', '311', '319', '321', '324', '400', '402', '407', '409', '415', '420', '421'] + uid_af_val = ['416', '422', '423'] + uid_af_test = ['301', '305', '312', '318', '320', '322', '329', '413', '414'] + + uid_pacpvc_train = ['005', '007', '013', '021', '022', '026', '028', '029', '042', '064', '068', '073', '080', '086', '087', '089', '093', '104', '110', '113', '120', '327', '408'] + uid_pacpvc_val = ['045', '054', '112'] + uid_pacpvc_test = ['002', '038', '039', '052', '053', '069', '070', '075', '078', '090', '100', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_train + uid_nsr_val + uid_nsr_test + total_uid_af = uid_af_train + uid_af_val + uid_af_test + total_uid_pacpvc = uid_pacpvc_train + uid_pacpvc_val + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + train_set = uid_nsr_train + uid_af_train + uid_pacpvc_train + val_set = uid_nsr_val + uid_af_val + uid_pacpvc_val + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + return train_set, val_set, test_set + + +def split_uids_2fold_60_40(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. 
+ print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_pacpvc_fold1 = ['007', '022', '028', '038', '054', '068', '075', '086', '087', '093', '120', '327'] + uid_pacpvc_fold2 = ['002', '005', '013', '021', '026', '029', '045', '073', '089', '100', '112', '408'] + uid_pacpvc_test = ['039', '042', '052', '053', '064', '069', '070', '078', '080', '090', '104', '110', '113', '419'] + + uid_af_fold1 = ['305', '307', '311', '318', '320', '322', '405', '415', '423'] + uid_af_fold2 = ['301', '319', '321', '324', '329', '400', '406', '409', '416'] + uid_af_test = ['017', '302', '306', '310', '312', '402', '407', '410', '413', '414', '420', '421', '422'] + + uid_nsr_fold1 = ['011', '014', '041', '050', '056', '058', '083', '106', '109'] + uid_nsr_fold2 = ['037', '047', '055', '074', '091', '098', '101', '119', '325'] + uid_nsr_test = ['003', '012', '020', '024', '027', '030', '035', '036', '044', '049', '057', '063', '077', '084', '088', '094', '099', '111', '118'] + + # Total UID counts + total_uid_pacpvc = uid_pacpvc_fold1 + uid_pacpvc_fold2 + uid_pacpvc_test + total_uid_af = uid_af_fold1 + uid_af_fold2 + uid_af_test + total_uid_nsr = uid_nsr_fold1 + uid_nsr_fold2 + uid_nsr_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + cross_val_fold1 = uid_nsr_fold1 + uid_af_fold1 + uid_pacpvc_fold1 + cross_val_fold2 = uid_nsr_fold2 + uid_af_fold2 + uid_pacpvc_fold2 + test = uid_nsr_test + uid_af_test + uid_pacpvc_test + + # # Limit data set size to reduce computational load for optimization + # cross_val_fold1 = uid_nsr_fold1[:2] + uid_af_fold1[:2] + uid_pacpvc_fold1[:2] + # cross_val_fold2 = uid_nsr_fold2[:2] + uid_af_fold2[:2] + uid_pacpvc_fold2[:2] + # test = uid_nsr_test[:2] + uid_af_test[:2] + uid_pacpvc_test[:2] + + return cross_val_fold1, cross_val_fold2, test + + +def split_uids_2fold_60_40_smote(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + # Filter out 0-segment UIDs and UIDs without NSR, AF, and/or PAC/PVC + remaining_UIDs = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + if row['TOTAL'] == 0: + # There is no segment in this subject, skip this UID. 
+ print(f'---------UID {UID} has no segments.------------') + elif (row['NSR'] > 0 or row['AF'] > 0 or row['PACPVC'] > 0): # Append UID only if it contains NSR, AF, or PAC/PVC + remaining_UIDs.append(UID) + else: + print(f'---------UID {UID} has no AF, NSR, or PAC/PVC segments.------------') + + # Split UIDs + uid_nsr_fold1 = ['020', '030', '037', '041', '058', '077', '084', '106', '109', '118', '325'] + uid_nsr_fold2 = ['003', '014', '036', '044', '047', '049', '063', '083', '088', '091', '099'] + uid_nsr_test = ['011', '012', '024', '027', '035', '050', '055', '056', '057', '074', '094', '098', '101', '111', '119'] + + uid_af_fold1 = ['302', '306', '307', '402', '405', '415', '420', '421', '422'] + uid_af_fold2 = ['310', '321', '324', '406', '407', '409', '414', '416', '423'] + uid_af_test = ['017', '301', '305', '311', '312', '318', '319', '320', '322', '329', '400', '410', '413'] + + uid_pacpvc_fold1 = ['007', '022', '028', '038', '054', '068', '075', '086', '087', '093', '120', '327'] + uid_pacpvc_fold2 = ['002', '005', '013', '021', '026', '029', '045', '073', '089', '100', '112', '408'] + uid_pacpvc_test = ['039', '042', '052', '053', '064', '069', '070', '078', '080', '090', '104', '110', '113', '419'] + + # Total UID counts + total_uid_nsr = uid_nsr_fold1 + uid_nsr_fold2 + uid_nsr_test + total_uid_af = uid_af_fold1 + uid_af_fold2 + uid_af_test + total_uid_pacpvc = uid_pacpvc_fold1 + uid_pacpvc_fold2 + uid_pacpvc_test + total_uid = total_uid_pacpvc + total_uid_af + total_uid_nsr + + print('Number of total and unique UIDs:', len(total_uid),'|', len(np.unique(total_uid))) + print('Number of total and unique NSR UIDs:', len(total_uid_nsr),'|', len(np.unique(total_uid_nsr))) + print('Number of total and unique AF UIDs:', len(total_uid_af),'|', len(np.unique(total_uid_af))) + print('Number of total and unique PAC/PVC UIDs:', len(total_uid_pacpvc),'|', len(np.unique(total_uid_pacpvc))) + + cross_val_fold1 = uid_nsr_fold1 + uid_af_fold1 + uid_pacpvc_fold1 + cross_val_fold2 = uid_nsr_fold2 + uid_af_fold2 + uid_pacpvc_fold2 + test_set = uid_nsr_test + uid_af_test + uid_pacpvc_test + + return cross_val_fold1, cross_val_fold2, test_set + + +def split_uids(pathmaster): + # ====== Load the per subject arrythmia summary ====== + file_path = pathmaster.summary_path() + # df_summary = pd.read_csv(file_path) + + # Read the CSV file using pyarrow.csv.read_csv + table_summary = csv.read_csv(file_path) + df_summary = table_summary.to_pandas() + + df_summary['UID'] = df_summary['UID'].astype(str).str.zfill(3) # Pads each UIDs with enough zeroes to be 3 characters + + df_summary['sample_nonAF'] = df_summary['NSR'] + df_summary['PACPVC'] + df_summary['SVT'] + df_summary['sample_AF'] = df_summary['AF'] + + df_summary['sample_nonAF_ratio'] = df_summary['sample_nonAF'] / (df_summary['sample_AF'] + df_summary['sample_nonAF']) + + all_UIDs = df_summary['UID'].unique() + + # ==================================================== + # ====== AF trial separation ====== + # R:\ENGR_Chon\Dong\Numbers\Pulsewatch_numbers\Fahimeh_CNNED_general_ExpertSystemwApplication\tbl_file_name\TrainingSet_final_segments + AF_trial_Fahimeh_train = ['402','410'] + AF_trial_Fahimeh_test = ['301', '302', '305', '306', '307', '310', '311', + '312', '318', '319', '320', '321', '322', '324', + '325', '327', '329', '400', '406', '407', '409', + '414'] + AF_trial_Fahimeh_did_not_use = ['405', '413', '415', '416', '420', '421', '422', '423'] + AF_trial_paroxysmal_AF = ['408','419'] + + AF_trial_train = AF_trial_Fahimeh_train + AF_trial_test 
= AF_trial_Fahimeh_test + AF_trial_unlabeled = AF_trial_Fahimeh_did_not_use + AF_trial_paroxysmal_AF + print(f'AF trial: {len(AF_trial_train)} training subjects {AF_trial_train}') + print(f'AF trial: {len(AF_trial_test)} testing subjects {AF_trial_test}') + print(f'AF trial: {len(AF_trial_unlabeled)} unlabeled subjects {AF_trial_unlabeled}') + + # ================================= + # === Clinical trial AF subjects separation === + clinical_trial_AF_subjects = ['005', '017', '026', '051', '075', '082'] + + # Filter out AF trial and 0-segment UIDs + remaining_UIDs = [] + count_NSR = [] + + for index, row in df_summary.iterrows(): + UID = row['UID'] + this_NSR = row['sample_nonAF'] + if math.isnan(row['sample_nonAF_ratio']): # sample_nonAF is never NaN, sample_nonAF_ratio may be NaN + # There is no segment in this subject, skip this UID. + print(f'---------UID {UID} has no segments.------------') + continue # If a UID has no segments, skip the rest of the for loop for this index, row + if UID not in AF_trial_train and UID not in AF_trial_test and UID not in clinical_trial_AF_subjects \ + and UID[0] != '3' and UID[0] != '4': + remaining_UIDs.append(UID) + count_NSR.append(this_NSR) + + # From the candidate UIDs, select a subset to be used for training, validation, and testing + random.seed(seed=42) + + list_of_candidates = remaining_UIDs + number_of_items_to_pick = round(len(list_of_candidates) * 0.25) # 15% labeled for training, 10% for testing. + sum_NSR = sum(count_NSR) + + # probability_distribution = [x/sum_NSR for x in count_NSR] # Proportion of total NSR segments for each UID + probability_distribution = [(1-x/sum_NSR)/ (len(count_NSR)-1) for x in count_NSR] # Subjects with fewer segments have higher chance to be selected. + draw = choice(list_of_candidates, number_of_items_to_pick, + p=probability_distribution, replace=False) + + # Ensures that training set contains both AF and non-AF + clinical_trial_train_nonAF = list(draw[:round(len(list_of_candidates) * 0.12)]) # Draws the first X number of candidates equal to 7% of the total list of candidates + clinical_trial_train_temp = clinical_trial_train_nonAF + clinical_trial_AF_subjects[:round(len(clinical_trial_AF_subjects)/2)] + clinical_trial_train = [] + + for UID in clinical_trial_train_temp: + # UID 051 and 108 and maybe other UIDs had no segments (unknown reason). + if UID in all_UIDs: + clinical_trial_train.append(UID) # Only use the UIDs that are in the summary to test + + # Ensures that the testing set contains both AF and non-AF + clinical_trial_test_nonAF = list(draw[round(len(list_of_candidates) * 0.12):]) # Draws the remaining candidates + clinical_trial_test_temp = clinical_trial_test_nonAF + clinical_trial_AF_subjects[round(len(clinical_trial_AF_subjects)/2):] + clinical_trial_test = [] + for UID in clinical_trial_test_temp: + # UID 051 and 108 and maybe other UIDs had no segments (unknown reason). + if UID in all_UIDs: + clinical_trial_test.append(UID) # Only use the UIDs that are in the summary to test + + # Uses all remaining subset of UIDs from original list not used in training or validating for testing + clinical_trial_unlabeled = [] + for UID in remaining_UIDs: # Changed from all_UIDs to remove UIDs with 0 segments (i.e. 
UID 108) + if UID not in clinical_trial_train and UID not in clinical_trial_test and UID[0] != '3' and UID[0] != '4': + clinical_trial_unlabeled.append(UID) + + # Sum up to 74 UIDs, all of the ones that do not start with '3' or '4' and dropping UID 108 which has 0 segments + print(f'Clinical trial: selected {len(clinical_trial_train)} UIDs for training {clinical_trial_train}') # Contains both non-AF and AF clinical trial subjects + print(f'Clinical trial: selected {len(clinical_trial_test)} UIDs for testing {clinical_trial_test}') # Contains both non-AF and AF clinical trial subjects + print(f'Clinical trial: selected {len(clinical_trial_unlabeled)} UIDs for unlabeled {clinical_trial_unlabeled}') # All remaining clinical trial subjects...probably contains both AF and non-AF + + # Used to make sure the model runs correctly + clinical_trial_train = ['063','416','005'] # Training + clinical_trial_test = ['058','409','054'] # Evaluation + clinical_trial_unlabeled = ['029','036','421'] # Testing + + return clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled + + +class CustomDataset(Dataset): + def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False, + start_idx=0, img_channels=1, img_size=128, downsample=None, data_type=torch.float32, is_tfs=True, binary=False): + self.data_path = data_path + self.labels_path = labels_path + self.UIDs = UIDs + self.standardize = standardize + self.data_format = data_format + self.read_all_labels = read_all_labels + self.transforms = ToTensor() + self.start_idx = start_idx # Initial batch index to start from, useful for resuming training + self.img_channels = img_channels + self.img_size = img_size + self.downsample = downsample + self.is_tfs = is_tfs + self.binary = binary + + # Must be manually set so that the image resolution chosen is the one that is returned + self.dtype = data_type + + self.refresh_dataset() + + def refresh_dataset(self): + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + def add_uids(self, new_uids): + unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs] # Appends any unqiue new UID in self.UIDs to unique_new_uids + self.UIDs.extend(unique_new_uids) # Appends unique_new_uids to UIDs + self.refresh_dataset() + + def __len__(self): # Method is implicitly called when len() is used on an instance of CustomDataset + return len(self.segment_names) + + def save_checkpoint(self, checkpoint_path): # Likely not worth using, simply use the save_checkpoint() function in train_func.py + # Enhanced to automatically include 'start_idx' in the checkpoint + checkpoint = { + 'segment_names': self.segment_names, + 'labels': self.labels, + 'UIDs': self.UIDs, + 'start_idx': self.start_idx # Now also saving start_idx + } + torch.save(checkpoint, checkpoint_path) # Using standard Python methods like pickle or json is generally recommended for dictionaries, there are no benefits for using torch.save, no real harm either + + def load_checkpoint(self, checkpoint_path): # Reloads where you started off last time (not where you ended), just use analogous function in train_func.py + checkpoint = torch.load(checkpoint_path) + self.segment_names = checkpoint['segment_names'] # Seems redundant since it is overwritten by refresh_dataset() + self.labels = checkpoint['labels'] # Seems redundant since it is overwritten by refresh_dataset() + self.UIDs = checkpoint['UIDs'] + # Now also loading and setting start_idx from checkpoint + self.start_idx = 
checkpoint.get('start_idx', 0) # Returns 0 if no start_idx found + self.refresh_dataset() + + def __getitem__(self, idx): # Method is implicitly called when getitem() is used on an instance of CustomDataset. It is called batch_size number of times per iteration of dataloader | Loads segments as needed (lazy loading) + actual_idx = (idx + self.start_idx) % len(self.segment_names) # Adjust index based on start_idx and wrap around if needed (i.e. index falls out of bounds) + segment_name = self.segment_names[actual_idx] + label = self.labels[segment_name] + + if hasattr(self, 'all_data') and actual_idx < len(self.all_data): # When Luis uses adds data to train_loader in main_checkpoints.py, + # new data is added (creating all_data) only after train_loader is created with its original training data. This means that if self.all_data + # exists, then __getitem__ is only be called in order to retrieve data newly added to train_loader in all_data + time_freq_tensor = self.all_data[actual_idx] + else: + time_freq_tensor = self.load_data(segment_name) + + return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name} + + # When iterating over the dataloader, which returns batches of data, each batch will contain a dictionary with keys corresponding to the data and labels. + + # Since the dataloader's dataset's __getitem__ method returns a dictionary with keys 'data', 'label', and 'segment_name', the returned batch will be a dictionary where: + + # The 'data' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the data. + # The 'label' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the labels. + # The 'segment_name' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the segment_name. + + def set_start_idx(self, index): + self.start_idx = index + + def add_data_label_pair(self, data, label): + # Assign a unique ID or name for the new data + new_id = len(self.segment_names) + segment_name = f"new_data_{new_id}" + + # Append the new data and label + self.segment_names.append(segment_name) + self.labels[segment_name] = label + + # Append the new data tensor to an attribute that holds all of the newly added data + if hasattr(self, 'all_data'): + self.all_data.append(data) + else: + self.all_data = [data] + + # def extract_segment_names_and_labels(self): + # segment_names = [] + # labels = {} + + # for UID in self.UIDs: + # label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv") + # if os.path.exists(label_file): + # # label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) # Replaces the original headers with names + + # # Use PyArrow to read csv + # parse_options = csv.ParseOptions(delimiter=',') # Indicate delimiter + # read_options = csv.ReadOptions(column_names=['segment', 'label'], skip_rows=1) # Assign desired column names and skip the first row (headers) + # label_data = csv.read_csv(label_file, parse_options=parse_options, read_options=read_options) + # label_data = label_data.to_pandas() + + # label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) # Splits each segment name by '.' 
and retrieves the first part + # for idx, segment_name in enumerate(label_segment_names): # enumerate() returns the value and corresponding index of each element in an iterable + # label_val = label_data['label'].values[idx] + # # Will only use NSR (0), AF (1), and PAC/PVC(2) and not SVT (3) + # if self.read_all_labels: # If reading all labels, set all labels not 0, 1, or 2 to -1 and return all labels + # # Assign -1 if label is not in [0, 1, 2] + # labels[segment_name] = label_val if label_val in [0, 1, 2] else -1 + # if segment_name not in segment_names: + # segment_names.append(segment_name) + # else: + # # Only add segments with labels in [0, 1, 2] + # if label_val in [0, 1, 2] and segment_name not in segment_names: + # segment_names.append(segment_name) + # labels[segment_name] = label_val # Extracts the labels of the segments retrieved into a dictionary + + # # # Since shuffle=False for the dataloader in preprocess_data(), this is my work around for that while allowing for checkpointing + # # random.seed(seed=42) + # # random.shuffle(segment_names) # Will not affect the labels since the labels are in a dictionary + + # return segment_names, labels + + + def extract_segment_names_and_labels(self): # Only extract the segments and labels of a particular class, temporary solution + segment_names = [] + labels = {} + + # If a subject is not loading and there are no errors, just these lists + uid_nsr = ['011', '014', '041', '050', '056', '058', '083', '106', '109', + '037', '047', '055', '074', '091', '098', '101', '119', '325', + '003', '012', '020', '024', '027', '030', '035', '036', '044', '049', '057', '063', '077', '084', '088', '094', '099', '111', '118'] + uid_af = ['305', '307', '311', '318', '320', '322', '405', '415', '423', + '301', '319', '321', '324', '329', '400', '406', '409', '416', + '017', '302', '306', '310', '312', '402', '407', '410', '413', '414', '420', '421', '422'] + uid_pacpvc = ['007', '022', '028', '038', '054', '068', '075', '086', '087', '093', '120', '327', + '002', '005', '013', '021', '026', '029', '045', '073', '089', '100', '112', '408', + '039', '042', '052', '053', '064', '069', '070', '078', '080', '090', '104', '110', '113', '419'] + + for UID in self.UIDs: + label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv") + if os.path.exists(label_file): + # label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label']) # Replaces the original headers with names + + # Use PyArrow to read csv + parse_options = csv.ParseOptions(delimiter=',') # Indicate delimiter + read_options = csv.ReadOptions(column_names=['segment', 'label'], skip_rows=1) # Assign desired column names and skip the first row (headers) + label_data = csv.read_csv(label_file, parse_options=parse_options, read_options=read_options) + label_data = label_data.to_pandas() + + label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0]) # Splits each segment name by '.' 
and retrieves the first part + for idx, segment_name in enumerate(label_segment_names): # enumerate() returns the value and corresponding index of each element in an iterable + label_val = label_data['label'].values[idx] + # Will only use NSR (0), AF (1), and PAC/PVC(2) and not SVT (3) + if self.read_all_labels: # If reading all labels, set all labels not 0, 1, or 2 to -1 and return all labels + # Assign -1 if label is not in [0, 1, 2] + labels[segment_name] = label_val if label_val in [0, 1, 2] else -1 + if segment_name not in segment_names: + segment_names.append(segment_name) + else: + # Only add segments with labels in [0, 1, 2] + if label_val in [0, 1, 2] and segment_name not in segment_names: + # Temporary solution to ensure only segments of a particular class are loaded for each UID + if UID in uid_nsr and label_val == 0: + segment_names.append(segment_name) + labels[segment_name] = label_val + elif UID in uid_af and label_val == 1: + segment_names.append(segment_name) + labels[segment_name] = label_val + elif UID in uid_pacpvc and label_val == 2: + segment_names.append(segment_name) + if self.binary: + labels[segment_name] = 0 + else: + labels[segment_name] = label_val + + return segment_names, labels + + + def load_data(self, segment_name): + data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0]) + if self.is_tfs: + seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.' + self.data_format) + else: + seg_path = os.path.join(data_path_UID, segment_name + '_density_poincare.' + self.data_format) + + + try: # Allows to define a block of code to be executed and specify how to handle any errors that might occur during its execution + if self.data_format == 'csv' and seg_path.endswith('.csv'): + # time_freq_plot = np.array(pd.read_csv(seg_path, header=None)) + + # Use PyArrow to read csv + read_options = csv.ReadOptions(autogenerate_column_names=True) + seg_data = csv.read_csv(seg_path, read_options=read_options) + time_freq_plot = seg_data.to_pandas().to_numpy() + + time_freq_tensor = torch.tensor(time_freq_plot).reshape(self.img_channels, self.img_size, self.img_size) + elif self.data_format == 'png' and seg_path.endswith('.png'): + img = Image.open(seg_path) + img_data = np.array(img) + time_freq_tensor = torch.tensor(img_data).unsqueeze(0) + elif self.data_format == 'pt' and seg_path.endswith('.pt'): + time_freq_tensor = torch.load(seg_path) + else: + raise ValueError("Unsupported file format") + + if self.downsample is not None: + # Downsample the image + # Use OpenCV to resize the array to downsample x downsample using INTER_AREA interpolation + time_freq_array = cv2.resize(np.array(time_freq_tensor.reshape(self.img_size, self.img_size).to('cpu')), (self.downsample, self.downsample), interpolation=cv2.INTER_AREA) + time_freq_tensor = torch.tensor(time_freq_array, dtype=self.dtype).reshape(self.img_channels, self.downsample, self.downsample) + else: + time_freq_tensor = time_freq_tensor.reshape(self.img_channels, self.img_size, self.img_size).to(self.dtype) + + if self.standardize: + time_freq_tensor = self.standard_scaling(time_freq_tensor) # Standardize the data + + return time_freq_tensor + + except Exception as e: + print(f"Error processing segment: {segment_name}. 
Exception: {str(e)}") + if self.downsample is not None: + return torch.zeros((self.img_channels, self.downsample, self.downsample)) # Return zeros in case of an error + else: + return torch.zeros((self.img_channels, self.img_size, self.img_size)) # Return zeros in case of an error + + def standard_scaling(self, data): + scaler = StandardScaler() + data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape) # Converts data into 2D array, standardizes it, reshapes it back into 3D (1,X,X) + return torch.tensor(data, dtype=self.dtype) + +def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', + read_all_labels=False, drop_last=False, num_workers=4, start_idx=0, + img_channels=1, img_size=128, downsample=None, data_type=torch.float32, is_tfs=True, binary=False): + torch.manual_seed(42) + g = torch.Generator() + g.manual_seed(42) + + pin_memory = False + if torch.cuda.is_available(): + pin_memory = True + + dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels, start_idx=start_idx, + img_channels=img_channels, img_size=img_size, downsample=downsample, data_type=data_type, is_tfs=is_tfs, binary=binary) + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2, persistent_workers=True, pin_memory=pin_memory, worker_init_fn=seed_worker, generator=g) # Prefetches 2 batches ahead of current training iteration (allows loading of data simultaneously with training). Shuffle is set to False to resume training at a specific batch. + return dataloader + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +# Function to extract and preprocess data +def preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size, standardize=False, + read_all_labels=False, img_channels=1, img_size=128, downsample=None, data_type=torch.float32, pathmaster=None, binary=False): + start_idx = 0 + data_path, labels_path = pathmaster.data_paths(data_format) + + if data_format == 'csv': + num_workers = 6 + elif data_format == 'pt': + num_workers = 8 + + train_loader = load_data_split_batched(data_path, labels_path, clinical_trial_train, batch_size, standardize=standardize, + data_format=data_format, read_all_labels=read_all_labels, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs, binary=binary) + val_loader = load_data_split_batched(data_path, labels_path, clinical_trial_test, batch_size, standardize=standardize, + data_format=data_format, read_all_labels=read_all_labels, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs, binary=binary) + test_loader = load_data_split_batched(data_path, labels_path, clinical_trial_unlabeled, batch_size, standardize=standardize, + data_format=data_format, read_all_labels=read_all_labels, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs, binary=binary) + return train_loader, val_loader, test_loader + +def map_samples_to_uids(uncertain_sample_indices, dataset): + """ + Maps indices of uncertain samples back to their corresponding segment 
names or UIDs. + + Args: + - uncertain_sample_indices: Indices of the uncertain samples in the dataset. + - dataset: The dataset object which contains the mapping of segment names and UIDs. + + Returns: + - List of UIDs or segment names corresponding to the uncertain samples. + """ + return [dataset.segment_names[i] for i in uncertain_sample_indices] + +def update_train_loader_with_labeled_samples(current_train_loader, labeled_samples, batch_size): # Luis' doesn't seem to use this + """ + Updates the training DataLoader with newly labeled samples. + + Args: + - current_train_loader: The current DataLoader for the training set. + - labeled_samples: A list of tuples, each containing a data tensor and its new label. + - batch_size: Batch size for the DataLoader. + + Returns: + - DataLoader: The updated DataLoader with the new labeled samples. + """ + + # Extract the current dataset from the DataLoader + current_dataset = current_train_loader.dataset + + # Update the dataset with new samples and labels + for data_tensor, label in labeled_samples: + # Assuming the CustomDataset class has a method to add new data and labels + current_dataset.add_data_label_pair(data_tensor, label) + + # Create a new DataLoader with the updated dataset + updated_train_loader = DataLoader(current_dataset, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=4, prefetch_factor=2) + + return updated_train_loader + +def update_train_loader_with_uncertain_samples(current_train_loader, new_sample_indices, batch_size): # Luis' uses this method for active learning + # Extract current UIDs from the current_train_loader + current_dataset = current_train_loader.dataset + # Map new_samples back to their corresponding segment names or UIDs + new_uids = map_samples_to_uids(new_sample_indices, current_dataset) + # Add new UIDs to the current dataset and refresh it + current_dataset.add_uids(new_uids) + # Create new DataLoader with the updated dataset + updated_train_loader = DataLoader(current_dataset, batch_size=batch_size, shuffle=False) + return updated_train_loader + + \ No newline at end of file diff --git a/utils/dataloader_database.py b/utils/dataloader_database.py new file mode 100644 index 0000000..c3ab6b1 --- /dev/null +++ b/utils/dataloader_database.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Feb 26 18:29:59 2024 + +@author: dchen +""" +import os +import numpy as np +import pandas as pd +from PIL import Image +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from torchvision.transforms import ToTensor +import math +from numpy import random +from numpy.random import choice +import cv2 +from pyarrow import csv + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Seeds +torch.manual_seed(42) +np.random.seed(42) +random.seed(42) + +class CustomDataset(Dataset): + def __init__(self, data_path, labels_path, standardize=True, data_format='pt', start_idx=0, + img_channels=1, img_size=128, downsample=None, data_type=torch.float32, is_tfs=True, binary=False): + self.data_path = data_path + self.labels_path = labels_path + self.standardize = standardize + self.data_format = data_format + self.transforms = ToTensor() + self.start_idx = start_idx # Initial batch index to start from, useful for resuming training + self.img_channels = img_channels + self.img_size = img_size + self.downsample = downsample + self.is_tfs = is_tfs + self.dtype = data_type + self.binary = binary + + self.refresh_dataset() + + + 
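A note on the batch structure this database dataset produces: because __getitem__ (below) returns a dictionary per segment, a DataLoader built on this class yields dictionary batches. An illustrative sketch of the shapes, assuming the defaults above (single-channel 128x128 .pt segments; variable names are hypothetical):

    # loader = DataLoader(CustomDataset(data_path, labels_path, data_format='pt'), batch_size=4)
    # batch = next(iter(loader))
    # batch['data'].shape    -> torch.Size([4, 1, 128, 128])
    # batch['label'].shape   -> torch.Size([4])
    # batch['segment_name']  -> list of 4 segment-name strings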
def refresh_dataset(self): + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + + def __len__(self): # Method is implicitly called when len() is used on an instance of CustomDataset + return len(self.segment_names) + + + def __getitem__(self, idx): # Method is implicitly called when getitem() is used on an instance of CustomDataset. It is called batch_size number of times per iteration of dataloader | Loads segments as needed (lazy loading) + actual_idx = (idx + self.start_idx) % len(self.segment_names) # Adjust index based on start_idx and wrap around if needed (i.e. index falls out of bounds) + segment_name = self.segment_names[actual_idx] + label = self.labels[segment_name] + + data_tensor = self.load_data(segment_name) + + return {'data': data_tensor, 'label': label, 'segment_name': segment_name} + + # When iterating over the dataloader, which returns batches of data, each batch will contain a dictionary with keys corresponding to the data and labels. + + # Since the dataloader's dataset's __getitem__ method returns a dictionary with keys 'data', 'label', and 'segment_name', the returned batch will be a dictionary where: + + # The 'data' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the data. + # The 'label' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the labels. + # The 'segment_name' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the segment_name. + + def set_start_idx(self, index): + self.start_idx = index + + + def extract_segment_names_and_labels(self): # Only extract the segments and labels of a particular class, temporary solution + segment_names = [] + labels = {} + label_file = self.labels_path + if os.path.exists(label_file): + # Use PyArrow to read csv + parse_options = csv.ParseOptions(delimiter=',') # Indicate delimiter + read_options = csv.ReadOptions(column_names=['segment_names', 'labels'], skip_rows=1) # Assign desired column names and skip the first row (headers) + label_data = csv.read_csv(label_file, parse_options=parse_options, read_options=read_options) + label_data = label_data.to_pandas() + + label_segment_names = label_data['segment_names'] + for idx, segment_name in enumerate(label_segment_names): # enumerate() returns the value and corresponding index of each element in an iterable + label_val = label_data['labels'].values[idx] + + if self.binary and label_val == 2: # If binary is true, set all PAC/PVC to 0 (non-AF) + label_val = 0 + + segment_names.append(segment_name) + labels[segment_name] = label_val + + return segment_names, labels + + + def second_to_last_directory_name(self, path): + # Normalize path separator to '/' + path = path.replace('\\', '/') + + # Split the path into its components + components = path.split('/') + + # Remove empty components + components = [c for c in components if c] + + # Check if the path ends with a separator (indicating it's a directory) + if path.endswith('/'): + # Remove the last empty component + components.pop() + + # If there's only one or zero directories in the path, return None + if len(components) <= 1: + return None + + # Return the name of the second-to-last directory + return components[-2] + + + def load_data(self, segment_name): + seg_path = os.path.join(self.data_path, segment_name + '.' 
+ self.data_format) + + try: # Allows to define a block of code to be executed and specify how to handle any errors that might occur during its execution + if self.data_format == 'csv' and seg_path.endswith('.csv'): + # data_plot = np.array(pd.read_csv(seg_path, header=None)) + + # Use PyArrow to read csv + read_options = csv.ReadOptions(autogenerate_column_names=True) + seg_data = csv.read_csv(seg_path, read_options=read_options) + data_plot = seg_data.to_pandas().to_numpy() + + data_tensor = torch.tensor(data_plot).reshape(self.img_channels, self.img_size, self.img_size) + elif self.data_format == 'png' and seg_path.endswith('.png'): + img = Image.open(seg_path) + img_data = np.array(img) + data_tensor = torch.tensor(img_data).unsqueeze(0) + elif self.data_format == 'pt' and seg_path.endswith('.pt'): + data_tensor = torch.load(seg_path) + else: + raise ValueError("Unsupported file format") + + if self.downsample is not None: + # Downsample the image + # Use OpenCV to resize the array to downsample x downsample using INTER_AREA interpolation + data_array = cv2.resize(np.array(data_tensor.reshape(self.img_size, self.img_size).to('cpu')), (self.downsample, self.downsample), interpolation=cv2.INTER_AREA) + data_tensor = torch.tensor(data_array, dtype=self.dtype).reshape(self.img_channels, self.downsample, self.downsample) + else: + data_tensor = data_tensor.reshape(self.img_channels, self.img_size, self.img_size).to(self.dtype) + + if self.standardize: + data_tensor = self.standard_scaling(data_tensor) # Standardize the data + + return data_tensor + + except Exception as e: + print(f"Error processing segment: {segment_name}. Exception: {str(e)}") + if self.downsample is not None: + return torch.zeros((self.img_channels, self.downsample, self.downsample)) # Return zeros in case of an error + else: + return torch.zeros((self.img_channels, self.img_size, self.img_size)) # Return zeros in case of an error + + def standard_scaling(self, data): + scaler = StandardScaler() + data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape) # Converts data into 2D array, standardizes it, reshapes it back into 3D (1,X,X) + return torch.tensor(data, dtype=self.dtype) + +def load_data_split_batched(data_path, labels_path, batch_size, standardize=False, data_format='csv', + drop_last=False, num_workers=4, start_idx=0, + img_channels=1, img_size=128, downsample=None, data_type=torch.float16, is_tfs=True, binary=False): + torch.manual_seed(42) + g = torch.Generator() + g.manual_seed(42) + + pin_memory = False + if torch.cuda.is_available(): + pin_memory = True + + dataset = CustomDataset(data_path, labels_path, standardize, data_format, start_idx=start_idx, + img_channels=img_channels, img_size=img_size, downsample=downsample, data_type=data_type, is_tfs=is_tfs, binary=binary) + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2, persistent_workers=True, pin_memory=pin_memory, worker_init_fn=seed_worker, generator=g) # Prefetches 2 batches ahead of current training iteration (allows loading of data simultaneously with training). Shuffle is set to False to resume training at a specific batch. 
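For reference, the downsampling branch of load_data above shrinks a single-channel time-frequency image with OpenCV's area interpolation and then restores the (channels, H, W) layout. A standalone sketch under assumed sizes (128 down to 64; both values and the random input are placeholders):

import cv2
import numpy as np
import torch

tfs = torch.rand(1, 128, 128)                                    # stand-in for a loaded segment tensor
arr = np.array(tfs.reshape(128, 128).to('cpu'))                  # drop the channel dim for OpenCV
small = cv2.resize(arr, (64, 64), interpolation=cv2.INTER_AREA)  # INTER_AREA is the usual choice when shrinking
tfs_small = torch.tensor(small, dtype=torch.float32).reshape(1, 64, 64)
print(tfs_small.shape)                                           # torch.Size([1, 64, 64])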
+ return dataloader + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +# Function to extract and preprocess data +def preprocess_data(database, batch_size, standardize=False, img_channels=1, img_size=128, + downsample=None, data_type=torch.float32, pathmaster=None, binary=False): + start_idx = 0 + + if database == 'DeepBeat' or database == 'deepbeat' or database == 'Deepbeat': + data_path, labels_path = pathmaster.deepbeat_paths() + elif database == 'MIMICIII' or database == 'mimiciii' or database == 'mimicIII' or database == 'mimic3': + data_path, labels_path = pathmaster.mimic3_paths() + elif database == 'Simband' or database == 'simband': + data_path, labels_path = pathmaster.simband_paths() + else: + print('Invalid Database') + + data_format = 'pt' + + num_workers = 1 + + test_loader = load_data_split_batched(data_path, labels_path, batch_size, standardize=standardize, + data_format=data_format, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs, binary=binary) + # loader2 = load_data_split_batched(data_path, labels_path, batch_size, standardize=standardize, + # data_format=data_format, num_workers=num_workers, + # start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + # data_type=data_type, is_tfs=pathmaster.is_tfs, binary=False) + # loader3 = load_data_split_batched(data_path, labels_path, batch_size, standardize=standardize, + # data_format=data_format, num_workers=num_workers, + # start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + # data_type=data_type, is_tfs=pathmaster.is_tfs, binary=False) + return test_loader # loader1, loader2, loader3 + + \ No newline at end of file diff --git a/utils/dataloader_smote.py b/utils/dataloader_smote.py new file mode 100644 index 0000000..9266028 --- /dev/null +++ b/utils/dataloader_smote.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Feb 26 18:29:59 2024 + +@author: dchen +""" +import os +import numpy as np +import pandas as pd +from PIL import Image +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from torchvision.transforms import ToTensor +import math +from numpy import random +from numpy.random import choice +import cv2 +from pyarrow import csv + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Seeds +torch.manual_seed(42) +np.random.seed(42) +random.seed(42) + +class CustomDataset(Dataset): + def __init__(self, smote_path, groups, standardize=True, data_format='pt', start_idx=0, + img_channels=1, img_size=128, downsample=None, data_type=torch.float32, is_tfs=True): + self.smote_path = smote_path + self.standardize = standardize + self.data_format = data_format + self.transforms = ToTensor() + self.start_idx = start_idx # Initial batch index to start from, useful for resuming training + self.img_channels = img_channels + self.img_size = img_size + self.downsample = downsample + self.is_tfs = is_tfs + self.groups = groups + self.dtype = data_type + + self.refresh_dataset() + + + def refresh_dataset(self): + self.segment_names, self.labels = self.extract_segment_names_and_labels() + + + def __len__(self): # Method is implicitly called when len() is used on an instance of CustomDataset + return len(self.segment_names) + + + def __getitem__(self, idx): # Method is 
implicitly called when getitem() is used on an instance of CustomDataset. It is called batch_size number of times per iteration of dataloader | Loads segments as needed (lazy loading) + actual_idx = (idx + self.start_idx) % len(self.segment_names) # Adjust index based on start_idx and wrap around if needed (i.e. index falls out of bounds) + segment_name = self.segment_names[actual_idx] + label = self.labels[segment_name] + + data_tensor = self.load_data(segment_name) + + return {'data': data_tensor, 'label': label, 'segment_name': segment_name} + + # When iterating over the dataloader, which returns batches of data, each batch will contain a dictionary with keys corresponding to the data and labels. + + # Since the dataloader's dataset's __getitem__ method returns a dictionary with keys 'data', 'label', and 'segment_name', the returned batch will be a dictionary where: + + # The 'data' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the data. + # The 'label' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the labels. + # The 'segment_name' key will correspond to a tensor of shape (batch_size, ...), representing the shape of the segment_name. + + def set_start_idx(self, index): + self.start_idx = index + + + def extract_segment_names_and_labels(self): # Only extract the segments and labels of a particular class, temporary solution + segment_names = [] + labels = {} + + group_directories = [entry for entry in os.listdir(self.smote_path) if os.path.isdir(os.path.join(self.smote_path, entry))] + group = list(set(self.groups).intersection(set(group_directories)))[0] + + smote_type = self.second_to_last_directory_name(self.smote_path) + label_file = os.path.join(self.smote_path, smote_type + '_' + group + '_names_labels.csv') + if os.path.exists(label_file): + # Use PyArrow to read csv + parse_options = csv.ParseOptions(delimiter=',') # Indicate delimiter + read_options = csv.ReadOptions(column_names=['segment_name', 'label'], skip_rows=1) # Assign desired column names and skip the first row (headers) + label_data = csv.read_csv(label_file, parse_options=parse_options, read_options=read_options) + label_data = label_data.to_pandas() + + label_segment_names = label_data['segment_name'] + for idx, segment_name in enumerate(label_segment_names): # enumerate() returns the value and corresponding index of each element in an iterable + label_val = label_data['label'].values[idx] + segment_names.append(segment_name) + labels[segment_name] = label_val + + return segment_names, labels + + + def second_to_last_directory_name(self, path): + # Normalize path separator to '/' + path = path.replace('\\', '/') + + # Split the path into its components + components = path.split('/') + + # Remove empty components + components = [c for c in components if c] + + # Check if the path ends with a separator (indicating it's a directory) + if path.endswith('/'): + # Remove the last empty component + components.pop() + + # If there's only one or zero directories in the path, return None + if len(components) <= 1: + return None + + # Return the name of the second-to-last directory + return components[-2] + + + def load_data(self, segment_name): + data_path_group = os.path.join(self.smote_path, segment_name.split('_')[1]) + seg_path = os.path.join(data_path_group, segment_name + '.' 
+ self.data_format) + + try: # Allows to define a block of code to be executed and specify how to handle any errors that might occur during its execution + if self.data_format == 'csv' and seg_path.endswith('.csv'): + # data_plot = np.array(pd.read_csv(seg_path, header=None)) + + # Use PyArrow to read csv + read_options = csv.ReadOptions(autogenerate_column_names=True) + seg_data = csv.read_csv(seg_path, read_options=read_options) + data_plot = seg_data.to_pandas().to_numpy() + + data_tensor = torch.tensor(data_plot).reshape(self.img_channels, self.img_size, self.img_size) + elif self.data_format == 'png' and seg_path.endswith('.png'): + img = Image.open(seg_path) + img_data = np.array(img) + data_tensor = torch.tensor(img_data).unsqueeze(0) + elif self.data_format == 'pt' and seg_path.endswith('.pt'): + data_tensor = torch.load(seg_path) + else: + raise ValueError("Unsupported file format") + + if self.downsample is not None: + # Downsample the image + # Use OpenCV to resize the array to downsample x downsample using INTER_AREA interpolation + data_array = cv2.resize(np.array(data_tensor.reshape(self.img_size, self.img_size).to('cpu')), (self.downsample, self.downsample), interpolation=cv2.INTER_AREA) + data_tensor = torch.tensor(data_array, dtype=self.dtype).reshape(self.img_channels, self.downsample, self.downsample) + else: + data_tensor = data_tensor.reshape(self.img_channels, self.img_size, self.img_size).to(self.dtype) + + if self.standardize: + data_tensor = self.standard_scaling(data_tensor) # Standardize the data + + return data_tensor + + except Exception as e: + print(f"Error processing segment: {segment_name}. Exception: {str(e)}") + if self.downsample is not None: + return torch.zeros((self.img_channels, self.downsample, self.downsample)) # Return zeros in case of an error + else: + return torch.zeros((self.img_channels, self.img_size, self.img_size)) # Return zeros in case of an error + + def standard_scaling(self, data): + scaler = StandardScaler() + data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape) # Converts data into 2D array, standardizes it, reshapes it back into 3D (1,X,X) + return torch.tensor(data, dtype=self.dtype) + +def load_data_split_batched(smote_path, groups, batch_size, standardize=False, data_format='csv', + drop_last=False, num_workers=4, start_idx=0, + img_channels=1, img_size=128, downsample=None, data_type=torch.float32, is_tfs=True): + torch.manual_seed(42) + g = torch.Generator() + g.manual_seed(42) + + pin_memory = False + if torch.cuda.is_available(): + pin_memory = True + + dataset = CustomDataset(smote_path, groups, standardize, data_format, start_idx=start_idx, + img_channels=img_channels, img_size=img_size, downsample=downsample, data_type=data_type, is_tfs=is_tfs) + dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2, persistent_workers=True, pin_memory=pin_memory, worker_init_fn=seed_worker, generator=g) # Prefetches 2 batches ahead of current training iteration (allows loading of data simultaneously with training). Shuffle is set to False to resume training at a specific batch. 
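The standard_scaling helper above flattens each (1, H, W) segment into a 2-D array so that StandardScaler normalizes every column of that single image to zero mean and unit variance, then restores the original shape. A minimal sketch assuming a 128 x 128 segment (the random input is a placeholder):

import numpy as np
import torch
from sklearn.preprocessing import StandardScaler

x = torch.rand(1, 128, 128)                           # one unscaled segment
flat = x.reshape(-1, x.shape[-1]).numpy()             # (128, 128): rows x columns
scaled = StandardScaler().fit_transform(flat)         # per-column zero mean, unit variance
x_std = torch.tensor(scaled.reshape(x.shape), dtype=torch.float32)
print(x_std.mean(dim=1))                              # approximately 0 for every column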
+ return dataloader + +def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + random.seed(worker_seed) + +# Function to extract and preprocess data +def preprocess_data(smote_type, split, batch_size, standardize=False, img_channels=1, img_size=128, + downsample=None, data_type=torch.float32, pathmaster=None): + start_idx = 0 + smote_path = pathmaster.smote_path(smote_type, split) + data_format = 'pt' + + num_workers = 8 + + loader1 = load_data_split_batched(smote_path, ['fold1', 'train'], batch_size, standardize=standardize, + data_format=data_format, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs) + loader2 = load_data_split_batched(smote_path, ['fold2', 'validate'], batch_size, standardize=standardize, + data_format=data_format, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs) + loader3 = load_data_split_batched(smote_path, ['test', 'test'], batch_size, standardize=standardize, + data_format=data_format, num_workers=num_workers, + start_idx=start_idx, img_channels=img_channels, img_size=img_size, downsample=downsample, + data_type=data_type, is_tfs=pathmaster.is_tfs) + return loader1, loader2, loader3 + + \ No newline at end of file diff --git a/utils/get_paths.py b/utils/get_paths.py new file mode 100644 index 0000000..b22752e --- /dev/null +++ b/utils/get_paths.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Feb 27 14:55:43 2024 + +@author: dchen +""" +import os + +def data_paths(data_format, is_linux=False, is_hpc=False): + if is_linux: + base_path = "/mnt/r/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch" + labels_base_path = "/mnt/r/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn" + saving_base_path = "/mnt/r/ENGR_Chon/Darren/Honors_Thesis/saves/analysis" + elif is_hpc: + base_path = "/gpfs/scratchfs1/kic14002/doh16101" + labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005" + saving_base_path = "/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/saves/analysis" + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = r"R:\ENGR_Chon\Dong\MATLAB_generate_results\\NIH_PulseWatch" # Why double \\ before NIH_Pulsewatch_Database? + labels_base_path = r"R:\ENGR_Chon\\NIH_Pulsewatch_Database\Adjudication_UConn" # Why double \\ before NIH_Pulsewatch_Database? + saving_base_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\saves" # Only when writing to file in the R drive do we need the entire address for the R drive + if data_format == 'csv': + data_path = os.path.join(base_path, "TFS_csv") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "analysis") + elif data_format == 'png': + data_path = os.path.join(base_path, "TFS_plots") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "analysis") + elif data_format == 'pt': + data_path = os.path.join(base_path, "PT_format") + labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm") + saving_path = os.path.join(saving_base_path, "analysis") + else: + raise ValueError("Invalid data format. 
Choose 'csv', 'png, or 'pt'.") + + return data_path, labels_path, saving_path + + +def models_path(is_linux=False, is_hpc=False): + if is_linux: + models_path = "/mnt/r/ENGR_Chon/Darren/Honors_Thesis/models" + elif is_hpc: + models_path = "/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/models" + else: + models_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\models" + + return models_path + +# Base saving paths +focus = 'misc' +# focus = '2_layers_per_block' +# focus = '2_layers_per_block' +linux_saves_path = '/mnt/r/ENGR_Chon/Darren/Honors_Thesis/saves/' + focus + '/' +hpc_saves_path = '/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/saves/' + focus + '/' +saves_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\saves' + '\\' + focus + '\\' + +def losslists_path(is_linux=False, is_hpc=False): + if is_linux: + losslists_path = linux_saves_path + 'losslists' + elif is_hpc: + losslists_path = hpc_saves_path + 'losslists' + else: + losslists_path = saves_path + 'losslists' + + return losslists_path + + +def runtime_lists_path(is_linux=False, is_hpc=False): + if is_linux: + runtime_lists_path = linux_saves_path + 'runtime_lists' + elif is_hpc: + runtime_lists_path = hpc_saves_path + 'runtime_lists' + else: + runtime_lists_path = saves_path + 'runtime_lists' + + return runtime_lists_path + + +def predictions_path(is_linux=False, is_hpc=False): + if is_linux: + predictions_path = linux_saves_path + 'predictions' + elif is_hpc: + predictions_path = hpc_saves_path + 'predictions' + else: + predictions_path = saves_path + 'predictions' + + return predictions_path + +def prediction_proba_path(is_linux=False, is_hpc=False): + if is_linux: + prediction_proba_path = linux_saves_path + 'prediction_proba' + elif is_hpc: + prediction_proba_path = hpc_saves_path + 'prediction_proba' + else: + prediction_proba_path = saves_path + 'prediction_proba' + + return prediction_proba_path + + +def metrics_path(is_linux=False, is_hpc=False): + if is_linux: + metrics_path = linux_saves_path + 'metrics' + elif is_hpc: + metrics_path = hpc_saves_path + 'metrics' + else: + metrics_path = saves_path + 'metrics' + + return metrics_path + + +def confusion_matrices_path(is_linux=False, is_hpc=False): + if is_linux: + confusion_matrices_path = linux_saves_path + 'confusion_matrices' + elif is_hpc: + confusion_matrices_path = hpc_saves_path + 'confusion_matrices' + else: + confusion_matrices_path = saves_path + 'confusion_matrices' + + return confusion_matrices_path + + +def checkpoints_path(is_linux=False, is_hpc=False): + if is_linux: + checkpoints_path = linux_saves_path + 'checkpoints' + elif is_hpc: + checkpoints_path = hpc_saves_path + 'checkpoints' + else: + checkpoints_path = saves_path + 'checkpoints' + + return checkpoints_path + +def hyperparameters_path(is_linux=False, is_hpc=False): + if is_linux: + hyperparameters_path = linux_saves_path + 'hyperparameters' + elif is_hpc: + hyperparameters_path = hpc_saves_path + 'hyperparameters' + else: + hyperparameters_path = saves_path + 'hyperparameters' + + return hyperparameters_path + +def loss_curves_path(is_linux=False, is_hpc=False): + if is_linux: + loss_curves_path = linux_saves_path + 'loss_curves' + elif is_hpc: + loss_curves_path = hpc_saves_path + 'loss_curves' + else: + loss_curves_path = saves_path + 'loss_curves' + + return loss_curves_path + + diff --git a/utils/misc_func.py b/utils/misc_func.py new file mode 100644 index 0000000..6893a71 --- /dev/null +++ b/utils/misc_func.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 
-*- +""" +Created on Sun Mar 3 03:56:36 2024 + +@author: dchen +""" + +def substring_between_strings(main_string, start_string, end_string): + start_index = main_string.find(start_string) + if start_index == -1: + return None + + end_index = main_string.find(end_string, start_index + len(start_string)) + if end_index == -1: + return None + + return main_string[start_index + len(start_string):end_index] + + +def string_to_boolean(input_string): + if input_string.lower() in ['true', 't', 'yes', 'y', '1']: + return True + elif input_string.lower() in ['false', 'f', 'no', 'n', '0']: + return False + else: + raise ValueError("String does not represent a boolean value") diff --git a/utils/model_func.py b/utils/model_func.py new file mode 100644 index 0000000..95f19de --- /dev/null +++ b/utils/model_func.py @@ -0,0 +1,2145 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Feb 26 14:58:20 2024 + +@author: dchen +""" + +import os +import sys +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from tqdm import tqdm +import random +import time +import torch.autograd as autograd +from torch.cuda.amp import autocast, GradScaler + +# Import my own functions and classes +# from utils import get_paths +from utils import plot_save_func +from models.densenet import DenseNet3 as DenseNet +from models.densenet_configurable import DenseNet as DenseNet_config + +# If GPU is available, use GPU, else use CPU +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Seeds +torch.manual_seed(42) +np.random.seed(42) +random.seed(42) + + +def cross_val_2fold_DenseNet(model_hyperparameters, fold1_loader, fold2_loader, model_type=torch.float32, + n_epochs=100, n_classes=3, patience=10, save=False, + resume_checkpoint_path=None, pathmaster=None): + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Optimizer and scheduler hyperparameters + lr = 0.0001 + + # Define img_channels + img_channels = 1 + + # Resume checkpoint if specified + if resume_checkpoint_path is not None and os.path.exists(resume_checkpoint_path): + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + model_fold1, optimizer_fold1, scheduler_fold1, model_fold2, optimizer_fold2, scheduler_fold2, epoch, loss = load_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, pathmaster) + start_epoch = epoch + 1 + best_loss_cross_val = loss + else: + # Extract model hyperparameters + depth = model_hyperparameters['depth'] + growth_rate = model_hyperparameters['growth_rate'] + compression = model_hyperparameters['compression'] + bottleneck 
= model_hyperparameters['bottleneck'] + drop_rate = model_hyperparameters['drop_rate'] + class_weights = model_hyperparameters['class_weights'] + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + best_loss_cross_val = float('inf') # If no checkpoint is loaded, set to infinity + start_epoch = 0 + + if save: + # Save hyperparameters + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Create criterion for loss + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + + # Regularization + lambda_l1 = 0.01 + + # Initialize losslists + losslist_train = [] + losslist_cross_val = [] + + losslist_train_fold1 = [] + losslist_val_fold1 = [] + + losslist_train_fold2 = [] + losslist_val_fold2 = [] + + # Initialize runtime list + runtime_list = [] + + # Cross-validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in tqdm(range(start_epoch, n_epochs), desc='Cross-Validation', unit='epoch', leave=False): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + sys.stdout.flush() + + # Fold 1 training =============================================================================================================================================================== + model_fold1.train() + train_cum_loss_fold1 = 0 + for data_batch in tqdm(fold1_loader, total=len(fold1_loader), desc='Training', unit='batch', leave=False): + # Extract input and labels + X_train = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_train = data_batch['label'].to(device=device) + + # Forward pass + logits, _, _ = model_fold1(X_train) + + # Regularization (if applicable) + l1 = 0 + for p in model_fold1.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate total loss with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) + lambda_l1 * l1 + train_cum_loss_fold1 += batch_loss_train.item() + + # Clear gradients + optimizer_fold1.zero_grad() + + # Backwards pass + batch_loss_train.backward() + + # Optimizer step + optimizer_fold1.step() + + # Update scheduler + scheduler_fold1.step() + + loss_train_fold1 = train_cum_loss_fold1 / len(fold1_loader) + + sys.stderr.flush() + print('\nTraining for Fold #1 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 1 validation 
============================================================================================================================================================= + model_fold1.eval() + with torch.no_grad(): + val_cum_loss_fold1 = 0 + for data_batch in tqdm(fold2_loader, total=len(fold2_loader), desc='Validation', unit='batch', leave=False): + X_val = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_val = data_batch['label'].to(device=device) + + logits, _, _ = model_fold1(X_val) + val_cum_loss_fold1 += criterion_val(logits.float(), Y_val.long()).item() + + loss_val_fold1 = val_cum_loss_fold1 / len(fold2_loader) + + sys.stderr.flush() + print('\nValidation for Fold #1 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 2 training =============================================================================================================================================================== + model_fold2.train() + train_cum_loss_fold2 = 0 + for data_batch in tqdm(fold2_loader, total=len(fold2_loader), desc='Training', unit='batch', leave=False): + # Extract input and labels + X_train = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_train = data_batch['label'].to(device=device) + + # Forward pass + logits, _, _ = model_fold2(X_train) + + # Regularization (if applicable) + l1 = 0 + for p in model_fold2.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate total loss with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) + lambda_l1 * l1 + train_cum_loss_fold2 += batch_loss_train.item() + + # Clear gradients + optimizer_fold2.zero_grad() + + # Backwards pass + batch_loss_train.backward() + + # Optimizer step + optimizer_fold2.step() + + # Update scheduler + scheduler_fold2.step() + + loss_train_fold2 = train_cum_loss_fold2 / len(fold2_loader) + + sys.stderr.flush() + print('\nTraining for Fold #2 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 2 validation ============================================================================================================================================================= + model_fold2.eval() + with torch.no_grad(): + val_cum_loss_fold2 = 0 + for data_batch in tqdm(fold1_loader, total=len(fold1_loader), desc='Validation', unit='batch', leave=False): + X_val = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_val = data_batch['label'].to(device=device) + + logits, _, _ = model_fold2(X_val) + val_cum_loss_fold2 += criterion_val(logits.float(), Y_val.long()).item() + + loss_val_fold2 = val_cum_loss_fold2 / len(fold1_loader) + + sys.stderr.flush() + print('\nValidation for Fold #2 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + # =============================================================================================================================================================================== + + # Calculate epoch losses + epoch_loss_train = (loss_train_fold1 + loss_train_fold2) / 2 + epoch_loss_cross_val = (loss_val_fold1 + loss_val_fold2) / 2 + + # Append to losslists + losslist_train.append(epoch_loss_train) + losslist_cross_val.append(epoch_loss_cross_val) + + losslist_train_fold1.append(loss_train_fold1) + 
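Both folds above add an L1 penalty over all model parameters to the weighted cross-entropy loss. A self-contained sketch of that composite loss on a toy linear classifier, with lambda_l1 = 0.01 as in the training code (the model, batch, and shapes here are placeholders, not the project's DenseNet):

import torch
import torch.nn as nn

model = nn.Linear(16, 3)                                       # toy stand-in for the DenseNet
criterion = nn.CrossEntropyLoss()
x, y = torch.randn(8, 16), torch.randint(0, 3, (8,))
lambda_l1 = 0.01

logits = model(x)
l1 = sum(p.abs().sum() for p in model.parameters())            # L1 norm of every parameter tensor
loss = criterion(logits, y) + lambda_l1 * l1                   # cross-entropy plus L1 penalty
loss.backward()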
losslist_val_fold1.append(loss_val_fold1) + + losslist_train_fold2.append(loss_train_fold2) + losslist_val_fold2.append(loss_val_fold2) + + # Return the best cross-validation loss and save best checkpoint (epoch) + best_loss_cross_val = save_best_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, epoch_loss_cross_val, best_loss_cross_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Cross-Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_cross_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_cross_val): + break + + # Saving + if save: + title = 'Training and Cross-Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_cross_val, title, save, pathmaster) + + plot_save_func.save_losslists_2fold(losslist_train_fold1, losslist_val_fold1, losslist_train_fold2, losslist_val_fold2, losslist_train, losslist_cross_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +def cross_val_2fold_DenseNet_mixed(model_hyperparameters, fold1_loader, fold2_loader, + n_epochs=100, n_classes=3, patience=10, save=False, + resume_checkpoint_path=None, pathmaster=None): + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Optimizer and scheduler hyperparameters + lr = 0.0005 + + # Define img_channels + img_channels = 1 + + # Resume checkpoint if specified + if resume_checkpoint_path is not None and os.path.exists(resume_checkpoint_path): + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + model_fold2 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + model_fold1, optimizer_fold1, scheduler_fold1, model_fold2, optimizer_fold2, scheduler_fold2, epoch, loss = load_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, pathmaster) + start_epoch = epoch + 1 + best_loss_cross_val = loss + else: + # Extract model hyperparameters + depth = model_hyperparameters['depth'] + growth_rate = model_hyperparameters['growth_rate'] + compression = model_hyperparameters['compression'] + bottleneck = model_hyperparameters['bottleneck'] + drop_rate = model_hyperparameters['drop_rate'] + class_weights = model_hyperparameters['class_weights'] + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = 
DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + model_fold2 = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + best_loss_cross_val = float('inf') # If no checkpoint is loaded, set to infinity + start_epoch = 0 + + if save: + # Save hyperparameters + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Create criterion for loss + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + + # Regularization + lambda_l1 = 0.01 + + # Initialize losslists + losslist_train = [] + losslist_cross_val = [] + + losslist_train_fold1 = [] + losslist_val_fold1 = [] + + losslist_train_fold2 = [] + losslist_val_fold2 = [] + + # Initialize runtime list + runtime_list = [] + + # Initialize predictions lists + predictions_list_train = [] + predictions_list_val = [] + + # Initialize true labels lists + true_labels_list_train = [] + true_labels_list_val = [] + + # Scalers + scaler_fold1 = GradScaler() + scaler_fold2 = GradScaler() + + # Cross-validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in tqdm(range(start_epoch, n_epochs), desc='Cross-Validation', unit='epoch', leave=False): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + sys.stdout.flush() + + # Epoch predictions + predictions_epoch_train = [] + predictions_epoch_val = [] + + # Fold 1 training =============================================================================================================================================================== + model_fold1.train() + train_cum_loss_fold1 = 0 + for data_batch in tqdm(fold1_loader, total=len(fold1_loader), desc='Training', unit='batch', leave=False): + # Extract input and labels + X_train = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_train = data_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_train.append(torch.reshape(Y_train, (-1,1))) + + with autocast(): + # Forward pass + logits, predictions, _ = model_fold1(X_train) + + predictions_epoch_train.append(torch.reshape(predictions, (-1,1))) + + # Regularization (if applicable) + l1 = 0 + for p in model_fold1.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate total loss with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) + lambda_l1 * l1 + train_cum_loss_fold1 += batch_loss_train.item() + + # Clear gradients + optimizer_fold1.zero_grad() + + # Backwards pass + scaler_fold1.scale(batch_loss_train).backward() + + # Optimizer step + scaler_fold1.step(optimizer_fold1) + + # Scaler update + scaler_fold1.update() + + # Update 
scheduler + scheduler_fold1.step() + + loss_train_fold1 = train_cum_loss_fold1 / len(fold1_loader) + + sys.stderr.flush() + print('\nTraining for Fold #1 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 1 validation ============================================================================================================================================================= + model_fold1.eval() + with torch.no_grad(): + val_cum_loss_fold1 = 0 + for data_batch in tqdm(fold2_loader, total=len(fold2_loader), desc='Validation', unit='batch', leave=False): + X_val = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_val = data_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_val.append(torch.reshape(Y_val, (-1,1))) + + logits, predictions, _ = model_fold1(X_val) + + predictions_epoch_val.append(torch.reshape(predictions, (-1,1))) + + val_cum_loss_fold1 += criterion_val(logits.float(), Y_val.long()).item() + + loss_val_fold1 = val_cum_loss_fold1 / len(fold2_loader) + + sys.stderr.flush() + print('\nValidation for Fold #1 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 2 training =============================================================================================================================================================== + model_fold2.train() + train_cum_loss_fold2 = 0 + for data_batch in tqdm(fold2_loader, total=len(fold2_loader), desc='Training', unit='batch', leave=False): + # Extract input and labels + X_train = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_train = data_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_train.append(torch.reshape(Y_train, (-1,1))) + + with autocast(): + # Forward pass + logits, predictions, _ = model_fold2(X_train) + + predictions_epoch_train.append(torch.reshape(predictions, (-1,1))) + + # Regularization (if applicable) + l1 = 0 + for p in model_fold2.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate total loss with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) + lambda_l1 * l1 + train_cum_loss_fold2 += batch_loss_train.item() + + # Clear gradients + optimizer_fold2.zero_grad() + + # Backwards pass + scaler_fold2.scale(batch_loss_train).backward() + + # Optimizer step + scaler_fold2.step(optimizer_fold2) + + # Scaler update + scaler_fold2.update() + + # Update scheduler + scheduler_fold2.step() + + loss_train_fold2 = train_cum_loss_fold2 / len(fold2_loader) + + sys.stderr.flush() + print('\nTraining for Fold #2 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Fold 2 validation ============================================================================================================================================================= + model_fold2.eval() + with torch.no_grad(): + val_cum_loss_fold2 = 0 + for data_batch in tqdm(fold1_loader, total=len(fold1_loader), desc='Validation', unit='batch', leave=False): + X_val = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_val = data_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_val.append(torch.reshape(Y_val, (-1,1))) + + logits, 
predictions, _ = model_fold2(X_val) + + predictions_epoch_val.append(torch.reshape(predictions, (-1,1))) + + val_cum_loss_fold2 += criterion_val(logits.float(), Y_val.long()).item() + + loss_val_fold2 = val_cum_loss_fold2 / len(fold1_loader) + + sys.stderr.flush() + print('\nValidation for Fold #2 in Epoch', epoch, 'has been completed!') + sys.stdout.flush() + # =============================================================================================================================================================================== + + # Caluclate epoch losses + epoch_loss_train = (loss_train_fold1 + loss_train_fold2) / 2 + epoch_loss_cross_val = (loss_val_fold1 + loss_val_fold2) / 2 + + # Append to losslists + losslist_train.append(epoch_loss_train) + losslist_cross_val.append(epoch_loss_cross_val) + + losslist_train_fold1.append(loss_train_fold1) + losslist_val_fold1.append(loss_val_fold1) + + losslist_train_fold2.append(loss_train_fold2) + losslist_val_fold2.append(loss_val_fold2) + + # Return the best cross-validation loss and save best checkpoint (epoch) + best_loss_cross_val = save_best_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, epoch_loss_cross_val, best_loss_cross_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Cross-Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_cross_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch predictions + predictions_epoch_train = np.array(torch.cat(predictions_epoch_train, dim=0).to('cpu')) + predictions_epoch_val = np.array(torch.cat(predictions_epoch_val, dim=0).to('cpu')) + + predictions_list_train.append(predictions_epoch_train) + predictions_list_val.append(predictions_epoch_val) + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_cross_val): + break + + # Convert true label list into array + true_labels_train = np.array(torch.cat(true_labels_list_train, dim=0).to('cpu')) + true_labels_val = np.array(torch.cat(true_labels_list_val, dim=0).to('cpu')) + + # Saving + if save: + title = 'Training and Cross-Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_cross_val, title, save, pathmaster) + + title = 'Training and Cross-Validation Accuracy' + plot_save_func.accuracy_curves(true_labels_train, true_labels_val, predictions_list_train, predictions_list_val, title, save, pathmaster) + + plot_save_func.save_losslists_2fold(losslist_train_fold1, losslist_val_fold1, losslist_train_fold2, losslist_val_fold2, losslist_train, losslist_cross_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +# Utilizes train() and validate() functions +def cross_val_2fold_DenseNet_func(model_hyperparameters, fold1_loader, fold2_loader, model_type=torch.float32, + n_epochs=100, n_classes=3, patience=10, save=False, + resume_checkpoint_path=None, pathmaster=None): + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Optimizer and scheduler hyperparameters + lr = 0.0005 + + # Define img_channels + img_channels = 1 + + # Resume checkpoint if specified + if resume_checkpoint_path is not None and 
os.path.exists(resume_checkpoint_path): + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + model_fold1, optimizer_fold1, scheduler_fold1, model_fold2, optimizer_fold2, scheduler_fold2, epoch, loss = load_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, pathmaster) + start_epoch = epoch + 1 + best_loss_cross_val = loss + else: + # Extract model hyperparameters + depth = model_hyperparameters['depth'] + growth_rate = model_hyperparameters['growth_rate'] + compression = model_hyperparameters['compression'] + bottleneck = model_hyperparameters['bottleneck'] + drop_rate = model_hyperparameters['drop_rate'] + class_weights = model_hyperparameters['class_weights'] + + # Define DenseNet model based on loaded hyperparameters + model_fold1 = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer_fold1 = torch.optim.Adam(model_fold1.parameters(), lr=lr) + optimizer_fold2 = torch.optim.Adam(model_fold2.parameters(), lr=lr) + + scheduler_fold1 = IdentityScheduler(optimizer_fold1) + scheduler_fold2 = IdentityScheduler(optimizer_fold2) + + best_loss_cross_val = float('inf') # If no checkpoint is loaded, set to infinity + start_epoch = 0 + + if save: + # Save hyperparameters + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Create criterion for loss + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + + # Regularization + lambda_l1 = 0.01 + + # Initialize losslists + losslist_train = [] + losslist_cross_val = [] + + losslist_train_fold1 = [] + losslist_val_fold1 = [] + + losslist_train_fold2 = [] + losslist_val_fold2 = [] + + # Initialize runtime list + runtime_list = [] + + # Cross-validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in tqdm(range(start_epoch, n_epochs), desc='Cross-Validation', unit='epoch', leave=False): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + sys.stdout.flush() + + # Fold 1 
(train on fold1, validate on fold2) + model_fold1, optimizer_fold1, scheduler_fold1, loss_train_fold1 = train(model_fold1, fold1_loader, optimizer_fold1, scheduler_fold1, criterion_train, lambda_l1) + loss_val_fold1 = validate(model_fold1, fold2_loader, criterion_val) + + # Fold 2 (train on fold2, validate on fold1) + model_fold2, optimizer_fold2, scheduler_fold2, loss_train_fold2 = train(model_fold2, fold2_loader, optimizer_fold2, scheduler_fold2, criterion_train, lambda_l1) + loss_val_fold2 = validate(model_fold2, fold1_loader, criterion_val) + + # Caluclate epoch losses + epoch_loss_train = (loss_train_fold1 + loss_train_fold2) / 2 + epoch_loss_cross_val = (loss_val_fold1 + loss_val_fold2) / 2 + + # Append to losslists + losslist_train.append(epoch_loss_train) + losslist_cross_val.append(epoch_loss_cross_val) + + losslist_train_fold1.append(loss_train_fold1) + losslist_val_fold1.append(loss_val_fold1) + + losslist_train_fold2.append(loss_train_fold2) + losslist_val_fold2.append(loss_val_fold2) + + # Return the best cross-validation loss and save best checkpoint (epoch) + best_loss_cross_val = save_best_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, epoch_loss_cross_val, best_loss_cross_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Cross-Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_cross_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_cross_val): + break + + # Saving + if save: + title = 'Training and Cross-Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_cross_val, title, save, pathmaster) + + plot_save_func.save_losslists_2fold(losslist_train_fold1, losslist_val_fold1, losslist_train_fold2, losslist_val_fold2, losslist_train, losslist_cross_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +def train_validate_DenseNet(model_hyperparameters, train_loader, val_loader, model_type=torch.float32, + n_epochs=100, n_classes=3, patience=10, save=False, + resume_checkpoint_path=None, pathmaster=None): + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Optimizer and scheduler hyperparameters + lr = 0.0005 + + # Resume checkpoint if specified + if resume_checkpoint_path is not None and os.path.exists(resume_checkpoint_path): + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + + # Define DenseNet model based on loaded hyperparameters + model = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + scheduler = IdentityScheduler(optimizer) + + model, optimizer, scheduler, epoch, loss = load_checkpoint(model, optimizer, scheduler, pathmaster) + start_epoch = epoch + 1 + best_loss_val = loss + else: + # Extract model hyperparameters + depth = model_hyperparameters['depth'] + 
growth_rate = model_hyperparameters['growth_rate'] + compression = model_hyperparameters['compression'] + bottleneck = model_hyperparameters['bottleneck'] + drop_rate = model_hyperparameters['drop_rate'] + class_weights = model_hyperparameters['class_weights'] + + # Define DenseNet model based on input hyperparameters + model = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create optimizer and scheduler + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + scheduler = IdentityScheduler(optimizer) + + best_loss_val = float('inf') # If no checkpoint is loaded, set to infinity + start_epoch = 0 + + if save: + # Save hyperparameters + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Create criterion for loss + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + + # Regularization + lambda_l1 = 0.01 + + # Initialize losslists + losslist_train = [] + losslist_val = [] + + # Initialize runtime list + runtime_list = [] + + # Training and validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in tqdm(range(start_epoch, n_epochs), desc='Training and Validation', unit='epoch', leave=False): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + # Training + model.train() + # Reset training sum of epoch loss and batch_count + sum_epoch_loss_train = 0 + sys.stdout.flush() + for train_batch in tqdm(train_loader, total=len(train_loader), desc='Training Epoch', unit='batch', leave=False): + # Extract input and labels + # train_batch['data'].shape = [batch_size, img_channels, img_size, img_size] + X_train = train_batch['data'].reshape(train_batch['data'].shape[0], train_batch['data'].shape[1], train_batch['data'].shape[-1], train_batch['data'].shape[-1]).to(device=device) + Y_train = train_batch['label'].to(device=device) + + # Forward pass + logits, _, _ = model(X_train) + + # Regularization + l1 = 0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate sum of total loss for epoch with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) # Criterion returns a scalar tensor + batch_loss_train += lambda_l1 * l1 + + # Clear gradients + optimizer.zero_grad(set_to_none=True) + + # Backwards pass + batch_loss_train.backward() + + # Optimizer step + optimizer.step() + + # Generate epoch loss + sum_epoch_loss_train += batch_loss_train.item() + + # Update scheduler + scheduler.step() + + # Calculate epoch loss for training + epoch_loss_train = sum_epoch_loss_train / len(train_loader) + losslist_train.append(epoch_loss_train) + + sys.stderr.flush() + print('\nTraining for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Validation + model.eval() + sum_epoch_loss_val = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for val_batch in tqdm(val_loader, total=len(val_loader), desc='Validation Epoch', unit='batch', leave=False): + # Extract input and labels + X_val = val_batch['data'].reshape(val_batch['data'].shape[0], val_batch['data'].shape[1], 
val_batch['data'].shape[-1], val_batch['data'].shape[-1]).to(device=device) + Y_val = val_batch['label'].to(device=device) + + # Forward pass + logits, _, _ = model(X_val) + + # Calculate sum of total loss for epoch + sum_epoch_loss_val += criterion_val(logits.float(), Y_val.long()).item() # Criterion returns a scalar tensor + + # Calculate epoch loss for validation + epoch_loss_val = sum_epoch_loss_val / len(val_loader) + losslist_val.append(epoch_loss_val) + + sys.stderr.flush() + print('\nValidation for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # # Temporarily save checkpoint after each epoch + # save_checkpoint(model, optimizer, scheduler, epoch, loss=epoch_loss_val, checkpoint_path=temp_checkpoint_path) + + # Return the best validation loss and save best checkpoint (epoch) + best_loss_val = save_best_checkpoint(model, optimizer, scheduler, epoch, epoch_loss_val, best_loss_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_val): + break + + # Saving + if save: + title = 'Training and Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_val, title, save, pathmaster) + + plot_save_func.save_losslists(losslist_train, losslist_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +def train_validate_DenseNet_mixed(model_hyperparameters, train_loader, val_loader, + n_epochs=100, n_classes=3, patience=10, save=False, + resume_checkpoint_path=None, pathmaster=None): + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Optimizer and scheduler hyperparameters + lr = 0.0005 + + # Resume checkpoint if specified + if resume_checkpoint_path is not None and os.path.exists(resume_checkpoint_path): + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + + # Define DenseNet model based on loaded hyperparameters + model = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + + # Create optimizer and scheduler + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + scheduler = IdentityScheduler(optimizer) + + model, optimizer, scheduler, epoch, loss = load_checkpoint(model, optimizer, scheduler, pathmaster) + start_epoch = epoch + 1 + best_loss_val = loss + else: + # Extract model hyperparameters + depth = model_hyperparameters['depth'] + growth_rate = model_hyperparameters['growth_rate'] + compression = model_hyperparameters['compression'] + bottleneck = model_hyperparameters['bottleneck'] + drop_rate = model_hyperparameters['drop_rate'] + class_weights = model_hyperparameters['class_weights'] + + # Define DenseNet model based on input hyperparameters + model = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device) + + # Create optimizer and 
scheduler + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + scheduler = IdentityScheduler(optimizer) + + best_loss_val = float('inf') # If no checkpoint is loaded, set to infinity + start_epoch = 0 + + if save: + # Save hyperparameters + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Create criterion for loss + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + + # Regularization + lambda_l1 = 0.01 + + # Initialize losslists + losslist_train = [] + losslist_val = [] + + # Initialize runtime list + runtime_list = [] + + # Initialize predictions lists + predictions_list_train = [] + predictions_list_val = [] + + # Initialize true labels lists + true_labels_list_train = [] + true_labels_list_val = [] + + # Scalers + scaler = GradScaler() + + # Training and validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in tqdm(range(start_epoch, n_epochs), desc='Training and Validation', unit='epoch', leave=False): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + # Training + model.train() + # Reset training sum of epoch loss and batch_count + sum_epoch_loss_train = 0 + sys.stdout.flush() + + # Epoch predictions + predictions_epoch_train = [] + predictions_epoch_val = [] + + for train_batch in tqdm(train_loader, total=len(train_loader), desc='Training Epoch', unit='batch', leave=False): + # Extract input and labels + # train_batch['data'].shape = [batch_size, img_channels, img_size, img_size] + X_train = train_batch['data'].reshape(train_batch['data'].shape[0], train_batch['data'].shape[1], train_batch['data'].shape[-1], train_batch['data'].shape[-1]).to(device=device) + Y_train = train_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_train.append(torch.reshape(Y_train, (-1,1))) + + with autocast(): + # Forward pass + logits, predictions, _ = model(X_train) + + predictions_epoch_train.append(torch.reshape(predictions, (-1,1))) + + # Regularization + l1 = 0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate sum of total loss for epoch with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) # Criterion returns a scalar tensor + batch_loss_train += lambda_l1 * l1 + + # Clear gradients + optimizer.zero_grad(set_to_none=True) + + # Backwards pass + scaler.scale(batch_loss_train).backward() + + # Optimizer step + scaler.step(optimizer) + + # Scaler update + scaler.update() + + # Generate epoch loss + sum_epoch_loss_train += batch_loss_train.item() + + # Update scheduler + scheduler.step() + + # Calculate epoch loss for training + epoch_loss_train = sum_epoch_loss_train / len(train_batch) + losslist_train.append(epoch_loss_train) + + sys.stderr.flush() + print('\nTraining for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Validation + model.eval() + sum_epoch_loss_val = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for val_batch in tqdm(val_loader, total=len(val_loader), desc='Validation Epoch', unit='batch', leave=False): + # Extract input and labels + X_val = val_batch['data'].reshape(val_batch['data'].shape[0], val_batch['data'].shape[1], 
val_batch['data'].shape[-1], val_batch['data'].shape[-1]).to(device=device) + Y_val = val_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_val.append(torch.reshape(Y_val, (-1,1))) + + # Forward pass + logits, predictions, _ = model(X_val) + predictions_epoch_val.append(torch.reshape(predictions, (-1,1))) + + # Calculate sum of total loss for epoch + sum_epoch_loss_val += criterion_val(logits.float(), Y_val.long()).item() # Criterion returns a scalar tensor + + # Calculate epoch loss for validation + epoch_loss_val = sum_epoch_loss_val / len(val_loader) + losslist_val.append(epoch_loss_val) + + sys.stderr.flush() + print('\nValidation for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # # Temporarily save checkpoint after each epoch + # save_checkpoint(model, optimizer, scheduler, epoch, loss=epoch_loss_val, checkpoint_path=temp_checkpoint_path) + + # Return the best validation loss and save best checkpoint (epoch) + best_loss_val = save_best_checkpoint(model, optimizer, scheduler, epoch, epoch_loss_val, best_loss_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch predictions + predictions_epoch_train = np.array(torch.cat(predictions_epoch_train, dim=0).to('cpu')) + predictions_epoch_val = np.array(torch.cat(predictions_epoch_val, dim=0).to('cpu')) + + predictions_list_train.append(predictions_epoch_train) + predictions_list_val.append(predictions_epoch_val) + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_val): + break + + # Convert true label list into array + true_labels_train = np.array(torch.cat(true_labels_list_train, dim=0).to('cpu')) + true_labels_val = np.array(torch.cat(true_labels_list_val, dim=0).to('cpu')) + + # Saving + if save: + title = 'Training and Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_val, title, save, pathmaster) + + title = 'Training and Validation Accuracy' + plot_save_func.accuracy_curves(true_labels_train, true_labels_val, predictions_list_train, predictions_list_val, title, save, pathmaster) + + plot_save_func.save_losslists(losslist_train, losslist_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +def train_validate_DenseNet_config(config, train_loader, val_loader, + n_epochs=100, n_classes=3, patience=10, save=False, + pathmaster=None): + # # Set filetag + # file_tag = str(dt.datetime.now()) + # # Define characters to replace with underscores + # chars_to_replace = [' ', ':', '.', '-'] + + # # Replace characters with underscores + # for char in chars_to_replace: + # file_tag = file_tag.replace(char, '_') + # pathmaster.set_file_tag(file_tag) + + # Save hyperparameters + model_hyperparameters = { # Default, no bottleneck or compression + 'depth': config['depth'], + 'growth_rate': config['growth_rate'], + 'compression': config['compression'], + 'bottleneck': config['bottleneck'], + 'drop_rate': config['drop_rate'], + 'class_weights': config['class_weights'], + 'learning_rate': config['learning_rate'], + 'num_dense_tran': config['num_dense_tran'], + 'lambda_l1': config['lambda_l1'], + 
'activation': activation_to_string(config['activation']), + } + + if save: + plot_save_func.save_hyperparameters(model_hyperparameters, pathmaster) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + img_channels = 1 + + model = DenseNet_config(img_channels, config['depth'], n_classes, config['growth_rate'], config['compression'], + config['bottleneck'], config['drop_rate'], config['activation'], config['num_dense_tran']).to(device=device) + + # Loss function and optimizer + criterion_train = nn.CrossEntropyLoss(weight=torch.tensor(config['class_weights']).to(device=device)) + criterion_val = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate']) + scheduler = IdentityScheduler(optimizer) + + + # Scalers + scaler = GradScaler() + + # Initialize losslists + losslist_train = [] + losslist_val = [] + + # Initialize predictions lists + predictions_list_train = [] + predictions_list_val = [] + + # Initialize true labels lists + true_labels_list_train = [] + true_labels_list_val = [] + + # Initialize runtime list + runtime_list = [] + + # Create EarlyStoppingCallback object + early_stopping_callback = EarlyStoppingCallback(patience) + + # Initialize best validation loss + best_loss_val = float('inf') # If no checkpoint is loaded, set to infinity + + start_epoch = 0 + # Training and validation + print('\n===========================================================================================') + sys.stdout.flush() + for epoch in range(start_epoch, n_epochs): # Creates a training progress bar with units of epoch + start_time = time.time() + sys.stderr.flush() + print("\nEntering Epoch:", epoch) + # Training + model.train() + # Reset training sum of epoch loss and batch_count + sum_epoch_loss_train = 0 + sys.stdout.flush() + + # Epoch predictions + predictions_epoch_train = [] + predictions_epoch_val = [] + + for train_batch in tqdm(train_loader, total=len(train_loader), desc='Training Epoch', unit='batch', leave=False): + # Extract input and labels + # train_batch['data'].shape = [batch_size, img_channels, img_size, img_size] + X_train = train_batch['data'].reshape(train_batch['data'].shape[0], train_batch['data'].shape[1], train_batch['data'].shape[-1], train_batch['data'].shape[-1]).to(device=device) + Y_train = train_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_train.append(torch.reshape(Y_train, (-1,1))) + + with autocast(): + # Forward pass + logits, predictions, _ = model(X_train) + + predictions_epoch_train.append(torch.reshape(predictions, (-1,1))) + + # Regularization + l1 = 0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate sum of total loss for epoch with regularization + batch_loss_train = criterion_train(logits.to(torch.float32), Y_train.long()) # Criterion returns a scalar tensor + batch_loss_train += config['lambda_l1'] * l1 + + # Clear gradients + optimizer.zero_grad(set_to_none=True) + + # Backwards pass + scaler.scale(batch_loss_train).backward() + + # Optimizer step + scaler.step(optimizer) + + # Scaler update + scaler.update() + + # Generate epoch loss + sum_epoch_loss_train += batch_loss_train.item() + + # Update scheduler + scheduler.step() + + # Calculate epoch loss for training + epoch_loss_train = sum_epoch_loss_train / len(train_batch) + losslist_train.append(epoch_loss_train) + + sys.stderr.flush() + print('\nTraining for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Validation + model.eval() + sum_epoch_loss_val = 0 + 
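# Note on the mixed-precision loop above: autocast() runs the forward pass and loss in reduced precision where safe,
# while GradScaler's scale()/step()/update() apply the standard loss-scaling recipe so half-precision gradients do not
# underflow. The validation pass below runs in full precision under torch.no_grad(), and its mean cross-entropy
# (epoch_loss_val) is what drives save_best_checkpoint() and the EarlyStoppingCallback.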
with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for val_batch in tqdm(val_loader, total=len(val_loader), desc='Validation Epoch', unit='batch', leave=False): + # Extract input and labels + X_val = val_batch['data'].reshape(val_batch['data'].shape[0], val_batch['data'].shape[1], val_batch['data'].shape[-1], val_batch['data'].shape[-1]).to(device=device) + Y_val = val_batch['label'].to(device=device) + + if epoch == start_epoch: + true_labels_list_val.append(torch.reshape(Y_val, (-1,1))) + + # Forward pass + logits, predictions, _ = model(X_val) + predictions_epoch_val.append(torch.reshape(predictions, (-1,1))) + + # Calculate sum of total loss for epoch + sum_epoch_loss_val += criterion_val(logits.float(), Y_val.long()).item() # Criterion returns a scalar tensor + + # Calculate epoch loss for validation + epoch_loss_val = sum_epoch_loss_val / len(val_loader) + losslist_val.append(epoch_loss_val) + + sys.stderr.flush() + print('\nValidation for Epoch', epoch, 'has been completed!') + sys.stdout.flush() + + # Return the best validation loss and save best checkpoint (epoch) + best_loss_val = save_best_checkpoint(model, optimizer, scheduler, epoch, epoch_loss_val, best_loss_val, pathmaster) + + # Update line + sys.stderr.flush() + print("\n======> Epoch: {}/{}, Training Loss: {:.4f}, Validation Loss: {:.4f}".format(epoch, n_epochs-1, epoch_loss_train, epoch_loss_val)) + print('\n===========================================================================================') + sys.stdout.flush() + + # Add epoch predictions + predictions_epoch_train = np.array(torch.cat(predictions_epoch_train, dim=0).to('cpu')) + predictions_epoch_val = np.array(torch.cat(predictions_epoch_val, dim=0).to('cpu')) + + predictions_list_train.append(predictions_epoch_train) + predictions_list_val.append(predictions_epoch_val) + + # Add epoch time to runtime_list + end_time = time.time() + time_passed = end_time-start_time # in seconds + runtime_list.append(time_passed) + + # Call the early stopping callback + if early_stopping_callback(epoch, epoch_loss_val): + break + + # Convert true label list into array + true_labels_train = np.array(torch.cat(true_labels_list_train, dim=0).to('cpu')) + true_labels_val = np.array(torch.cat(true_labels_list_val, dim=0).to('cpu')) + + if save: + title = 'Training and Validation Loss' + plot_save_func.train_val_loss(losslist_train, losslist_val, title, save, pathmaster) + + title = 'Training and Validation Accuracy' + plot_save_func.accuracy_curves(true_labels_train, true_labels_val, predictions_list_train, predictions_list_val, title, save, pathmaster) + + plot_save_func.save_losslists(losslist_train, losslist_val, pathmaster) + plot_save_func.save_runtime_list(runtime_list, pathmaster) + + +def best_DenseNet_2fold(fold1_loader, fold2_loader, model_type=torch.float32, n_classes=3, save=False, pathmaster=None): + print('\n===========================================================================================') + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Get paths + checkpoints_path = pathmaster.checkpoints_path() + + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, _ = load_hyperparameters(pathmaster) + # When testing on the test set, drop_rate should always be 0 + + # Define img_channels + img_channels = 1 + + # Initialize model + model_fold1 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes,
growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create criterion for loss + criterion = nn.CrossEntropyLoss() + + # If checkpoint is not specified, terminate the function + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + assert os.path.exists(checkpoint_path), 'Function terminated. Not a valid checkpoint path.' + + # Load models + model_fold1, model_fold2 = load_model_2fold(model_fold1, model_fold2, pathmaster) + + # Fold 1 ======================================================================================================================================================================= + # Initialize true label lists + true_labels_list_fold1 = [] + + # Intialize output (prediction) lists + predictions_list_fold1 = [] + prediction_proba_list_fold1 = [] + + # Validation + model_fold1.eval() + cum_loss_fold1 = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(fold2_loader, total=len(fold2_loader), desc='Testing Fold #1', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + true_labels_list_fold1.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = model_fold1(X) + predictions_list_fold1.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list_fold1.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss_fold1 += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss_fold1 = cum_loss_fold1 / len(fold2_loader) + + # Convert true label list into array + true_labels_fold1 = np.array(torch.cat(true_labels_list_fold1, dim=0).to('cpu')) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions_fold1 = np.array(torch.cat(predictions_list_fold1, dim=0).to('cpu')) + prediction_proba_fold1 = np.array(torch.cat(prediction_proba_list_fold1, dim=0).to('cpu')) + + # Fold 2 ======================================================================================================================================================================= + # Initialize true label lists + true_labels_list_fold2 = [] + + # Intialize output (prediction) lists + predictions_list_fold2 = [] + prediction_proba_list_fold2 = [] + + # Validation + model_fold2.eval() + cum_loss_fold2 = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(fold1_loader, total=len(fold1_loader), desc='Testing Fold #2', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + true_labels_list_fold2.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = 
model_fold2(X) + predictions_list_fold2.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list_fold2.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss_fold2 += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss_fold2 = cum_loss_fold2 / len(fold1_loader) + + # Convert true label list into array + true_labels_fold2 = np.array(torch.cat(true_labels_list_fold2, dim=0).to('cpu')) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions_fold2 = np.array(torch.cat(predictions_list_fold2, dim=0).to('cpu')) + prediction_proba_fold2 = np.array(torch.cat(prediction_proba_list_fold2, dim=0).to('cpu')) + # ============================================================================================================================================================================== + + # Create overall lists + true_labels = np.concatenate((true_labels_fold1, true_labels_fold2), axis=0) + predictions = np.concatenate((predictions_fold1, predictions_fold2), axis=0) + prediction_proba = np.concatenate((prediction_proba_fold1, prediction_proba_fold2), axis=0) + + # Print mean validation loss + mean_loss = (loss_fold1 + loss_fold2) / 2 + print('\n=====> Fold #1 Loss: %.4f' % loss_fold1) + print('=====> Fold #2 Loss: %.4f' % loss_fold2) + print('=====> Mean Loss: %.4f' % mean_loss) + + # Saving + if save: + from sklearn.metrics import confusion_matrix + conf_matrix = confusion_matrix(true_labels, predictions) + title = 'Cross-Validation Confusion Matrix' + plot_save_func.conf_matrix(conf_matrix, title, save, pathmaster) + + plot_save_func.save_labels(true_labels, pathmaster) + plot_save_func.save_predictions(predictions, pathmaster) + plot_save_func.save_prediction_proba(prediction_proba, pathmaster) + plot_save_func.metrics_2fold(true_labels_fold1, true_labels_fold2, predictions_fold1, predictions_fold2, prediction_proba_fold1, prediction_proba_fold2, save, pathmaster) + + clf_names = ['Fold #1', 'Fold #2', 'Combined'] + plot_save_func.mean_roc_curves([true_labels_fold1, true_labels_fold2], [prediction_proba_fold1, prediction_proba_fold2], clf_names, save, pathmaster) + + +# Utilizes test() function +def best_DenseNet_2fold_func(fold1_loader, fold2_loader, model_type=torch.float32, n_classes=3, save=False, pathmaster=None): + print('\n===========================================================================================') + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Get paths + checkpoints_path = pathmaster.checkpoints_path() + + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, _ = load_hyperparameters(pathmaster) + # When testing on the test set, drop_rate should always be 0 + + # Define img_channels + img_channels = 1 + + # Initialize model + model_fold1 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + model_fold2 = DenseNet(img_channels=img_channels, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create criterion for loss + criterion = nn.CrossEntropyLoss() + + # If checkpoint is not specified, terminate the function + 
checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + assert os.path.exists(checkpoint_path), 'Function terminated. Not a valid checkpoint path.' + + # Load models + model_fold1, model_fold2 = load_model_2fold(model_fold1, model_fold2, pathmaster) + + # Validation + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + + true_labels_fold1, predictions_fold1, prediction_proba_fold1, loss_fold1 = test(model_fold1, fold1_loader, criterion, n_classes) + true_labels_fold2, predictions_fold2, prediction_proba_fold2, loss_fold2 = test(model_fold2, fold2_loader, criterion, n_classes) + + # Create overall arrays + true_labels = np.concatenate((true_labels_fold1, true_labels_fold2), axis=0) + predictions = np.concatenate((predictions_fold1, predictions_fold2), axis=0) + prediction_proba = np.concatenate((prediction_proba_fold1, prediction_proba_fold2), axis=0) + + # Print mean validation loss + mean_loss = (loss_fold1 + loss_fold2) / 2 + print('\n======> Fold #1 Loss: %.4f' % loss_fold1) + print('======> Fold #2 Loss: %.4f' % loss_fold2) + print('======> Mean Loss: %.4f' % mean_loss) + + # Saving + if save: + from sklearn.metrics import confusion_matrix + conf_matrix = confusion_matrix(true_labels, predictions) + title = 'Cross-Validation Confusion Matrix' + plot_save_func.conf_matrix(conf_matrix, title, save, pathmaster) + + plot_save_func.save_labels(true_labels, pathmaster) + plot_save_func.save_predictions(predictions, pathmaster) + plot_save_func.save_prediction_proba(prediction_proba, pathmaster) + plot_save_func.metrics_2fold(true_labels_fold1, true_labels_fold2, predictions_fold1, predictions_fold2, prediction_proba_fold1, prediction_proba_fold2, save, pathmaster) + + clf_names = ['Fold #1', 'Fold #2', 'Combined'] + plot_save_func.mean_roc_curves([true_labels_fold1, true_labels_fold2], [prediction_proba_fold1, prediction_proba_fold2], clf_names, save, pathmaster) + + +def best_DenseNet(data_loader, model_type=torch.float32, n_classes=3, save=False, pathmaster=None): + print('\n===========================================================================================') + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Get paths + checkpoints_path = pathmaster.checkpoints_path() + + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, class_weights = load_hyperparameters(pathmaster) + # When testing on the test set, drop_rate should always be 0 + + # Initialize model + model = DenseNet(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate).to(device=device, dtype=model_type) + + # Create criterion for loss + criterion = nn.CrossEntropyLoss() + + # If checkpoint is not specified, terminate the function + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + assert os.path.exists(checkpoint_path), 'Function terminated. Not a valid checkpoint path.'
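# Single-model evaluation (best_DenseNet): load_model() below restores the best checkpoint saved under pathmaster's
# checkpoints folder, and the loop that follows collects labels, predicted classes, and class probabilities over
# data_loader before reporting the mean cross-entropy loss.
# Hedged usage sketch (test_loader is assumed to be built by the caller; the file tag must match the run whose
# checkpoint is being loaded):
# pathmaster.set_file_tag('best_run_tag')
# best_DenseNet(test_loader, model_type=torch.float32, n_classes=3, save=True, pathmaster=pathmaster)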
+ + # Load model + model = load_model(model, pathmaster) + + # Initialize true label lists + true_labels_list = [] + + # Intialize output (prediction) lists + predictions_list = [] + prediction_proba_list = [] + + # Evaluation + model.eval() + cum_loss = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Testing', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + true_labels_list.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = model(X) + predictions_list.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss = cum_loss / len(data_loader) + + # Convert true label list into array + true_labels = np.array(torch.cat(true_labels_list, dim=0).to('cpu')) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions = np.array(torch.cat(predictions_list, dim=0).to('cpu')) + prediction_proba = np.array(torch.cat(prediction_proba_list, dim=0).to('cpu')) + + # Print validation loss + print('\n======> Loss: %.4f' % loss) + + # Saving + if save: + from sklearn.metrics import confusion_matrix + conf_matrix = confusion_matrix(true_labels, predictions) + title = 'Evaluation Confusion Matrix' + plot_save_func.conf_matrix(conf_matrix, title, save, pathmaster) + + plot_save_func.save_labels(true_labels, pathmaster) + plot_save_func.save_predictions(predictions, pathmaster) + plot_save_func.save_prediction_proba(prediction_proba, pathmaster) + plot_save_func.metrics(true_labels, predictions, prediction_proba, save, pathmaster) + + plot_save_func.save_classification_report(true_labels, predictions, save, pathmaster) + plot_save_func.save_classification_report_imbalanced(true_labels, predictions, save, pathmaster) + + clf_names = ['Model'] + plot_save_func.mean_roc_curves([true_labels], [prediction_proba], clf_names, save, pathmaster) + plot_save_func.roc_curves(true_labels, prediction_proba, save, pathmaster) + + +def best_DenseNet_config(data_loader, model_type=torch.float32, n_classes=3, save=False, pathmaster=None): + print('\n===========================================================================================') + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Get paths + checkpoints_path = pathmaster.checkpoints_path() + + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, _, _, num_dense_tran, _, activation = load_hyperparameters_random_search(pathmaster) + # When testing on the test set, drop_rate, class_weights, learning_rate, and lambda_l1 are not needed + + # Initialize model + model = DenseNet_config(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate, + activation=activation, num_dense_tran=num_dense_tran).to(device=device, dtype=model_type) + + # Create criterion for loss + criterion = nn.CrossEntropyLoss() + + # If 
checkpoint is not specified, terminate the function + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + assert os.path.exists(checkpoint_path), 'Function terminated. Not a valid checkpoint path.' + + # Load model + model = load_model(model, pathmaster) + + # Initialize true label lists + true_labels_list = [] + + # Intialize output (prediction) lists + predictions_list = [] + prediction_proba_list = [] + + # # Initialize segment names list + # segment_names_list = [] + + # Evaluation + model.eval() + cum_loss = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Testing', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + # Z = data_batch['segment_name'] + # segment_names_list.append(Z) + true_labels_list.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = model(X) + predictions_list.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss = cum_loss / len(data_loader) + + # Convert true label list into array + true_labels = np.array(torch.cat(true_labels_list, dim=0).to('cpu')) + + # # Convert segment names list into array + # segment_names = np.concatenate(segment_names_list, axis=0) + # segment_names = segment_names.reshape(-1,1) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions = np.array(torch.cat(predictions_list, dim=0).to('cpu')) + prediction_proba = np.array(torch.cat(prediction_proba_list, dim=0).to('cpu')) + + # Print validation loss + print('\n======> Loss: %.4f' % loss) + + # Saving + if save: + # pathmaster.set_file_tag(pathmaster.file_tag + '_test') + from sklearn.metrics import confusion_matrix + conf_matrix = confusion_matrix(true_labels, predictions) + title = 'Evaluation Confusion Matrix' + plot_save_func.conf_matrix(conf_matrix, title, save, pathmaster) + + plot_save_func.save_labels(true_labels, pathmaster) + # plot_save_func.save_labels(np.hstack([segment_names, true_labels]), pathmaster) + plot_save_func.save_predictions(predictions, pathmaster) + plot_save_func.save_prediction_proba(prediction_proba, pathmaster) + plot_save_func.metrics(true_labels, predictions, prediction_proba, save, pathmaster) + + plot_save_func.save_classification_report(true_labels, predictions, save, pathmaster) + plot_save_func.save_classification_report_imbalanced(true_labels, predictions, save, pathmaster) + + clf_names = ['Model'] + plot_save_func.mean_roc_curves([true_labels], [prediction_proba], clf_names, save, pathmaster) + plot_save_func.roc_curves(true_labels, prediction_proba, save, pathmaster) + + +def best_DenseNet_config_binary(data_loader, model_type=torch.float32, n_classes=3, save=False, pathmaster=None): + print('\n===========================================================================================') + + # If GPU is available, use GPU, else use CPU + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Get paths + 
checkpoints_path = pathmaster.checkpoints_path() + + # Load model hyperparameters + depth, growth_rate, compression, bottleneck, drop_rate, _, _, num_dense_tran, _, activation = load_hyperparameters_random_search(pathmaster) + # When testing on the test set, drop_rate, class_weights, learning_rate, and lambda_l1 are not needed + + # Initialize model + model = DenseNet_config(img_channels=1, depth=depth, n_classes=n_classes, growth_rate=growth_rate, + compression=compression, bottleneck=bottleneck, drop_rate=drop_rate, + activation=activation, num_dense_tran=num_dense_tran).to(device=device, dtype=model_type) + + # Create criterion for loss + criterion = nn.CrossEntropyLoss() + + # If checkpoint is not specified, terminate the function + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + assert os.path.exists(checkpoint_path), 'Function terminated. Not a valid checkpoint path.' + + # Load model + model = load_model(model, pathmaster) + + # Initialize true label lists + true_labels_list = [] + + # Intialize output (prediction) lists + predictions_list = [] + prediction_proba_list = [] + + # Evaluation + model.eval() + cum_loss = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Testing', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + true_labels_list.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = model(X) + predictions_list.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss = cum_loss / len(data_loader) + + # Convert true label list into array + true_labels = np.array(torch.cat(true_labels_list, dim=0).to('cpu')) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions = np.array(torch.cat(predictions_list, dim=0).to('cpu')) + prediction_proba = np.array(torch.cat(prediction_proba_list, dim=0).to('cpu')) + + # Print validation loss + print('\n======> Loss: %.4f' % loss) + + # Saving + if save: + # pathmaster.set_file_tag(pathmaster.file_tag + '_test') + from sklearn.metrics import confusion_matrix + conf_matrix = confusion_matrix(true_labels, predictions) + title = 'Evaluation Confusion Matrix' + plot_save_func.conf_matrix(conf_matrix, title, save, pathmaster, class_names=['non-AF', 'AF']) + + plot_save_func.save_labels(true_labels, pathmaster) + plot_save_func.save_predictions(predictions, pathmaster) + plot_save_func.save_prediction_proba_binary(prediction_proba, pathmaster) + plot_save_func.metrics_binary(true_labels, predictions, prediction_proba, save, pathmaster) + + plot_save_func.save_classification_report(true_labels, predictions, save, pathmaster) + plot_save_func.save_classification_report_imbalanced(true_labels, predictions, save, pathmaster) + + plot_save_func.roc_curves_binary(true_labels, prediction_proba, save, pathmaster, class_names=['non-AF', 'AF']) + + +def train(model, dataloader, optimizer, scheduler, criterion, regularization): + 
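# Shared single-epoch helpers. train() below makes one pass over `dataloader`, adds L1 regularization scaled by
# `regularization` to the cross-entropy loss, steps the optimizer and scheduler, and returns
# (model, optimizer, scheduler, mean_epoch_loss); validate() and test() are the no-grad counterparts.
# Note that, as written, these helpers rely on a `device` defined at module level rather than receiving it as an
# argument. Example call, taken from the 2-fold routine above:
# model_fold1, optimizer_fold1, scheduler_fold1, loss_train_fold1 = train(model_fold1, fold1_loader, optimizer_fold1, scheduler_fold1, criterion_train, lambda_l1)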
model.train() + cum_loss = 0 + for data_batch in tqdm(dataloader, total=len(dataloader), desc='Training', unit='batch', leave=False): + # Extract input and labels + X_train = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_train = data_batch['label'].to(device=device) + + # Forward pass + logits, _, _ = model(X_train) + + # Regularization (if applicable) + l1 = 0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + + # Calculate total loss with regularization + batch_loss_train = criterion(logits.to(torch.float32), Y_train.long()) + regularization * l1 + cum_loss += batch_loss_train.item() + + # Clear gradients + optimizer.zero_grad(set_to_none=True) + + # Backwards pass + batch_loss_train.backward() + + # Optimizer step + optimizer.step() + + # Update scheduler + scheduler.step() + + epoch_loss = cum_loss / len(dataloader) + + return model, optimizer, scheduler, epoch_loss + + +def validate(model, dataloader, criterion): + model.eval() + with torch.no_grad(): + cum_loss = 0 + for data_batch in tqdm(dataloader, total=len(dataloader), desc='Validation', unit='batch', leave=False): + X_val = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y_val = data_batch['label'].to(device=device) + + logits, _, _ = model(X_val) + cum_loss += criterion(logits.float(), Y_val.long()).item() + + epoch_loss = cum_loss / len(dataloader) + + return epoch_loss + + +def test(model, dataloader, criterion, n_classes): + # Initialize true label lists + true_labels_list = [] + + # Intialize output (prediction) lists + predictions_list = [] + prediction_proba_list = [] + + # Validation + model.eval() + cum_loss = 0 + with torch.no_grad(): # Disable gradient computation during validation + sys.stdout.flush() + for data_batch in tqdm(dataloader, total=len(dataloader), desc='Testing', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[1], data_batch['data'].shape[-1], data_batch['data'].shape[-1]).to(device=device) + Y = data_batch['label'].to(device=device) + true_labels_list.append(torch.reshape(Y, (-1,1))) + + # Forward pass + logits, predictions, prediction_proba = model(X) + predictions_list.append(torch.reshape(predictions, (-1,1))) + prediction_proba_list.append(torch.reshape(prediction_proba, (-1,n_classes))) + + # Calculate sum of total loss for epoch + cum_loss += criterion(logits.float(), Y.long()).item() # Criterion returns a scalar tensor + + # Calculate loss for validation + loss = cum_loss / len(dataloader) + + # Convert true label list into array + true_labels = np.array(torch.cat(true_labels_list, dim=0).to('cpu')) + + # Convert the output lists into arrays and concatenate along dim=0 (rows) + predictions = np.array(torch.cat(predictions_list, dim=0).to('cpu')) + prediction_proba = np.array(torch.cat(prediction_proba_list, dim=0).to('cpu')) + + return true_labels, predictions, prediction_proba, loss + + +class IdentityScheduler(torch.optim.lr_scheduler._LRScheduler): + def __init__(self, optimizer, last_epoch=-1): + super(IdentityScheduler, self).__init__(optimizer, last_epoch) + + def get_lr(self): + # Returns the current learning rate without any modifications. 
+ return self.base_lrs + + +def save_checkpoint(model, optimizer, scheduler, epoch, loss, checkpoint_path): # Will also be called to save the most recent checkpoint locally in the runtime so I always have the most recent checkpoint + torch.save({ + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict() if scheduler else IdentityScheduler(optimizer).state_dict(), # Create identity scheduler if missing, actually doesn't work since the parameter is required + 'epoch': epoch, + 'loss': loss + }, checkpoint_path) + +def save_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, loss, checkpoint_path): # Will also be called to save the most recent checkpoint locally in the runtime so I always have the most recent checkpoint + torch.save({ + 'model_fold1_state_dict': model_fold1.state_dict(), + 'model_fold2_state_dict': model_fold2.state_dict(), + 'optimizer_fold1_state_dict': optimizer_fold1.state_dict(), + 'optimizer_fold2_state_dict': optimizer_fold2.state_dict(), + 'scheduler_fold1_state_dict': scheduler_fold1.state_dict(), + 'scheduler_fold2_state_dict': scheduler_fold2.state_dict(), + 'epoch': epoch, + 'loss': loss + }, checkpoint_path) + +def save_best_checkpoint(model, optimizer, scheduler, epoch, current_loss, best_loss, pathmaster): # When training the model, best_loss should be initialized to float.('inf') + # Might be good to have two different checkpoint paths, one for the best and one for the most recent checkpoint, maybe also have temp vs permanent checkpoint paths + if current_loss < best_loss: + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + best_loss = current_loss + save_checkpoint(model, optimizer, scheduler, epoch, best_loss, checkpoint_path) + print('\nNew checkpoint with better loss was saved!') + + return best_loss + else: + return best_loss + + +def save_best_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, current_loss, best_loss, pathmaster): # When training the model, best_loss should be initialized to float.('inf') + # Might be good to have two different checkpoint paths, one for the best and one for the most recent checkpoint, maybe also have temp vs permanent checkpoint paths + if current_loss < best_loss: + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + best_loss = current_loss + save_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, epoch, best_loss, checkpoint_path) + print('\nNew checkpoint with better loss was saved!') + + return best_loss + else: + return best_loss + + +def load_checkpoint(model, optimizer, scheduler, pathmaster): + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path, map_location=device) + + model.load_state_dict(checkpoint['model_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + scheduler.load_state_dict(checkpoint['scheduler_state_dict']) + start_epoch = checkpoint['epoch'] + loss = checkpoint['loss'] + + print('\nCheckpoint loaded!') + # print(f'Resuming training from 
epoch {start_epoch}, batch {start_batch}') + + return model, optimizer, scheduler, start_epoch, loss + else: + print('\nError! Checkpoint does not exist!') + + +def load_checkpoint_2fold(model_fold1, model_fold2, optimizer_fold1, optimizer_fold2, scheduler_fold1, scheduler_fold2, pathmaster): + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path, map_location=device) + + model_fold1.load_state_dict(checkpoint['model_fold1_state_dict']) + optimizer_fold1.load_state_dict(checkpoint['optimizer_fold1_state_dict']) + scheduler_fold1.load_state_dict(checkpoint['scheduler_fold1_state_dict']) + + model_fold2.load_state_dict(checkpoint['model_fold2_state_dict']) + optimizer_fold2.load_state_dict(checkpoint['optimizer_fold2_state_dict']) + scheduler_fold2.load_state_dict(checkpoint['scheduler_fold2_state_dict']) + + start_epoch = checkpoint['epoch'] + loss = checkpoint['loss'] + + print('\nCheckpoint loaded!') + # print(f'Resuming training from epoch {start_epoch}, batch {start_batch}') + + return model_fold1, optimizer_fold1, scheduler_fold1, model_fold2, optimizer_fold2, scheduler_fold2, start_epoch, loss + else: + print('\nError! Checkpoint does not exist!') + + +def load_model_2fold(model_fold1, model_fold2, pathmaster): + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path, map_location=device) + + model_fold1.load_state_dict(checkpoint['model_fold1_state_dict']) + model_fold2.load_state_dict(checkpoint['model_fold2_state_dict']) + + print('\nModels loaded!') + # print(f'Resuming training from epoch {start_epoch}, batch {start_batch}') + + return model_fold1, model_fold2 + else: + print('\nError! Models do not exist!') + + +def load_model(model, pathmaster): + checkpoints_path = pathmaster.checkpoints_path() + checkpoint_path = os.path.join(checkpoints_path, 'checkpoint_' + pathmaster.file_tag + '.pt') + if os.path.exists(checkpoint_path): + checkpoint = torch.load(checkpoint_path, map_location=device) + + model.load_state_dict(checkpoint['model_state_dict']) + + print('\nModel loaded!') + # print(f'Resuming training from epoch {start_epoch}, batch {start_batch}') + + return model + else: + print('\nError! 
Model does not exist!') + + +def load_hyperparameters(pathmaster): + hyperparameters_path = pathmaster.hyperparameters_path() + + # Extract model hyperparameters + model_hyperparameters_file = os.path.join(hyperparameters_path, 'hyperparameters_' + pathmaster.file_tag + '.csv') + model_hyperparameters = pd.read_csv(model_hyperparameters_file) + depth = int(model_hyperparameters['depth'].iloc[0]) + growth_rate = int(model_hyperparameters['growth_rate'].iloc[0]) + compression = model_hyperparameters['compression'].iloc[0] + bottleneck = model_hyperparameters['bottleneck'].iloc[0] + drop_rate = model_hyperparameters['drop_rate'].iloc[0] + class_weights = model_hyperparameters['class_weights'] + + return depth, growth_rate, compression, bottleneck, drop_rate, class_weights + + +def load_hyperparameters_random_search(pathmaster): + hyperparameters_path = pathmaster.hyperparameters_path() + + # Extract model hyperparameters + model_hyperparameters_file = os.path.join(hyperparameters_path, 'hyperparameters_' + pathmaster.file_tag + '.csv') + model_hyperparameters = pd.read_csv(model_hyperparameters_file) + depth = int(model_hyperparameters['depth'].iloc[0]) + growth_rate = int(model_hyperparameters['growth_rate'].iloc[0]) + compression = model_hyperparameters['compression'].iloc[0] + bottleneck = model_hyperparameters['bottleneck'].iloc[0] + drop_rate = model_hyperparameters['drop_rate'].iloc[0] + class_weights = model_hyperparameters['class_weights'] + learning_rate = model_hyperparameters['learning_rate'].iloc[0] + num_dense_tran = int(model_hyperparameters['num_dense_tran'].iloc[0]) + lambda_l1 = model_hyperparameters['lambda_l1'].iloc[0] + activation = string_to_activation((model_hyperparameters['activation'].iloc[0])) + + return depth, growth_rate, compression, bottleneck, drop_rate, class_weights, learning_rate, num_dense_tran, lambda_l1, activation + + +def string_to_activation(activation_string): + activation_map = { + 'relu': nn.ReLU(), + 'leaky_relu': nn.LeakyReLU(), + 'sigmoid': nn.Sigmoid(), + 'tanh': nn.Tanh(), + 'softmax': nn.Softmax(), + 'softplus': nn.Softplus(), + 'softshrink': nn.Softshrink(), + 'softmin': nn.Softmin(), + 'log_softmax': nn.LogSoftmax(), + 'elu': nn.ELU(), + 'prelu': nn.PReLU(), + 'relu6': nn.ReLU6(), + 'rrelu': nn.RReLU(), + 'celu': nn.CELU(), + 'selu': nn.SELU(), + 'gelu': nn.GELU(), + 'silu': nn.SiLU(), + # Add more activation functions if needed + } + + return activation_map.get(activation_string, None) + + +def activation_to_string(activation_func): + activation_map = { + nn.ReLU: 'relu', + nn.LeakyReLU: 'leaky_relu', + nn.Sigmoid: 'sigmoid', + nn.Tanh: 'tanh', + nn.Softmax: 'softmax', + nn.Softplus: 'softplus', + nn.Softshrink: 'softshrink', + nn.Softmin: 'softmin', + nn.LogSoftmax: 'log_softmax', + nn.ELU: 'elu', + nn.PReLU: 'prelu', + nn.ReLU6: 'relu6', + nn.RReLU: 'rrelu', + nn.CELU: 'celu', + nn.SELU: 'selu', + nn.GELU: 'gelu', + nn.SiLU: 'silu', + # Add more activation functions if needed + } + + return activation_map.get(activation_func.__class__, 'unknown') + + +class EarlyStoppingCallback: + def __init__(self, patience=10): + self.patience = patience + self.best_loss = float('inf') + self.counter = 0 + self.best_epoch = 0 + + def __call__(self, epoch, current_loss): + if current_loss < self.best_loss: + self.best_loss = current_loss + self.counter = 0 + self.best_epoch = epoch + else: + self.counter += 1 + if self.counter >= self.patience: + print(f"\nEarly stopping at epoch {epoch}. 
No improvement for {self.patience} epochs.") + + return True + + return False \ No newline at end of file diff --git a/utils/pathmaster.py b/utils/pathmaster.py new file mode 100644 index 0000000..38c5718 --- /dev/null +++ b/utils/pathmaster.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Mar 4 13:04:27 2024 + +@author: dchen +""" +import os + +class PathMaster(): + def __init__(self, is_linux=False, is_hpc=False, is_tfs=True, is_internal=False, is_external=False, focus='misc', file_tag='temp', img_res='not_an_img_res'): + self.focus = focus + self.file_tag = file_tag + self.is_linux = is_linux + self.is_hpc = is_hpc + self.is_tfs = is_tfs + self.is_internal = is_internal + self.is_external = is_external + self.img_res = img_res + + # Select correct root saves path + if self.is_linux: + if self.is_tfs: + self.saves_path = '/mnt/R/ENGR_Chon/Darren/Honors_Thesis/saves_tfs/' + self.focus + '/' + else: + self.saves_path = '/mnt/R/ENGR_Chon/Darren/Honors_Thesis/saves_poincare/' + self.focus + '/' + elif self.is_hpc: + if self.is_tfs: + self.saves_path = '/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/saves_tfs/' + self.focus + '/' + else: + self.saves_path = '/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/saves_poincare/' + self.focus + '/' + else: # Using your own computer + if self.is_tfs: + self.saves_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\saves_tfs' + '\\' + self.focus + '\\' + else: + self.saves_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\saves_poincare' + '\\' + self.focus + '\\' + + + def set_saves_path(self, saves_path): + self.saves_path = saves_path + + + def set_file_tag(self, file_tag): + self.file_tag = file_tag + + + def set_focus(self, focus): + self.focus = focus + + + def data_paths(self, data_format): + if data_format == 'pt': + # Base path + if self.is_linux: + base_path = "/mnt/R/ENGR_Chon/Darren/NIH_PulseWatch" + labels_base_path = "/mnt/R/ENGR_Chon/Darren/NIH_Pulsewatch" + # labels_base_path = "/mnt/R/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn" + elif self.is_hpc: + base_path = "/gpfs/scratchfs1/kic14002/doh16101" + labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005" + else: + if self.is_internal: + base_path = r'C:\\Chon_Lab\\NIH_Pulsewatch' + labels_base_path = r'C:\\Chon_Lab\\NIH_Pulsewatch' + elif self.is_external: + base_path = r'D:\\Chon_Lab\\NIH_Pulsewatch' + labels_base_path = r'D:\\Chon_Lab\\NIH_Pulsewatch' + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = "R:\\ENGR_Chon\\Darren\\NIH_Pulsewatch" # Why double \\ before NIH_Pulsewatch_Database? + labels_base_path = "R:\\ENGR_Chon\\Darren\\NIH_Pulsewatch" # Why double \\ before NIH_Pulsewatch_Database? 
+ # labels_base_path = "R:\ENGR_Chon\\NIH_Pulsewatch_Database\Adjudication_UConn" + + # Type path + if self.is_tfs: + format_path = 'TFS_pt' + else: + format_path = 'Poincare_pt' + + # Join paths + data_path = os.path.join(base_path, format_path, self.img_res) + + else: + if self.is_linux: + base_path = "/mnt/R/ENGR_Chon/Dong/MATLAB_generate_results/NIH_PulseWatch" + labels_base_path = "/mnt/R/ENGR_Chon/Darren/NIH_Pulsewatch" + # labels_base_path = "/mnt/R/ENGR_Chon/NIH_Pulsewatch_Database/Adjudication_UConn" + elif self.is_hpc: + base_path = "/gpfs/scratchfs1/kic14002/doh16101" + labels_base_path = "/gpfs/scratchfs1/hfp14002/lrm22005" + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = "R:\ENGR_Chon\Dong\MATLAB_generate_results\\NIH_PulseWatch" # Why double \\ before NIH_Pulsewatch_Database? + labels_base_path = "R:\ENGR_Chon\Darren\\NIH_Pulsewatch" # Why double \\ before NIH_Pulsewatch_Database? + # labels_base_path = "R:\ENGR_Chon\\NIH_Pulsewatch_Database\Adjudication_UConn" + + if data_format == 'csv': + if self.is_tfs: + data_path = os.path.join(base_path, "TFS_csv") + else: + data_path = os.path.join(base_path, "Poincare_Density_csv") + elif data_format == 'png': + if not self.is_tfs: + print('No png image available for Density Poincare plot') + return + data_path = os.path.join(base_path, "TFS_plots") + else: + raise ValueError("Invalid data format. Choose 'csv', 'png, or 'pt'.") + + # Complete labels path + # labels_path = os.path.join(labels_base_path, "final_attemp_4_1_Dong_Ohm_2024_02_18_copy") + labels_path = os.path.join(labels_base_path, "Ground_Truths") + + # Check if directories exist + if not os.path.exists(data_path): + print("Data path does not exist") + return + if not os.path.exists(labels_path): + print("Labels path does not exist") + return + + return data_path, labels_path + + + def smote_path(self, smote_type, split): + if self.is_internal: + base_path = r'C:\Chon_Lab\NIH_Pulsewatch' + elif self.is_external: + base_path = r'D:\Chon_Lab\NIH_Pulsewatch' + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = "R:\ENGR_Chon\Darren\\NIH_Pulsewatch" # Why double \\ before NIH_Pulsewatch_Database? 
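# smote_path() reuses the same base and format folders as data_paths(): the SMOTE-resampled tensors are expected
# under <base_path>/<TFS_pt|Poincare_pt>/<smote_type>/<split>.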
+ + # Type path + if self.is_tfs: + format_path = 'TFS_pt' + else: + format_path = 'Poincare_pt' + + smote_path = os.path.join(base_path, format_path, smote_type, split) + + return smote_path + + + def deepbeat_paths(self): + if self.is_internal: + base_path = r'C:\Chon_Lab\Public_Database\DeepBeat\Concatenated_DeepBeat\test\Darren_conversion' + elif self.is_external: + base_path = r'D:\Chon_Lab\Public_Database\DeepBeat\Concatenated_DeepBeat\test\Darren_conversion' + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = r'R:\ENGR_Chon\Darren\Public_Database\DeepBeat\Concatenated_DeepBeat\test\Darren_conversion' + + # Type path + if self.is_tfs: + format_path = 'tfs_float16_pt' + else: + format_path = 'poincare_float16_pt' + + data_path = os.path.join(base_path, format_path) + labels_path = os.path.join(base_path, 'DeepBeat_segment_names_labels_STFT.csv') + + return data_path, labels_path + + + def mimic3_paths(self): + if self.is_internal: + base_path = r'C:\Chon_Lab\Public_Database\PPG_PeakDet_MIMICIII\Darren_conversion' + elif self.is_external: + base_path = r'D:\Chon_Lab\Public_Database\PPG_PeakDet_MIMICIII\Darren_conversion' + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = r'R:\ENGR_Chon\Darren\Public_Database\PPG_PeakDet_MIMICIII\Darren_conversion' + + # Type path + if self.is_tfs: + format_path = 'test_tfs_float16_pt' + else: + format_path = 'test_poincare_float16_pt' + + data_path = os.path.join(base_path, format_path) + labels_path = os.path.join(base_path, '2020_Han_Sensors_MIMICIII_Ground_Truth_STFT.csv') + + return data_path, labels_path + + + def simband_paths(self): + if self.is_internal: + base_path = r'C:\Chon_Lab\Public_Database\PPG_PeakDet_Simband\Darren_conversion' + elif self.is_external: + base_path = r'D:\Chon_Lab\Public_Database\PPG_PeakDet_Simband\Darren_conversion' + else: + # R:\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch + base_path = r'R:\ENGR_Chon\Darren\Public_Database\PPG_PeakDet_Simband\Darren_conversion' + + # Type path + if self.is_tfs: + format_path = 'tfs_float16_pt' + else: + format_path = 'poincare_float16_pt' + + data_path = os.path.join(base_path, format_path) + labels_path = os.path.join(base_path, 'simband_segments_labels_STFT.csv') + + return data_path, labels_path + + + def summary_path(self): + if self.is_linux: + summary_path = "/mnt/R/ENGR_Chon/Darren/NIH_Pulsewatch/labels_summary_2_18_Darren.csv" + elif self.is_hpc: + summary_path = "/gpfs/scratchfs1/hfp14002/dac20022/NIH_Pulsewatch/labels_summary_2_18_Darren.csv" + else: + if self.is_internal: + summary_path = r'C:\Chon_Lab\NIH_Pulsewatch\labels_summary_2_18_Darren.csv' + elif self.is_external: + summary_path = r'D:\Chon_Lab\NIH_Pulsewatch\labels_summary_2_18_Darren.csv' + else: + summary_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch\labels_summary_2_18_Darren.csv" + + return summary_path + + + def models_path(self): + if self.is_linux: + models_path = "/mnt/R/ENGR_Chon/Darren/Honors_Thesis/models" + elif self.is_hpc: + models_path = "/gpfs/scratchfs1/hfp14002/dac20022/Honors_Thesis/models" + else: + models_path = r"\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\Honors_Thesis\models" + + return models_path + + + def losslists_path(self): + losslists_path = self.saves_path + 'losslists' + + return losslists_path + + + def runtime_lists_path(self): + runtime_lists_path = self.saves_path + 'runtime_lists' + + return runtime_lists_path + + + def labels_path(self): + labels_path = self.saves_path + 
'labels' + + return labels_path + + + def predictions_path(self): + predictions_path = self.saves_path + 'predictions' + + return predictions_path + + + def prediction_proba_path(self): + prediction_proba_path = self.saves_path + 'prediction_proba' + + return prediction_proba_path + + + def metrics_path(self): + metrics_path = self.saves_path + 'metrics' + + return metrics_path + + + def classification_report_path(self): + classification_report_path = self.saves_path + 'classification_reports' + + return classification_report_path + + + def classification_report_imbalanced_path(self): + classification_report_imbalanced_path = self.saves_path + 'classification_reports_imbalanced' + + return classification_report_imbalanced_path + + + def confusion_matrices_path(self): + confusion_matrices_path = self.saves_path + 'confusion_matrices' + + return confusion_matrices_path + + + def checkpoints_path(self): + checkpoints_path = self.saves_path + 'checkpoints' + + return checkpoints_path + + + def hyperparameters_path(self): + hyperparameters_path = self.saves_path + 'hyperparameters' + + return hyperparameters_path + + + def loss_curves_path(self): + loss_curves_path = self.saves_path + 'loss_curves' + + return loss_curves_path + + + def roc_curves_path(self): + roc_curves_path = self.saves_path + 'roc_curves' + + return roc_curves_path + + + def mean_roc_curves_path(self): + mean_roc_curves_path = self.saves_path + 'mean_roc_curves' + + return mean_roc_curves_path + + + def accuracy_curves_path(self): + accuracy_curves_path = self.saves_path + 'accuracy_curves' + + return accuracy_curves_path \ No newline at end of file diff --git a/utils/plot_save_func.py b/utils/plot_save_func.py new file mode 100644 index 0000000..abe201e --- /dev/null +++ b/utils/plot_save_func.py @@ -0,0 +1,542 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Feb 29 12:06:14 2024 + +@author: dchen +""" +import matplotlib.pyplot as plt +import numpy as np +import os +import pandas as pd +from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, roc_curve, auc, classification_report +from sklearn.preprocessing import label_binarize +from imblearn.metrics import classification_report_imbalanced + +# For increased csv speed +import pyarrow as pa +from pyarrow import csv + +def save_hyperparameters(hyperparameters, pathmaster): + hyperparameters_path = pathmaster.hyperparameters_path() + hyperparameters_path = os.path.join(hyperparameters_path, 'hyperparameters_' + pathmaster.file_tag + '.csv') + + # If there are class weights, make sure all other columns have same length + if hyperparameters['class_weights'] is not None: + # Update the dictionary + for key, value in hyperparameters.items(): + # If the length of the value is less than max_length + if key != 'class_weights': + # Fill missing values with np.nan + hyperparameters[key] = [value] + [np.nan] * (len(hyperparameters['class_weights']) - 1) + + hyperparameters = pd.DataFrame(hyperparameters) + hyperparameters.to_csv(hyperparameters_path, index=False) + + # # Using PyArrow (need each hyperparameter to be a list) + # hyperparameters_table = pa.Table.from_pydict(hyperparameters) + # csv.write_csv(hyperparameters_table, hyperparameters_path) + + +def save_losslists(losslist_train, losslist_val, pathmaster): # For holdout training and validation + losslists_path = pathmaster.losslists_path() + losslists_path = os.path.join(losslists_path, 'losslists_' + pathmaster.file_tag + '.csv') + # losslists = pd.DataFrame(dtype='float32') + # 
losslists['training'] = losslist_train + # losslists['validation'] = losslist_val + # losslists.to_csv(losslists_path, index=False, chunksize=500) + + # Using PyArrow + # losslists = { + # 'training': losslist_train, + # 'validation': losslist_val + # } + # losslists_table = pa.Table.from_pydict(losslists) + losslists = [np.array(losslist_train).reshape(-1).astype(np.float32), np.array(losslist_val).reshape(-1).astype(np.float32)] + losslists_names = ['training', 'validation'] + losslists_table = pa.Table.from_arrays(losslists, losslists_names) + csv.write_csv(losslists_table, losslists_path) + +def save_losslists_2fold(losslist_train_fold1, losslist_val_fold1, losslist_train_fold2, losslist_val_fold2, losslist_train, losslist_val, pathmaster): # For holdout training and validation + losslists_path = pathmaster.losslists_path() + losslists_path = os.path.join(losslists_path, 'losslists_' + pathmaster.file_tag + '.csv') + # losslists = pd.DataFrame(dtype='float32') + # losslists['training'] = losslist_train + # losslists['validation'] = losslist_val + # losslists.to_csv(losslists_path, index=False, chunksize=500) + + # Using PyArrow + # losslists = { + # 'training': losslist_train, + # 'validation': losslist_val + # } + # losslists_table = pa.Table.from_pydict(losslists) + losslists = [np.array(losslist_train_fold1).reshape(-1).astype(np.float32), np.array(losslist_val_fold1).reshape(-1).astype(np.float32), + np.array(losslist_train_fold2).reshape(-1).astype(np.float32), np.array(losslist_val_fold2).reshape(-1).astype(np.float32), + np.array(losslist_train).reshape(-1).astype(np.float32), np.array(losslist_val).reshape(-1).astype(np.float32)] + losslists_names = ['fold1_training', 'fold1_validation', 'fold2_training', 'fold2_validation', 'mean_training', 'mean_validation'] + losslists_table = pa.Table.from_arrays(losslists, losslists_names) + csv.write_csv(losslists_table, losslists_path) + + +def save_runtime_list(epoch_time_list, pathmaster): + # epoch_time_array = np.array(epoch_time_list).reshape(-1).astype(np.float32) + runtime_lists_path = pathmaster.runtime_lists_path() + runtime_lists_path = os.path.join(runtime_lists_path, 'runtime_lists_' + pathmaster.file_tag + '.csv') + # runtime_list = pd.DataFrame(dtype='float32') + # runtime_list['time_sec'] = epoch_time_list + # runtime_list.to_csv(runtime_lists_path, index=False, chunksize=500) + + # Using PyArrow + runtime_dict = {'epoch_time_sec': epoch_time_list, + 'mean_time_sec': [sum(epoch_time_list)/len(epoch_time_list)] + [np.nan] * (len(epoch_time_list) - 1)} + runtime_table = pa.Table.from_pydict(runtime_dict) + # runtime_table = pa.Table.from_arrays([epoch_time_array, np.array([np.mean(epoch_time_array)])], names=['epoch_time_sec', 'mean_time_sec']) + csv.write_csv(runtime_table, runtime_lists_path) + + +def save_labels(labels, pathmaster): + labels = labels.astype(np.int8) + labels_path = pathmaster.labels_path() + labels_path = os.path.join(labels_path, 'labels_' + pathmaster.file_tag + '.csv') + # labels = pd.DataFrame(np.array(labels), dtype='int') + # labels.to_csv(labels_path, index=False, chunksize=500) + + # Using PyArrow + # labels_dict = {'labels': labels.reshape(-1)} # Convert to 1D array + # labels_table = pa.Table.from_pydict(labels_dict) + labels_table = pa.Table.from_arrays([labels.reshape(-1)], names=['labels']) + csv.write_csv(labels_table, labels_path) + + +def save_predictions(predictions, pathmaster): + predictions = predictions.astype(np.int8) + predictions_path = pathmaster.predictions_path() + 
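The save_losslists/save_runtime_list/save_labels/save_predictions helpers all follow the same PyArrow pattern in place of pandas.DataFrame.to_csv: wrap one or more 1-D numpy arrays in a pyarrow.Table and write it with pyarrow.csv.write_csv, which is faster for these simple numeric columns. A minimal standalone sketch of that pattern (hypothetical output file name, no PathMaster involved):

import numpy as np
import pyarrow as pa
from pyarrow import csv

predictions = np.array([0, 1, 2, 1, 0], dtype=np.int8)  # toy class predictions

# One named column per array; the names become the CSV header row.
table = pa.Table.from_arrays([predictions], names=['predictions'])
csv.write_csv(table, 'predictions_example.csv')  # hypothetical path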
predictions_path = os.path.join(predictions_path, 'predictions_' + pathmaster.file_tag + '.csv') + # predictions = pd.DataFrame(np.array(predictions), dtype='int') + # predictions.to_csv(predictions_path, index=False, chunksize=500) + + # Using PyArrow + # predictions_dict = {'predictions': predictions.reshape(-1)} # Convert to 1D array + # predictions_table = pa.Table.from_pydict(predictions_dict) + predictions_table = pa.Table.from_arrays([predictions.reshape(-1)], names=['predictions']) + csv.write_csv(predictions_table, predictions_path) + + +def save_prediction_proba(prediction_proba, pathmaster): + prediction_proba = prediction_proba.astype(np.float32) + prediction_proba_path = pathmaster.prediction_proba_path() + prediction_proba_path = os.path.join(prediction_proba_path, 'prediction_proba_' + pathmaster.file_tag + '.csv') + # prediction_proba = pd.DataFrame(np.array(prediction_proba), dtype='float32') + # prediction_proba.to_csv(prediction_proba_path, index=False, chunksize=500) + + # Using PyArrow + # # Create PyArrow arrays with specific data type (float64) + # prediction_proba_dict = { + # '0': prediction_proba[:,0], + # '1': prediction_proba[:,1], + # '2': prediction_proba[:,2] + # } + + # Create a PyArrow table + # prediction_proba_Table = pa.Table.from_pydict(prediction_proba_dict) + # col_arrays = [prediction_proba[:,0], prediction_proba[:,1]] + # prediction_proba_Table = pa.Table.from_arrays(col_arrays, names=['0', '1']) + # csv.write_csv(prediction_proba_Table, prediction_proba_path) + col_arrays = [prediction_proba[:,0], prediction_proba[:,1], prediction_proba[:,2]] + prediction_proba_Table = pa.Table.from_arrays(col_arrays, names=['0', '1', '2']) + csv.write_csv(prediction_proba_Table, prediction_proba_path) + + +def save_prediction_proba_binary(prediction_proba, pathmaster): + prediction_proba = prediction_proba.astype(np.float32) + prediction_proba_path = pathmaster.prediction_proba_path() + prediction_proba_path = os.path.join(prediction_proba_path, 'prediction_proba_' + pathmaster.file_tag + '.csv') + # prediction_proba = pd.DataFrame(np.array(prediction_proba), dtype='float32') + # prediction_proba.to_csv(prediction_proba_path, index=False, chunksize=500) + + # Using PyArrow + # # Create PyArrow arrays with specific data type (float64) + # prediction_proba_dict = { + # '0': prediction_proba[:,0], + # '1': prediction_proba[:,1], + # '2': prediction_proba[:,2] + # } + + # Create a PyArrow table + # prediction_proba_Table = pa.Table.from_pydict(prediction_proba_dict) + # col_arrays = [prediction_proba[:,0], prediction_proba[:,1]] + # prediction_proba_Table = pa.Table.from_arrays(col_arrays, names=['0', '1']) + # csv.write_csv(prediction_proba_Table, prediction_proba_path) + col_arrays = [prediction_proba[:,0], prediction_proba[:,1]] + prediction_proba_Table = pa.Table.from_arrays(col_arrays, names=['0', '1']) + csv.write_csv(prediction_proba_Table, prediction_proba_path) + + +def metrics(Y_true, Y_pred, Y_proba, save=False, pathmaster=None): + averages = ['micro', 'macro', 'weighted'] + accuracy_list = [] + precision_list = [] + recall_list = [] + f1_list = [] + auc_list = [] + + for average in averages: + accuracy = accuracy_score(Y_true, Y_pred) + precision, recall, f1, _ = precision_recall_fscore_support(Y_true, Y_pred, average=average) + auc = roc_auc_score(Y_true, Y_proba, average=average, multi_class='ovr') + + accuracy_list.append(accuracy) + precision_list.append(precision) + recall_list.append(recall) + f1_list.append(f1) + auc_list.append(auc) + + metrics = { 
+ 'accuracy': accuracy_list, + 'precision': precision_list, + 'recall': recall_list, + 'f1': f1_list, + 'auc': auc_list + } + + if save: + metrics_path = pathmaster.metrics_path() + metrics_path = os.path.join(metrics_path, 'metrics_' + pathmaster.file_tag + '.csv') + # metrics = pd.DataFrame(metrics, index=[0], dtype='float32') + # metrics.to_csv(metrics_path, index=False) + + # Using PyArrow + metrics_table = pa.Table.from_pydict(metrics) + csv.write_csv(metrics_table, metrics_path) + + +def metrics_binary(Y_true, Y_pred, Y_proba, save=False, pathmaster=None): + averages = ['micro', 'macro', 'weighted'] + accuracy_list = [] + precision_list = [] + recall_list = [] + f1_list = [] + auc_list = [] + + for average in averages: + accuracy = accuracy_score(Y_true, Y_pred) + precision, recall, f1, _ = precision_recall_fscore_support(Y_true, Y_pred, average=average) + auc = roc_auc_score(Y_true, Y_proba[:,1], average=average) + + accuracy_list.append(accuracy) + precision_list.append(precision) + recall_list.append(recall) + f1_list.append(f1) + auc_list.append(auc) + + metrics = { + 'accuracy': accuracy_list, + 'precision': precision_list, + 'recall': recall_list, + 'f1': f1_list, + 'auc': auc_list + } + + if save: + metrics_path = pathmaster.metrics_path() + metrics_path = os.path.join(metrics_path, 'metrics_' + pathmaster.file_tag + '.csv') + # metrics = pd.DataFrame(metrics, index=[0], dtype='float32') + # metrics.to_csv(metrics_path, index=False) + + # Using PyArrow + metrics_table = pa.Table.from_pydict(metrics) + csv.write_csv(metrics_table, metrics_path) + + +def metrics_2fold(Y_true_fold1, Y_true_fold2, Y_pred_fold1, Y_pred_fold2, Y_proba_fold1, Y_proba_fold2, save=False, pathmaster=None): + accuracy_fold1 = accuracy_score(Y_true_fold1, Y_pred_fold1) + precision_fold1, recall_fold1, f1_fold1, _ = precision_recall_fscore_support(Y_true_fold1, Y_pred_fold1, average='weighted') + auc_fold1 = roc_auc_score(Y_true_fold1, Y_proba_fold1, average='weighted', multi_class='ovr') + + accuracy_fold2 = accuracy_score(Y_true_fold2, Y_pred_fold2) + precision_fold2, recall_fold2, f1_fold2, _ = precision_recall_fscore_support(Y_true_fold2, Y_pred_fold2, average='weighted') + auc_fold2 = roc_auc_score(Y_true_fold2, Y_proba_fold2, average='weighted', multi_class='ovr') + + accuracy = accuracy_score(np.concatenate((Y_true_fold1,Y_true_fold2), axis=0), np.concatenate((Y_pred_fold1,Y_pred_fold2), axis=0)) + precision, recall, f1, _ = precision_recall_fscore_support(np.concatenate((Y_true_fold1,Y_true_fold2), axis=0), np.concatenate((Y_pred_fold1,Y_pred_fold2), axis=0), average='weighted') + auc = roc_auc_score(np.concatenate((Y_true_fold1,Y_true_fold2), axis=0), np.concatenate((Y_proba_fold1,Y_proba_fold2), axis=0), average='weighted', multi_class='ovr') + + metrics = { + 'accuracy': [accuracy_fold1, accuracy_fold2, accuracy], + 'precision': [precision_fold1, precision_fold2, precision], + 'recall': [recall_fold1, recall_fold2, recall], + 'f1': [f1_fold1, f1_fold2, f1], + 'auc': [auc_fold1, auc_fold2, auc] + } + + if save: + metrics_path = pathmaster.metrics_path() + metrics_path = os.path.join(metrics_path, 'metrics_' + pathmaster.file_tag + '.csv') + # metrics = pd.DataFrame(metrics, index=[0], dtype='float32') + # metrics.to_csv(metrics_path, index=False) + + # Using PyArrow + metrics_table = pa.Table.from_pydict(metrics) + csv.write_csv(metrics_table, metrics_path) + + +def save_classification_report(Y_true, Y_pred, save=False, pathmaster=None): + report = classification_report(Y_true, Y_pred, 
output_dict=True) + row_labels = ['precision', 'recall', 'f1', 'support'] + + if save: + classification_report_path = pathmaster.classification_report_path() + classification_report_path = os.path.join(classification_report_path, 'classification_report_' + pathmaster.file_tag + '.csv') + report = pd.DataFrame(report) + # report.reset_index(inplace=True) + report.insert(loc=0, column='metrics', value=row_labels) + report.to_csv(classification_report_path, index=False) + + # # Using PyArrow + # report_table = pa.Table.from_pydict(report) + # csv.write_csv(report_table, classification_report_path) + + +def save_classification_report_imbalanced(Y_true, Y_pred, save=False, pathmaster=None): + report_imbalanced = classification_report_imbalanced(Y_true, Y_pred, output_dict=True) + row_labels = ['precision', 'recall', 'specificity', 'f1', 'geo mean', 'iba', 'support'] + + if save: + classification_report_imbalanced_path = pathmaster.classification_report_imbalanced_path() + classification_report_imbalanced_path = os.path.join(classification_report_imbalanced_path, 'classification_report_imbalanced_' + pathmaster.file_tag + '.csv') + report_imbalanced = pd.DataFrame(report_imbalanced) + # report_imbalanced.reset_index(inplace=True) + report_imbalanced.insert(loc=0, column='metrics', value=row_labels) + report_imbalanced.to_csv(classification_report_imbalanced_path, index=False) + + # # Using PyArrow + # report_imbalanced_table = pa.Table.from_pydict(report_imbalanced) + # csv.write_csv(report_imbalanced_table, classification_report_imbalanced_path) + + +def roc_curves(y_test, y_prob, save=False, pathmaster=None, class_names=['NSR', 'AF', 'PAC/PVC']): + # Get the unique class labels + classes = np.unique(y_test) + + if class_names is None: + class_names = np.unique(y_test) + + # Convert labels to binary matrix + y_bin = label_binarize(y_test, classes=classes) + + # Pre-allocate arrays for ROC curves + fpr_mean = np.linspace(0, 1, 100) + tpr_mean = [] + fpr = [] + tpr = [] + AUC = [] + + # Calculate ROC curves for each class + for i, class_label in enumerate(classes): + fpr_i, tpr_i, _ = roc_curve(y_bin[:, i], y_prob[:, i]) + AUC.append(roc_auc_score(y_bin[:, i], y_prob[:, i])) + fpr.append(fpr_i) + tpr.append(tpr_i) + + # Interpolate TPR for mean ROC curve + tpr_mean.append(np.interp(fpr_mean, fpr_i, tpr_i)) + + # Calculate mean and standard deviation for TPR and AUC + tpr_mean = np.mean(np.array(tpr_mean).reshape(len(classes), -1), axis=0) + tpr_stdv = np.std(tpr_mean, axis=0) + mean_auc = auc(fpr_mean, tpr_mean) + std_auc = np.std(AUC) + + # Create the plot + plt.figure(figsize=(12, 9)) + plt.clf() + plt.plot([0, 1], [0, 1], 'k--') + plt.axis([0, 1, 0, 1]) + plt.xlabel('False Positive Rate', fontsize=16) + plt.ylabel('True Positive Rate', fontsize=16) + plt.title('ROC Curves (' + pathmaster.file_tag + ')', fontweight='bold') + + # Plot individual ROC curves + for i in range(len(classes)): + label_str = f"ROC Label {class_names[i]} (AUC = {AUC[i]:.3f})" + plt.plot(fpr[i], tpr[i], linewidth=3, label=label_str) + + # Plot mean ROC curve with standard deviation + plt.plot(fpr_mean, tpr_mean, color='k', label=rf"Mean ROC (AUC = {mean_auc:.3f} $\pm$ {std_auc:.3f})", linewidth=5) + plt.fill_between(fpr_mean, np.maximum(tpr_mean - tpr_stdv, 0), np.minimum(tpr_mean + tpr_stdv, 1), color='grey', alpha=0.2, label=r"$\pm$ 1 std. 
dev.") + + plt.legend(loc="lower right") + + if save: + roc_curves_path = pathmaster.roc_curves_path() + roc_curves_path = os.path.join(roc_curves_path, 'roc_curves_' + pathmaster.file_tag + '.jpg') + plt.savefig(roc_curves_path, dpi=150) + + +def roc_curves_binary(y_test, y_prob, save=False, pathmaster=None, class_names=['Negative', 'Positive']): + y_prob = y_prob[:,1] + # Convert labels to binary matrix + y_bin = label_binarize(y_test, classes=np.unique(y_test)) + + # Pre-allocate arrays for ROC curves + fpr_mean = np.linspace(0, 1, 100) + tpr_mean = [] + fpr = [] + tpr = [] + AUC = [] + + # Calculate ROC curve for the positive class + fpr, tpr, _ = roc_curve(y_bin, y_prob) + AUC = roc_auc_score(y_bin, y_prob) + + # Create the plot + plt.figure(figsize=(12, 9)) + plt.plot([0, 1], [0, 1], 'k--') + plt.plot(fpr, tpr, linewidth=3, label=f'ROC Curve (AUC = {AUC:.3f})') + plt.axis([0, 1, 0, 1]) + plt.xlabel('False Positive Rate', fontsize=16) + plt.ylabel('True Positive Rate', fontsize=16) + plt.title('ROC Curve', fontweight='bold') + plt.legend(loc="lower right") + + if save: + roc_curves_path = pathmaster.roc_curves_path() + roc_curves_path = os.path.join(roc_curves_path, 'roc_curves_' + pathmaster.file_tag + '.jpg') + plt.savefig(roc_curves_path, dpi=150) + + +def mean_roc_curves(Y_tests, Y_probas, clf_names, save=False, pathmaster=None): + # Pre-allocate arrays for ROC curves + fpr_mean = np.linspace(0, 1, 100) + # tpr_mean = np.zeros_like(fpr_mean) + + # Set figure size + plt.figure(figsize=(12,9)) + + # Plot individual mean ROC curves for each classifier + for y_test, y_prob, clf_name in zip(Y_tests, Y_probas, clf_names): + # Get the unique class labels + classes = np.unique(y_test) + + # Convert labels to binary matrix + y_bin = label_binarize(y_test, classes=classes) + + # Pre-allocate arrays for ROC curves + fpr = [] + tpr = [] + AUC = [] + + # Calculate ROC curves for each class + for i, class_label in enumerate(classes): + fpr_i, tpr_i, _ = roc_curve(y_bin[:, i], y_prob[:, i]) + AUC.append(roc_auc_score(y_bin[:, i], y_prob[:, i])) + fpr.append(fpr_i) + tpr.append(tpr_i) + + # Interpolate TPR for mean ROC curve + tpr_interp = [np.interp(fpr_mean, fpr_i, tpr_i) for fpr_i, tpr_i in zip(fpr, tpr)] + tpr_mean = np.mean(tpr_interp, axis=0) + + # Plot mean ROC curve + plt.plot(fpr_mean, tpr_mean, label=f"{clf_name} - Mean ROC (AUC = {auc(fpr_mean, tpr_mean):.3f} $\pm$ {np.std(AUC):.3f})", linewidth=2) + + # Additional plot configurations + plt.plot([0, 1], [0, 1], 'k--') + plt.axis([0, 1, 0, 1]) + plt.xlabel('False Positive Rate', fontsize=12) + plt.ylabel('True Positive Rate', fontsize=12) + plt.title('Mean ROC Curve(s)', fontweight='bold') + plt.legend(loc="lower right") + # plt.show() + + if save: + mean_roc_curves_path = pathmaster.mean_roc_curves_path() + mean_roc_curves_path = os.path.join(mean_roc_curves_path, 'mean_roc_curves_' + pathmaster.file_tag + '.jpg') + plt.savefig(mean_roc_curves_path, dpi=150) + + +def conf_matrix(conf_matrix, title='Confusion Matrix', save=False, pathmaster=None, class_names=['NSR', 'AF', 'PAC/PVC']): + title = title + ' (' + pathmaster.file_tag + ')' + conf_matrix_norm = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis] # Normalize + + plt.figure(figsize=(10, 8)) # Adjust the figure size as per your preference + plt.imshow(conf_matrix_norm, interpolation='nearest', cmap=plt.cm.Blues, vmin=0.0, vmax=1.0) + plt.title(title, fontweight='bold') + plt.colorbar() + tick_marks = np.arange(len(conf_matrix)) + + if class_names is not None: + 
tick_marks = np.arange(len(class_names)) + plt.xticks(tick_marks, class_names) + plt.yticks(tick_marks, class_names) + else: + tick_marks = np.arange(len(conf_matrix)) + plt.xticks(tick_marks, tick_marks) + plt.yticks(tick_marks, tick_marks) + + plt.xlabel('Predicted label') + plt.ylabel('True label') + + # Add counts and percentages in each box + for i in range(conf_matrix.shape[0]): + for j in range(conf_matrix.shape[1]): + percentage = conf_matrix_norm[i, j] * 100 + count = int(conf_matrix[i, j]) + # text_color = 'black' if conf_matrix[i, j] < np.max(conf_matrix) / 1.5 else 'white' + text_color = 'black' if percentage < 80 else 'white' + plt.text(j, i, "{:.2f}%\n{}".format(percentage, count), + horizontalalignment="center", + verticalalignment="center", + color=text_color) + + if save: + confusion_matrices_path = pathmaster.confusion_matrices_path() + confusion_matrices_path = os.path.join(confusion_matrices_path, 'confusion_matrix_' + pathmaster.file_tag + '.jpg') + plt.savefig(confusion_matrices_path, dpi=200) + + # plt.show() + + +def train_val_loss(losslist_train, losslist_val, title='Training and Validation Loss', save=False, pathmaster=None): + title = title + ' (' + pathmaster.file_tag + ')' + plt.figure(figsize=(12, 8)) + plt.plot(range(len(losslist_train)), losslist_train, label='training') + plt.plot(range(len(losslist_val)), losslist_val, label='validation') + plt.legend() + plt.title(title, fontweight='bold') + plt.xlabel('Epochs') + plt.ylabel('Loss') + + if save: + loss_curves_path = pathmaster.loss_curves_path() + loss_curves_path = os.path.join(loss_curves_path, 'loss_curve_' + pathmaster.file_tag + '.jpg') + plt.savefig(loss_curves_path, dpi=150) + + # plt.show() + +def accuracy_curves(Y_true_train, Y_true_val, Y_pred_train, Y_pred_val, title='Training and Validation Accuracy', save=False, pathmaster=None): + accuracy_list_train = [] + accuracy_list_val = [] + epochs_train = range(len(Y_pred_train)) + epochs_val = range(len(Y_pred_val)) + + for predictions in Y_pred_train: + accuracy = accuracy_score(Y_true_train, predictions) + accuracy_list_train.append(accuracy) + for predictions in Y_pred_val: + accuracy = accuracy_score(Y_true_val, predictions) + accuracy_list_val.append(accuracy) + + title = title + ' (' + pathmaster.file_tag + ')' + plt.figure(figsize=(12, 8)) + plt.plot(epochs_train, accuracy_list_train, label='training') + plt.plot(epochs_val, accuracy_list_val, label='validation') + plt.legend() + plt.title(title, fontweight='bold') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + + if save: + accuracy_curves_path = pathmaster.accuracy_curves_path() + accuracy_curves_path = os.path.join(accuracy_curves_path, 'accuracy_curve_' + pathmaster.file_tag + '.jpg') + plt.savefig(accuracy_curves_path, dpi=150) \ No newline at end of file diff --git a/utils/smote.py b/utils/smote.py new file mode 100644 index 0000000..890d2fb --- /dev/null +++ b/utils/smote.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +import torchvision.transforms as transforms +import os +import csv +from imblearn.over_sampling import SMOTE +import numpy as np +from tqdm import tqdm +import pandas as pd +from concurrent.futures import ProcessPoolExecutor + +import sys +sys.path.append('R:\ENGR_Chon\Darren\Honors_Thesis') + +# Import my own functions and classes +from utils.pathmaster import PathMaster +from utils import dataloader + +def apply_cassey_smote(data, labels): + cassey_smote = SMOTE(random_state=42,sampling_strategy='not majority',k_neighbors=5) + data_resampled, 
labels_resampled = cassey_smote.fit_resample(data, labels) + return data_resampled, labels_resampled + +def save_image(i, image, group, save_dir): + # Generate a unique file name with zero-padding + file_name = f'{i+1:06d}' + '_' + group + '_tfs' + + # Convert the image to a PyTorch tensor + tensor_image = torch.tensor(image).to(dtype=torch.float16) + + # Save the tensor to a .pt file + torch.save(tensor_image, os.path.join(save_dir, file_name + '.pt')) + + return file_name + +def save_images_parallel(data_resampled, group, save_dir): + file_names = [] + with ProcessPoolExecutor() as executor: + results = [executor.submit(save_image, i, image, group, save_dir) for i, image in enumerate(data_resampled)] + for future in results: + file_names.append(future.result()) + return file_names + +def main(): + # Initialize save location specifics + smote_type = 'Cassey_SMOTE' + split = '2foldCV_60_40' + groups = ['fold1', 'fold2', 'test'] + + # Device and drives + is_linux = False + is_hpc = False + is_internal = True + is_external = False + + # Input + is_tfs = True + + # Intialize the focus + focus = 'misc' + + # Initialize the file tag + file_tag = 'temp' + + # Image resolution + img_res = '128x128_float16' + + # Data type: the type to convert the data into when it is loaded in + data_type = torch.float32 + + # Create a PathMaster object + pathmaster = PathMaster(is_linux, is_hpc, is_tfs, is_internal, is_external, focus, file_tag, img_res) + + # Image dimensions + img_channels = 1 + img_size = 128 + downsample = None + standardize = None + + # Split UIDs + # train_set, val_set, test_set = dataloader.split_uids(pathmaster) + cross_val_fold1, cross_val_fold2, test_set = dataloader.split_uids_2fold_60_40_smote(pathmaster) + # train_set, val_set, test_set = dataloader.split_uids_60_10_30(pathmaster) + + # Preprocess data + data_format = 'pt' + batch_size = 256 + + # train_loader, val_loader, _ = dataloader.preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, + # batch_size, standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + fold1_loader, fold2_loader, test_loader = dataloader.preprocess_data(data_format, cross_val_fold1, cross_val_fold2, test_set, batch_size, + standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + # train_loader, val_loader, test_loader = dataloader.preprocess_data(data_format, train_set, val_set, test_set, + # batch_size, standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + data_loaders = [fold1_loader, fold2_loader, test_loader] + print() + sys.stdout.flush() + for data_loader, group in tqdm(zip(data_loaders,groups), total=len(data_loaders), desc='SMOTE', unit='Data Loader', leave=False): + sys.stderr.flush() + + # Define your original data and labels + data = np.empty((0,img_size*img_size)) + labels = np.empty((0,1)) + + sys.stdout.flush() + + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Loading', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[-1] * data_batch['data'].shape[-1]).numpy() + Y = data_batch['label'].numpy().reshape(-1,1) + + data = np.concatenate((data, X), axis=0) + labels = np.concatenate((labels, Y), axis=0) + + sys.stderr.flush() + print('\nData shape:', data.shape) + print('Labels shape:', labels.shape) + sys.stdout.flush() + + if group != 'test': + # SMOTE + data_resampled, labels_resampled = 
apply_cassey_smote(data, labels) + data_resampled = data_resampled.reshape(len(data_resampled), img_channels, img_size, img_size) + sys.stderr.flush() + print('\nResampled Data shape:', data_resampled.shape) + print('Resampled Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + else: + data_resampled = data + data_resampled = data_resampled.reshape(len(data_resampled), img_channels, img_size, img_size) + labels_resampled = labels + sys.stderr.flush() + print('\nResampled Data shape:', data_resampled.shape) + print('Resampled Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + + # Define a directory to save the images + # save_dir = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch', smote_type, split, group) + save_dir = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch', smote_type, split, group) + os.makedirs(save_dir, exist_ok=True) + + file_names = save_images_parallel(data_resampled, group, save_dir) + + # Ground truths + data_labels = pd.DataFrame({ + 'segment_name': file_names, + 'label': labels_resampled + }) + + csv_file_name = os.path.join(r'C:\Chon_Lab\NIH_Pulsewatch', smote_type, split, smote_type + '_' + group + '_names_labels.csv') + data_labels.to_csv(csv_file_name, index=False) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/smote_accelerated.py b/utils/smote_accelerated.py new file mode 100644 index 0000000..555f8cc --- /dev/null +++ b/utils/smote_accelerated.py @@ -0,0 +1,178 @@ +import torch +import torch.nn as nn +import torchvision.transforms as transforms +import os +import csv +from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN +import numpy as np +from tqdm import tqdm +import pandas as pd +from concurrent.futures import ProcessPoolExecutor + +import sys +sys.path.append('R:\ENGR_Chon\Darren\Honors_Thesis') + +# Import my own functions and classes +from utils.pathmaster import PathMaster +from utils import dataloader + +def apply_cassey_smote(data, labels): + cassey_smote = SMOTE(random_state=42,sampling_strategy='not majority',k_neighbors=5) + data_resampled, labels_resampled = cassey_smote.fit_resample(data, labels) + return data_resampled, labels_resampled + +def apply_borderline_smote(data, labels): + borderline_smote = BorderlineSMOTE(random_state=42,sampling_strategy='not majority',k_neighbors=5) + data_resampled, labels_resampled = borderline_smote.fit_resample(data, labels) + return data_resampled, labels_resampled + +def apply_adasyn(data, labels): + adasyn = ADASYN(random_state=42,sampling_strategy='not majority',n_neighbors=5) + data_resampled, labels_resampled = adasyn.fit_resample(data, labels) + return data_resampled, labels_resampled + +def save_image(i, image, group, save_dir): + + # Generate a unique file name with zero-padding + file_name = f'{i+1:06d}' + '_' + group + '_tfs' + + # Convert the image to a PyTorch tensor + tensor_image = torch.tensor(image).to(dtype=torch.float16) + tensor_image = tensor_image.reshape(tensor_image.size()[-2], tensor_image.size()[-2]) + + + # Save the tensor to a .pt file + torch.save(tensor_image, os.path.join(save_dir, file_name + '.pt')) + + return file_name + +def save_images_parallel(data_resampled, group, save_dir): + file_names = [] + with ProcessPoolExecutor() as executor: + results = [executor.submit(save_image, i, image, group, save_dir) for i, image in enumerate(data_resampled)] + for future in results: + file_names.append(future.result()) + return file_names + 
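The three apply_* wrappers above differ only in which imbalanced-learn resampler they construct (SMOTE, BorderlineSMOTE, or ADASYN); each takes flattened images of shape (n_samples, n_features) plus integer labels and returns an oversampled copy of both. A small self-contained sketch on synthetic data (toy shapes and class counts, purely illustrative):

import numpy as np
from collections import Counter
from imblearn.over_sampling import SMOTE

rng = np.random.default_rng(42)

# Toy "images": 6 minority-class and 20 majority-class samples, flattened to 16 features.
X = rng.random((26, 16)).astype(np.float32)
y = np.array([1] * 6 + [0] * 20)

# Same arguments as apply_cassey_smote; k_neighbors=5 requires at least 6 minority samples.
smote = SMOTE(random_state=42, sampling_strategy='not majority', k_neighbors=5)
X_res, y_res = smote.fit_resample(X, y)

print(Counter(y))      # 20 majority vs. 6 minority before resampling
print(Counter(y_res))  # minority class synthesized up to the majority count (20 vs. 20)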
+def main(): + # Initialize save location specifics + # smote_type = 'Cassey_SMOTE' + smote_type = 'Borderline_SMOTE' + # smote_type = 'ADASYN' + + # split = '2foldCV_60_40' + split = 'holdout_60_10_30' + + # groups = ['fold1', 'fold2', 'test'] + groups = ['train', 'validate', 'test'] + + # Device and drives + is_linux = False + is_hpc = False + is_internal = True + is_external = False + + # Input + is_tfs = True + + # Intialize the focus + focus = 'misc' + + # Initialize the file tag + file_tag = 'temp' + + # Image resolution + img_res = '128x128_float16' + + # Data type: the type to convert the data into when it is loaded in + data_type = torch.float32 + + # Create a PathMaster object + pathmaster = PathMaster(is_linux, is_hpc, is_tfs, is_internal, is_external, focus, file_tag, img_res) + + # Image dimensions + img_channels = 1 + img_size = 128 + downsample = None + standardize = None + + # Split UIDs + # cross_val_fold1, cross_val_fold2, test_set = dataloader.split_uids_2fold_60_40_smote(pathmaster) + train_set, val_set, test_set = dataloader.split_uids_60_10_30_smote(pathmaster) + + # Preprocess data + data_format = 'pt' + batch_size = 256 + + # fold1_loader, fold2_loader, test_loader = dataloader.preprocess_data(data_format, cross_val_fold1, cross_val_fold2, test_set, batch_size, + # standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + # data_loaders = [fold1_loader, fold2_loader, test_loader] + + train_loader, val_loader, test_loader = dataloader.preprocess_data(data_format, train_set, val_set, test_set, + batch_size, standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + data_loaders = [train_loader, val_loader, test_loader] + print() + sys.stdout.flush() + for data_loader, group in tqdm(zip(data_loaders,groups), total=len(data_loaders), desc='SMOTE', unit='Data Loader', leave=False): + sys.stderr.flush() + + # Define your original data and labels + data = np.empty((0,img_size*img_size)) + labels = np.empty((0,1)) + + sys.stdout.flush() + + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Loading', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[-1] * data_batch['data'].shape[-1]).numpy() + Y = data_batch['label'].numpy().reshape(-1,1) + + data = np.concatenate((data, X), axis=0) + labels = np.concatenate((labels, Y), axis=0) + + sys.stderr.flush() + print('\nData shape:', data.shape) + print('Labels shape:', labels.shape) + sys.stdout.flush() + + if group != 'test': + # SMOTE + # data_resampled, labels_resampled = apply_cassey_smote(data, labels) + data_resampled, labels_resampled = apply_borderline_smote(data, labels) + # data_resampled, labels_resampled = apply_adasyn(data, labels) + data_resampled = data_resampled.reshape(len(data_resampled), img_channels, img_size, img_size) + sys.stderr.flush() + print('\nResampled Data shape:', data_resampled.shape) + print('Resampled Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + else: + data_resampled = data + data_resampled = data_resampled.reshape(len(data_resampled), img_channels, img_size, img_size) + labels_resampled = labels + sys.stderr.flush() + print('\nResampled Data shape:', data_resampled.shape) + print('Resampled Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + + # Define a directory to save the images + # save_dir = 
os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch', smote_type, split, group) + save_dir = os.path.join(r'C:\Chon_Lab\NIH_Pulsewatch\TFS_pt', smote_type, split, group) + os.makedirs(save_dir, exist_ok=True) + + file_names = save_images_parallel(data_resampled, group, save_dir) + + # Ground truths + data_labels = pd.DataFrame({ + 'segment_name': file_names, + 'label': labels_resampled.reshape(-1) + }) + + csv_file_name = os.path.join(r'C:\Chon_Lab\NIH_Pulsewatch\TFS_pt', smote_type, split, smote_type + '_' + group + '_names_labels.csv') + data_labels.to_csv(csv_file_name, index=False) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/smote_accelerated_lab.py b/utils/smote_accelerated_lab.py new file mode 100644 index 0000000..90201ba --- /dev/null +++ b/utils/smote_accelerated_lab.py @@ -0,0 +1,177 @@ +import torch +import torch.nn as nn +import torchvision.transforms as transforms +import os +import csv +from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN +import numpy as np +from tqdm import tqdm +import pandas as pd +from concurrent.futures import ProcessPoolExecutor + +import sys +sys.path.append('R:\ENGR_Chon\Darren\Honors_Thesis') + +# Import my own functions and classes +from utils.pathmaster import PathMaster +from utils import dataloader + +def apply_cassey_smote(data, labels): + cassey_smote = SMOTE(random_state=42,sampling_strategy='not majority',k_neighbors=5) + data_resampled, labels_resampled = cassey_smote.fit_resample(data, labels) + return data_resampled, labels_resampled + +def apply_borderline_smote(data, labels): + borderline_smote = BorderlineSMOTE(random_state=42,sampling_strategy='not majority',k_neighbors=5) + data_resampled, labels_resampled = borderline_smote.fit_resample(data, labels) + return data_resampled, labels_resampled + +def apply_adasyn(data, labels): + adasyn = ADASYN(random_state=42,sampling_strategy='not majority',n_neighbors=4) + data_resampled, labels_resampled = adasyn.fit_resample(data, labels) + return data_resampled, labels_resampled + +def save_image(i, image, group, save_dir): + + # Generate a unique file name with zero-padding + file_name = f'{i+1:06d}' + '_' + group + '_tfs' + + # Convert the image to a PyTorch tensor + tensor_image = torch.tensor(image).to(dtype=torch.float16) + tensor_image = tensor_image.reshape(tensor_image.size()[-2], tensor_image.size()[-2]) + + # Save the tensor to a .pt file + torch.save(tensor_image, os.path.join(save_dir, file_name + '.pt')) + + return file_name + +def save_images_parallel(data_resampled, group, save_dir): + file_names = [] + with ProcessPoolExecutor() as executor: + results = [executor.submit(save_image, i, image, group, save_dir) for i, image in enumerate(data_resampled)] + for future in results: + file_names.append(future.result()) + return file_names + +def main(): + # Initialize save location specifics + # smote_type = 'Cassey4k_SMOTE' + # smote_type = 'Borderline5k_SMOTE' + smote_type = 'ADASYN6k' + + # split = '2foldCV_60_40' + split = 'holdout_60_10_30' + + # groups = ['fold1', 'fold2', 'test'] + groups = ['train', 'validate', 'test'] + + # Device and drives + is_linux = False + is_hpc = False + is_internal = False + is_external = False + + # Input + is_tfs = True + + # Intialize the focus + focus = 'misc' + + # Initialize the file tag + file_tag = 'temp' + + # Image resolution + img_res = '128x128_float16' + + # Data type: the type to convert the data into when it is loaded in + data_type = torch.float32 + + # 
Create a PathMaster object + pathmaster = PathMaster(is_linux, is_hpc, is_tfs, is_internal, is_external, focus, file_tag, img_res) + + # Image dimensions + img_channels = 1 + img_size = 128 + downsample = None + standardize = None + + # Split UIDs + # cross_val_fold1, cross_val_fold2, test_set = dataloader.split_uids_2fold_60_40_smote(pathmaster) + train_set, val_set, test_set = dataloader.split_uids_60_10_30_smote(pathmaster) + + # Preprocess data + data_format = 'pt' + batch_size = 256 + + # fold1_loader, fold2_loader, test_loader = dataloader.preprocess_data(data_format, cross_val_fold1, cross_val_fold2, test_set, batch_size, + # standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + # data_loaders = [fold1_loader, fold2_loader, test_loader] + + train_loader, val_loader, test_loader = dataloader.preprocess_data(data_format, train_set, val_set, test_set, + batch_size, standardize, False, img_channels, img_size, downsample, data_type, pathmaster) + data_loaders = [train_loader, val_loader, test_loader] + print() + sys.stdout.flush() + for data_loader, group in tqdm(zip(data_loaders,groups), total=len(data_loaders), desc='SMOTE', unit='Data Loader', leave=False): + sys.stderr.flush() + + # Define your original data and labels + data = np.empty((0,img_size*img_size)) + labels = np.empty((0,1)) + + sys.stdout.flush() + + for data_batch in tqdm(data_loader, total=len(data_loader), desc='Loading', unit='batch', leave=False): + sys.stderr.flush() + + # Extract input and labels + X = data_batch['data'].reshape(data_batch['data'].shape[0], data_batch['data'].shape[-1] * data_batch['data'].shape[-1]).numpy() + Y = data_batch['label'].numpy().reshape(-1,1) + + data = np.concatenate((data, X), axis=0) + labels = np.concatenate((labels, Y), axis=0) + + sys.stderr.flush() + print('\nData shape:', data.shape) + print('Labels shape:', labels.shape) + sys.stdout.flush() + + if group != 'test': + # SMOTE + # data_resampled, labels_resampled = apply_cassey_smote(data, labels) + # data_resampled, labels_resampled = apply_borderline_smote(data, labels) + data_resampled, labels_resampled = apply_adasyn(data, labels) + data_resampled = data_resampled.reshape(len(data_resampled), img_size, img_size) + sys.stderr.flush() + print('\nResampled Data shape:', data_resampled.shape) + print('Resampled Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + else: + data_resampled = data + data_resampled = data_resampled.reshape(len(data_resampled), img_size, img_size) + labels_resampled = labels + sys.stderr.flush() + print('\nData shape:', data_resampled.shape) + print('Labels shape:', labels_resampled.shape) + print() + sys.stdout.flush() + + # Define a directory to save the images + # save_dir = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch', smote_type, split, group) + save_dir = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch\TFS_pt', smote_type, split, group) + os.makedirs(save_dir, exist_ok=True) + + file_names = save_images_parallel(data_resampled, group, save_dir) + + # Ground truths + data_labels = pd.DataFrame({ + 'segment_name': file_names, + 'label': labels_resampled.reshape(-1) + }) + + csv_file_name = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_Pulsewatch\TFS_pt', smote_type, split, smote_type + '_' + group + '_names_labels.csv') + data_labels.to_csv(csv_file_name, index=False) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git 
a/utils/smote_transfer_location.py b/utils/smote_transfer_location.py new file mode 100644 index 0000000..93ffd22 --- /dev/null +++ b/utils/smote_transfer_location.py @@ -0,0 +1,93 @@ +import os +import pandas as pd +import numpy as np +from PIL import Image +import torch +from concurrent.futures import ProcessPoolExecutor +from pyarrow import csv +import cv2 +from tqdm import tqdm +import sys + + +def preprocess_and_save_data(data_path, output_path): + if not os.path.exists(output_path): + os.makedirs(output_path) + group_directories = [entry for entry in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, entry))] + for group in tqdm(group_directories, total=len(group_directories), desc='Data Transfer', unit='Group', leave=False): + sys.stderr.flush() + group_path = os.path.join(data_path, group) + group_output_path = os.path.join(output_path, group) + if not os.path.exists(group_output_path): + os.makedirs(group_output_path) + # else: # Only use for resuming converting + # print('Skipping', group) + # continue + files_to_process = [file for file in os.listdir(group_path) if file.endswith(('.csv', '.png', '.pt'))] + with ProcessPoolExecutor() as executor: + executor.map(preprocess_file, [group_path]*len(files_to_process), files_to_process, [group_output_path]*len(files_to_process)) + print() + print(group, 'data transfer done!') + sys.stdout.flush() + +def preprocess_file(group_path, file, group_output_path): + is_tfs = True + if is_tfs: + dtype = torch.float16 + input_size = 128 + else: + dtype = torch.uint8 + input_size = 500 + + downsample = None + + file_path = os.path.join(group_path, file) + if file.endswith('.csv'): + # data = pd.read_csv(file_path, header=None).to_numpy() + + # Use PyArrow + read_options = csv.ReadOptions(autogenerate_column_names=True) + data = csv.read_csv(file_path, read_options=read_options).to_pandas().to_numpy() + + if data.shape != (input_size, input_size): + print(f"Warning: File {file_path} has shape {data.shape} instead of", input_size + 'x', input_size + '.') + elif file.endswith('.png'): + data = np.array(Image.open(file_path)) + if data.shape != (input_size, input_size): + print(f"Warning: Image {file_path} has shape {data.shape} instead of", input_size + 'x', input_size + '.') + elif file.endswith('.pt'): + data = torch.load(file_path) + if data.shape != (input_size, input_size): + print(f"Warning: Image {file_path} has shape {data.shape} instead of", input_size + 'x', input_size + '.') + else: + print('Incorrect data type') + return + + if downsample is not None: + # Downsample the image + # Use OpenCV to resize the array to downsample x downsample using INTER_AREA interpolation + data_array = cv2.resize(np.array(data), (downsample, downsample), interpolation=cv2.INTER_AREA) + data_tensor = torch.tensor(data_array, dtype=dtype).view(downsample, downsample) + elif file.endswith('.pt'): + data_tensor = data.to(dtype).view(input_size, input_size) + else: + data_tensor = torch.tensor(data, dtype=dtype).view(input_size, input_size) + + # base_name, extension = os.path.splitext(file) + output_file_path = os.path.join(group_output_path, file) + torch.save(data_tensor, output_file_path) + +def main(): + smote_type = 'ADASYN6k' + split = 'holdout_60_10_30' + input_path = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_PulseWatch\TFS_pt', smote_type, split) + # input_path = os.path.join(r'\\grove.ad.uconn.edu\research\ENGR_Chon\Darren\NIH_PulseWatch\Poincare_pt', smote_type, split) + + output_path = 
os.path.join(r'C:\Chon_Lab\NIH_Pulsewatch\TFS_pt', smote_type, split) + # output_path = os.path.join(r'C:\Chon_Lab\NIH_Pulsewatch\Poincare_pt', smote_type, split) + + preprocess_and_save_data(input_path, output_path) + print('Data transfer complete!') + +if __name__ == '__main__': + main()
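preprocess_and_save_data above walks each group folder, loads every .csv/.png/.pt file, optionally downsamples it, casts it to the target dtype (float16 for TFS, uint8 for Poincare), and re-saves it as a .pt tensor in the destination tree. A condensed sketch of that per-file round trip for the .pt case, with the shape warning written as an f-string (hypothetical helper name, paths, and sizes):

import os
import torch

def convert_pt(src_file, dst_dir, input_size=128, dtype=torch.float16):
    """Load one saved tensor, check its shape, cast it, and re-save it."""
    data = torch.load(src_file)
    if data.shape != (input_size, input_size):
        print(f"Warning: {src_file} has shape {tuple(data.shape)} instead of {input_size}x{input_size}.")
    tensor = data.to(dtype).view(input_size, input_size)
    os.makedirs(dst_dir, exist_ok=True)
    torch.save(tensor, os.path.join(dst_dir, os.path.basename(src_file)))

# convert_pt(r'C:\example\source\000001_train_tfs.pt', r'C:\example\destination')  # illustrative call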