From 9388a463adbefccb970dc0178d9130bc7815ceb4 Mon Sep 17 00:00:00 2001
From: Luis Roberto Mercado Diaz
Date: Thu, 18 Jan 2024 14:27:48 -0500
Subject: [PATCH] Model Gaussian Variational Process

Adding the final version of the model with semi-supervised and supervised
methods.

Co-Authored-By: Dong Han
---
 .../ss_active_learning.cpython-311.pyc        | Bin 0 -> 6942 bytes
 .../active_learning/ss_active_learning.py     | 120 ++++++
 .../__pycache__/ss_gp_model.cpython-311.pyc   | Bin 0 -> 12179 bytes
 BML_project/models/ss_gp_model.py             | 198 ++++++++++
 BML_project/ss_main.py                        |  84 ++++
 BML_project/ss_main_ss.py                     | 103 +++++
 .../__pycache__/data_loader.cpython-311.pyc   | Bin 0 -> 17950 bytes
 .../__pycache__/ss_evaluation.cpython-311.pyc | Bin 0 -> 9772 bytes
 .../__pycache__/visualization.cpython-311.pyc | Bin 0 -> 5734 bytes
 BML_project/utils_gp/data_loader.py           | 297 ++++++++++++++
 BML_project/utils_gp/ss_evaluation.py         | 167 ++++++++
 BML_project/utils_gp/visualization.py         |  81 ++++
 pytorch_file_generation_loader_update.py      | 374 ++++++++++++++++++
 semisupervised_method.py                      |  34 +-
 14 files changed, 1431 insertions(+), 27 deletions(-)
 create mode 100644 BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc
 create mode 100644 BML_project/active_learning/ss_active_learning.py
 create mode 100644 BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc
 create mode 100644 BML_project/models/ss_gp_model.py
 create mode 100644 BML_project/ss_main.py
 create mode 100644 BML_project/ss_main_ss.py
 create mode 100644 BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/data_loader.py
 create mode 100644 BML_project/utils_gp/ss_evaluation.py
 create mode 100644 BML_project/utils_gp/visualization.py
 create mode 100644 pytorch_file_generation_loader_update.py

diff --git a/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45c9875deaf1bad595378c59b1e08a1296ac1ec3
Binary files /dev/null and b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc differ
diff --git a/BML_project/active_learning/ss_active_learning.py b/BML_project/active_learning/ss_active_learning.py
new file mode 100644
index 0000000..546c4ad
--- /dev/null
+++ b/BML_project/active_learning/ss_active_learning.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:23:23 2023
+
+@author: lrm22005
+"""
+import numpy as np
+import random
+import torch
+from torch.utils.data import DataLoader
+from sklearn.cluster import MiniBatchKMeans
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def label_samples(uncertain_samples, validation_data):
+    labels = [validation_data[sample_id]['label'] for sample_id in uncertain_samples]
+    return uncertain_samples, labels
+
+def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2):
+    gp_model.eval()
+    gp_likelihood.eval()
+    uncertain_sample_indices = []
+    sampled_batches = random.sample(list(val_loader), n_batches)  # Randomly sample n_batches from val_loader
+
+    with torch.no_grad():
+        for batch in sampled_batches:
+            # reduced_data = apply_tsne(batch['data'].reshape(batch['data'].size(0), -1), n_components=n_components)
+            # reduced_data_tensor = torch.Tensor(reduced_data).to(device)
+            reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            predictions = gp_likelihood(gp_model(reduced_data_tensor))
+            var = predictions.variance
+            top_indices = torch.argsort(-var.flatten())[:n_samples]
+            uncertain_sample_indices.extend(top_indices.cpu().numpy())
+
+    return uncertain_sample_indices[:n_samples]
+
+# def uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_components=2):
+#     gp_model.eval()
+#     gp_likelihood.eval()
+#     uncertain_sample_indices = []
+#     with torch.no_grad():
+#         for batch_idx, batch in tqdm(enumerate(val_loader), desc='Uncertainty Sampling', unit='batch'):
+#             reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+#             predictions = gp_likelihood(gp_model(reduced_data_tensor))
+#             var = predictions.variance
+#             top_indices = torch.argsort(-var.flatten())[:n_samples]
+#             batch_uncertain_indices = [batch_idx * val_loader.batch_size + idx for idx in top_indices]
+#             uncertain_sample_indices.extend(batch_uncertain_indices)
+#     return uncertain_sample_indices[:n_samples]
+
+def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):
+    # Initialize MiniBatchKMeans
+    minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size)
+
+    # Iterate through data_loader and fit MiniBatchKMeans
+    for batch in data_loader:
+        data = batch['data'].view(batch['data'].size(0), -1).to(device).cpu().numpy()
+        minibatch_kmeans.partial_fit(data)
+
+    return minibatch_kmeans
+
+# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device):
+#     # Compare K-Means with GP model predictions
+#     all_data, all_labels = [], []
+#     for batch in data_loader:
+#         data = batch['data'].view(batch['data'].size(0), -1).to(device)
+#         labels = batch['label'].to(device)
+#         gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy()
+#         kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
+#         all_labels.append(labels.cpu().numpy())
+#         all_data.append((gp_predictions, kmeans_predictions))
+#     return all_data, np.concatenate(all_labels)
+
+def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
+    all_data, all_labels = [], []
+    sampled_batches = random.sample(list(data_loader), n_batches)  # Randomly sample n_batches from data_loader
+
+    for batch in sampled_batches:
+        data = batch['data'].view(batch['data'].size(0), -1).to(device)
+        labels = batch['label'].to(device)
+        # argmax over the class dimension gives one predicted class per sample,
+        # matching the shape of the K-means predictions
+        gp_predictions = gp_model(data).mean.argmax(dim=-1).cpu().numpy()
+        kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
+        all_labels.append(labels.cpu().numpy())
+        all_data.append((gp_predictions, kmeans_predictions))
+
+    return all_data, np.concatenate(all_labels)
+
+def refined_uncertainty_sampling(gp_model, gp_likelihood, kmeans_model, data_loader, n_samples, n_batches, uncertainty_threshold=0.2):
+    gp_model.eval()
+    gp_likelihood.eval()
+    uncertain_sample_indices = []
+
+    # Ensure that n_batches does not exceed the number of batches in the DataLoader
+    n_batches = min(n_batches, len(data_loader))
+
+    # Randomly sample n_batches from data_loader
+    sampled_batches = random.sample(list(data_loader), n_batches)
+
+    with torch.no_grad():
+        for batch in sampled_batches:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+
+            # Disagreement between K-means and GP predictions (1 where they differ)
+            disagreement = (gp_predictions.mean.argmax(dim=-1).cpu().numpy() != kmeans_predictions).astype(int)
+
+            # Uncertainty from the variance of the GP predictions, reduced to a
+            # single score per sample so its shape matches `disagreement`
+            uncertainty = gp_predictions.variance.cpu().numpy()
+            if uncertainty.ndim > 1:
+                uncertainty = uncertainty.mean(axis=-1)
+
+            # Select samples where the disagreement is high and the model is uncertain
+            uncertain_indices = np.where((disagreement > 0) & (uncertainty > uncertainty_threshold))[0]
+            uncertain_sample_indices.extend(uncertain_indices)
+
+    return uncertain_sample_indices[:n_samples]
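+
+if __name__ == "__main__":
+    # Quick smoke test on synthetic data -- an illustrative sketch only, not
+    # part of the pipeline. It assumes batches are dicts with a 'data'
+    # tensor, matching the project loaders; the 16x16 toy size is arbitrary.
+    from torch.utils.data import Dataset
+
+    class _ToyDataset(Dataset):
+        def __init__(self, n=256):
+            self.x = torch.randn(n, 1, 16, 16)
+        def __len__(self):
+            return len(self.x)
+        def __getitem__(self, i):
+            return {'data': self.x[i], 'label': 0}
+
+    toy_loader = DataLoader(_ToyDataset(), batch_size=32)
+    km = run_minibatch_kmeans(toy_loader, n_clusters=4, device=device)
+    print(km.cluster_centers_.shape)  # expected: (4, 256)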
zY?;cO5~?Nu*cjVC$3(q-E>TyX9k@IMDBO z0d}q%r(OX9Cs^*_zg>zs5MbbZaDYp}y`qmUs_<;qk+7R7W$~-((Zc;XY!OH49NzFL zmAOX{V8KYiqY?*1Cd`1}+^eVdIQ*VDi!_Hg~5l1{$%Nfa&W%yIQP$R3$8}QtAJ(9St6tv$oK`pDFG*Py!e$vE{u+uk991yhMC#7Y}_B2(pY=wwUAS#|% zD_1I4{VC&*Z8CjWq7RGoQA8hw&HLKS+RXhW(b|u!{mZ5d*`b3YKikRLjoWXbT`xVu ik3KIvpVJNNbd4Ee8=cTs;EJ@Q#jkT`i0wo*-TwiKZh$8M literal 0 HcmV?d00001 diff --git a/BML_project/models/ss_gp_model.py b/BML_project/models/ss_gp_model.py new file mode 100644 index 0000000..c18f06f --- /dev/null +++ b/BML_project/models/ss_gp_model.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:01:41 2023 + +@author: lrm22005 +""" +import numpy as np +from tqdm import tqdm +import torch +import gpytorch +from sklearn.metrics import precision_recall_fscore_support, roc_auc_score +from sklearn.preprocessing import label_binarize + +num_latents = 6 # This should match the complexity of your data or the number of tasks +num_tasks = 4 # This should match the number of output classes or tasks +num_inducing_points = 50 # This is independent and should be sufficient for the input space + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class MultitaskGPModel(gpytorch.models.ApproximateGP): + def __init__(self): + # Let's use a different set of inducing points for each latent function + inducing_points = torch.rand(num_latents, num_inducing_points, 127 * 128) # Assuming flattened 128x128 images + + # We have to mark the CholeskyVariationalDistribution as batch + # so that we learn a variational distribution for each task + variational_distribution = gpytorch.variational.CholeskyVariationalDistribution( + inducing_points.size(-2), batch_shape=torch.Size([num_latents]) + ) + + # We have to wrap the VariationalStrategy in a LMCVariationalStrategy + # so that the output will be a MultitaskMultivariateNormal rather than a batch output + variational_strategy = gpytorch.variational.LMCVariationalStrategy( + gpytorch.variational.VariationalStrategy( + self, inducing_points, variational_distribution, learn_inducing_locations=True + ), + num_tasks=num_tasks, + num_latents=num_latents, + latent_dim=-1 + ) + + super().__init__(variational_strategy) + + # The mean and covariance modules should be marked as batch + # so we learn a different set of hyperparameters + self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents])) + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])), + batch_shape=torch.Size([num_latents]) + ) + + def forward(self, x): + # The forward function should be written as if we were dealing with each output + # dimension in batch + # Ensure x is correctly shaped. It should have the same last dimension size as inducing_points + # x should be reshaped or sliced to have the shape [?, 1] where ? 
can be any size + # For example, if x originally has shape [N, D], and D != 1, you need to modify x accordingly + # print(f"Input shape: {x.shape}") + # x = x.view(x.size(0), -1) # Flattening the images + # print(f"Input shape after flattening: {x.shape}") # Debugging input shape + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + + # Debugging: Print shapes of intermediate outputs + # print(f"Mean shape: {mean_x.shape}, Covariance shape: {covar_x.shape}") + latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + # print(f"Latent prediction shape: {latent_pred.mean.shape}, {latent_pred.covariance_matrix.shape}") + + return latent_pred + + +def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt'): + model = MultitaskGPModel().to(device) + likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=4, num_classes=4).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=0.1) + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset)) + best_val_loss = float('inf') + epochs_no_improve = 0 + + metrics = { + 'precision': [], + 'recall': [], + 'f1_score': [], + 'auc_roc': [], + 'train_loss': [] # Add a list to store training losses + } + + for epoch in tqdm(range(num_iterations), desc='Training', unit='epoch', leave=False): + for train_batch in train_loader: + model.train() + likelihood.train() + optimizer.zero_grad() + train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device) # Use reshape here + train_y = train_batch['label'].to(device) + output = model(train_x) + loss = -mll(output, train_y) + metrics['train_loss'].append(loss.item()) # Store the training loss + loss.backward() + optimizer.step() + + # Stochastic validation + model.eval() + likelihood.eval() + with torch.no_grad(): + val_indices = torch.randperm(len(val_loader.dataset))[:int(1 * len(val_loader.dataset))] + val_loss = 0.0 + val_labels = [] + val_predictions = [] + for idx in val_indices: + val_batch = val_loader.dataset[idx] + val_x = val_batch['data'].reshape(-1).unsqueeze(0).to(device) # Use reshape here + val_y = torch.tensor([val_batch['label']], device=device) + val_output = model(val_x) + val_loss_batch = -mll(val_output, val_y).sum() + val_loss += val_loss_batch.item() + val_labels.append(val_y.item()) + val_predictions.append(val_output.mean.argmax(dim=-1).item()) + + precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro') + # auc_roc = roc_auc_score(label_binarize(val_labels, classes=np.arange(n_classes)), + # label_binarize(val_predictions, classes=np.arange(n_classes)), + # multi_class='ovr') + + metrics['precision'].append(precision) + metrics['recall'].append(recall) + metrics['f1_score'].append(f1) + # metrics['auc_roc'].append(auc_roc) + val_loss /= len(val_indices) + + if val_loss < best_val_loss: + best_val_loss = val_loss + epochs_no_improve = 0 + torch.save({'model_state_dict': model.state_dict(), + 'likelihood_state_dict': likelihood.state_dict(), + 'optimizer_state_dict': optimizer.state_dict()}, checkpoint_path) + else: + epochs_no_improve += 1 + if epochs_no_improve >= patience: + print(f"Early stopping triggered at epoch {epoch+1}") + break + + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + + 
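+
+# Usage sketch (illustrative, not part of the pipeline): the loaders are
+# assumed to yield dicts with a 'data' tensor of shape [B, 1, 128, 128] and
+# an integer 'label', as the utils_gp loaders do.
+#
+#   model, likelihood, metrics = train_gp_model(
+#       train_loader, val_loader, num_iterations=50, n_classes=4,
+#       patience=10, checkpoint_path='model_checkpoint_full.pt')
+#   print(metrics['f1_score'][-1])  # validation macro-F1 of the last epoch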
+
+def semi_supervised_labeling(kmeans_model, gp_model, gp_likelihood, data_loader, confidence_threshold=0.8):
+    gp_model.eval()
+    gp_likelihood.eval()
+    labeled_samples = []
+
+    with torch.no_grad():
+        for batch in data_loader:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+
+            # Use the maximum class probability as the confidence score. The
+            # likelihood's output distribution carries class probabilities,
+            # possibly with a leading sample dimension that we average out.
+            probs = gp_predictions.probs
+            if probs.dim() == 3:
+                probs = probs.mean(dim=0)
+            confidence, gp_labels = probs.max(dim=-1)
+
+            # Use GP predictions where the model is confident, K-means otherwise
+            confident_mask = (confidence > confidence_threshold).cpu().numpy()
+            for i, confident in enumerate(confident_mask):
+                if confident:
+                    labeled_samples.append((data_tensor[i], gp_labels[i].item()))
+                else:
+                    labeled_samples.append((data_tensor[i], kmeans_predictions[i]))
+
+    return labeled_samples
+
+def calculate_elbo(model, likelihood, data_loader):
+    """
+    Calculates the ELBO (Evidence Lower Bound) score for the model on the given data.
+
+    Args:
+    - model: The trained Gaussian Process model.
+    - likelihood: The likelihood associated with the GP model.
+    - data_loader: DataLoader providing the data over which to calculate ELBO.
+
+    Returns:
+    - elbo_score: The calculated ELBO score.
+    """
+    model.eval()
+    likelihood.eval()
+    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(data_loader.dataset))
+
+    with torch.no_grad():
+        elbo_score = 0.0
+        for batch in data_loader:
+            train_x = batch['data'].reshape(batch['data'].size(0), -1).to(device)
+            train_y = batch['label'].to(device)
+            output = model(train_x)
+            # mll returns the ELBO itself, i.e. the negative of the training loss
+            elbo_score += mll(output, train_y).sum().item()
+
+        # Average the ELBO over all data samples
+        elbo_score /= len(data_loader.dataset)
+
+    return elbo_score
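+
+# Note: the quantity maximized during training and measured by calculate_elbo
+# is the variational evidence lower bound,
+#     ELBO = E_q[log p(y | f)] - KL(q(u) || p(u)),
+# so a larger (less negative) per-sample ELBO indicates a better variational
+# fit to the labeled data; ss_main_ss.py uses it as the signal for turning
+# on threshold-based labeling.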
diff --git a/BML_project/ss_main.py b/BML_project/ss_main.py
new file mode 100644
index 0000000..a610684
--- /dev/null
+++ b/BML_project/ss_main.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:47:27 2023
+
+@author: lrm22005
+"""
+from tqdm import tqdm
+import torch
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples
+from models.ss_gp_model import MultitaskGPModel, train_gp_model
+from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data
+from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def main():
+    # Set parameters like n_classes, batch_size, etc.
+    n_classes = 4
+    batch_size = 1024
+    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
+    data_format = 'pt'
+
+    # Preprocess data
+    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)
+
+    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device)
+
+    # Initialize result storage
+    results = {
+        'train_loss': [],
+        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
+        'test_metrics': None
+    }
+
+    # Initial model training
+    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes)
+
+    # Save the training metrics for future visualization
+    results['train_loss'].extend(training_metrics['train_loss'])
+    results['validation_metrics']['precision'].extend(training_metrics['precision'])
+    results['validation_metrics']['recall'].extend(training_metrics['recall'])
+    results['validation_metrics']['f1'].extend(training_metrics['f1_score'])
+    # results['validation_metrics']['auc_roc'].extend(training_metrics['auc_roc'])
+
+    active_learning_iterations = 10
+    # Active Learning Iterations
+    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
+        # Perform uncertainty sampling to select new samples from the validation set
+        uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=batch_size, n_batches=5)
+
+        # Update the training loader with uncertain samples
+        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
+
+        # Re-train the model with the updated training data
+        model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
+
+        # Store the validation metrics after each active learning iteration
+        # (extend with the per-epoch lists; note the 'f1_score' key used by train_gp_model)
+        results['train_loss'].extend(val_metrics['train_loss'])
+        results['validation_metrics']['precision'].extend(val_metrics['precision'])
+        results['validation_metrics']['recall'].extend(val_metrics['recall'])
+        results['validation_metrics']['f1'].extend(val_metrics['f1_score'])
+        # results['validation_metrics']['auc_roc'].extend(val_metrics['auc_roc'])
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/BML_project/ss_main_ss.py b/BML_project/ss_main_ss.py
new file mode 100644
index 0000000..0d0aed4
--- /dev/null
+++ b/BML_project/ss_main_ss.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 4 14:40:13 2024
+
+@author: lrm22005
+"""
+from tqdm import tqdm
+import torch
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_labeled_samples, update_train_loader_with_uncertain_samples
+from models.ss_gp_model import MultitaskGPModel, train_gp_model, semi_supervised_labeling, calculate_elbo
+from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data, threshold_based_labeling, resolve_conflicts
+from active_learning.ss_active_learning import run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions, refined_uncertainty_sampling
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def main():
+    # Set parameters like n_classes, batch_size, etc.
+    n_classes = 4
+    batch_size = 1024
+    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
+    data_format = 'pt'
+
+    # Preprocess data
+    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)
+
+    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device, batch_size=batch_size)
+
+    # Initialize result storage ('elbo' tracks the score used to gate
+    # threshold-based labeling below)
+    results = {
+        'train_loss': [],
+        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
+        'elbo': [],
+        'test_metrics': None
+    }
+
+    # Initial model training
+    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes)
+
+    # Save the training metrics for future visualization
+    results['train_loss'].extend(training_metrics['train_loss'])
+    results['validation_metrics']['precision'].extend(training_metrics['precision'])
+    results['validation_metrics']['recall'].extend(training_metrics['recall'])
+    results['validation_metrics']['f1'].extend(training_metrics['f1_score'])
+
+    elbo_threshold = -0.5  # Define a threshold for the ELBO score
+    use_threshold_labeling = False  # Initially, do not use threshold-based labeling
+
+    active_learning_iterations = 10
+    # Active Learning Iterations
+    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
+        # Perform uncertainty sampling to select new samples from the validation set
+        uncertain_sample_indices = refined_uncertainty_sampling(model, likelihood, kmeans_model, val_loader, n_samples=batch_size, n_batches=5)
+
+        # Semi-supervised labeling with K-means and GP model
+        semi_supervised_samples = semi_supervised_labeling(kmeans_model, model, likelihood, val_loader)
+
+        labeled_samples = semi_supervised_samples  # Initially, use only semi-supervised samples
+
+        if use_threshold_labeling:
+            # Threshold-based labeling to decide when a sample's predicted label should be trusted
+            threshold_based_samples = threshold_based_labeling(kmeans_model, model, likelihood, val_loader)
+
+            # Combine the two sets of labeled samples; resolve_conflicts keeps
+            # the semi-supervised label when the two methods disagree
+            labeled_samples = resolve_conflicts(semi_supervised_samples, threshold_based_samples)
+
+        # Update the training loader with uncertain and newly labeled samples
+        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
+        train_loader = update_train_loader_with_labeled_samples(train_loader, labeled_samples, batch_size)
+
+        # Re-train the model with the updated training data
+        model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
+
+        # Store the ELBO score after each active learning iteration
+        current_elbo = calculate_elbo(model, likelihood, train_loader)
+        results['elbo'].append(current_elbo)
+
+        # Determine if the threshold-based labeling should be used in the next iteration based on the ELBO score
+        if current_elbo >= elbo_threshold:
+            use_threshold_labeling = True
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
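+
+# Worked example of the ELBO gate in main(): with elbo_threshold = -0.5, an
+# average per-sample ELBO of -0.3 on the training data turns threshold-based
+# labeling on for all later iterations, while a value of -0.8 keeps the loop
+# on semi-supervised labels only.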
zaT_T*NO6*gAKi^CbsaziLDg!J9?%bJHT)SVGC!rFZoLKR4ZDtSvGQbRRpAa5*$ zt9S`NEYN$JFpmQcVr=xS; zP__#I-nL)dp=kdcT`4yL??O#aLZvRfCtanV3WOmsv?N1&!hhBl++Uz^{vn#GgfPLOT7fb|xxM1@x zmZb8}%}!9d2)BXyP1LW~ZhRKJ{o`p31Mz6@h|M#+?^Q!wOG4CJWmbJ887oqg4ad7x z${W>y$UxbI9`dea{vZ6 zI2RxBS_F~I({I^HWE>SBcE{2NE!U{(dVI9;U3~M8dM~ht_Zm{O)j)&y0u9~^?%}g03K&BS#H~8 z)!4D!bJiiKN`mJ5V^BP!GO!1}h^*2@ny2b(@*Cz!6QnORCMSd!)CFdymN&@m4rN_o9 zcllI_-yC?FaT@Gxf8VyjqB9Vw{hTaA(!p@cmTw`e70!No7rvSBg2V!3@FE=e69YLkY$}tp+N=6*Rv1{7W|eG5$O7%Fg2~-` ztv5UivZL^*`3gR_c07%4{<2JOUDjKNjMgEub?EV}wTX?xI{~WmDp_v5qfi>LF*7!{ zHuo;p_4GI8IU_b?#)hC8J-V6Nl8xw9GkSIH1_8<2CI9CAGQTzOr_AdiH-p&0WBR!fqi59Y86^#^Z27o8IBB%rFk5e|-Ky9ZhTQPf)>p62zE*WK zYM@aQf$*sFwu*h%40PQ@*L8H=VI0)H{ZlM^k;a>?U@FBc=~T+{%lT|>1z&?%vD)A> zC7C-(dM6&X}>URNq!P;51eGaoH8{0dO91<#F zDKwT<_`D=s!}p<$L{W7eiX9(K(>n;!Xon(2grYlis=ac*rH<>~`z;mIYxi5~g6_R{ aP&eJOLpjB82R*PuIYn agreement_threshold: + new_labels.append(gp_predictions.mean.argmax(dim=-1)[i].item()) + else: + new_labels.append(kmeans_predictions[i]) + + return new_labels + +def resolve_conflicts(semi_supervised_samples, threshold_based_samples): + """ + Resolves conflicts between two sets of labeled samples. + + Args: + - semi_supervised_samples: Labeled samples from the semi_supervised_labeling method. + - threshold_based_samples: Labeled samples from the threshold_based_labeling method. + + Returns: + - resolved_samples: The resolved set of labeled samples. + """ + resolved_samples = [] + + # Create dictionaries for quick lookup + semi_supervised_dict = {segment_name: label for segment_name, label in semi_supervised_samples} + threshold_based_dict = {segment_name: label for segment_name, label in threshold_based_samples} + + # Combine all unique segment names + all_segments = set(semi_supervised_dict.keys()).union(set(threshold_based_dict.keys())) + + for segment_name in all_segments: + if segment_name in semi_supervised_dict and segment_name in threshold_based_dict: + # If there's a conflict, resolve it here. 
+            # For simplicity, we're taking the label from semi_supervised.
+            # You can implement other strategies like majority vote, confidence weighting, or agreement only.
+            resolved_samples.append((segment_name, semi_supervised_dict[segment_name]))
+        elif segment_name in semi_supervised_dict:
+            resolved_samples.append((segment_name, semi_supervised_dict[segment_name]))
+        elif segment_name in threshold_based_dict:
+            resolved_samples.append((segment_name, threshold_based_dict[segment_name]))
+
+    return resolved_samples
+
+def parse_classification_report(report):
+    """Parse a classification report into a dictionary of metrics."""
+    lines = report.split('\n')
+    # With sklearn's text report ending in a newline, lines[-2] is the
+    # "weighted avg" row: ['weighted', 'avg', precision, recall, f1, support]
+    main_metrics = lines[-2].split()
+
+    return {
+        'precision': float(main_metrics[2]),
+        'recall': float(main_metrics[3]),
+        'f1': float(main_metrics[4]),
+        'auc_roc': None  # AUC-ROC is not part of the classification report by default
+    }
diff --git a/BML_project/utils_gp/visualization.py b/BML_project/utils_gp/visualization.py
new file mode 100644
index 0000000..3ecf59b
--- /dev/null
+++ b/BML_project/utils_gp/visualization.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:20:55 2023
+
+@author: lrm22005
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import confusion_matrix
+
+def plot_training_performance(train_loss, validation_metrics):
+    # Plot training loss (recorded once per training batch)
+    plt.figure(figsize=(14, 6))
+    plt.subplot(1, 2, 1)
+    plt.plot(range(1, len(train_loss) + 1), train_loss, 'b-', label='Training Loss')
+    plt.title('Training Loss')
+    plt.xlabel('Training Batches')
+    plt.ylabel('Loss')
+    plt.legend()
+
+    # Plot validation metrics (recorded once per epoch, so they get their own
+    # x-axis); AUC-ROC is optional and plotted only if it was collected
+    plt.subplot(1, 2, 2)
+    epochs = range(1, len(validation_metrics['precision']) + 1)
+    plt.plot(epochs, validation_metrics['precision'], 'r-', label='Precision')
+    plt.plot(epochs, validation_metrics['recall'], 'g-', label='Recall')
+    plt.plot(epochs, validation_metrics['f1'], 'b-', label='F1 Score')
+    if validation_metrics.get('auc_roc'):
+        plt.plot(epochs, validation_metrics['auc_roc'], 'y-', label='AUC-ROC')
+    plt.title('Validation Metrics')
+    plt.xlabel('Epochs')
+    plt.ylabel('Metrics')
+    plt.legend()
+
+    plt.tight_layout()
+    plt.show()
+
+def plot_results(results):
+    plt.figure(figsize=(12, 5))
+    plt.subplot(1, 2, 1)
+    plt.plot(results['train_loss'], label='Train Loss')
+    plt.title('Training Loss Over Time')
+    plt.legend()
+
+    plt.subplot(1, 2, 2)
+    for metric in ['precision', 'recall', 'f1']:
+        plt.plot(results['validation_metrics'][metric], label=metric.title())
+    plt.title('Validation Metrics Over Time')
+    plt.legend()
+    plt.show()
+
+    test_metrics = results['test_metrics']
+    print("Test Metrics:")
+    print(f"Precision: {test_metrics['precision']}")
+    print(f"Recall: {test_metrics['recall']}")
+    print(f"F1 Score: {test_metrics['f1']}")
+    print(f"AUC-ROC: {test_metrics['auc_roc']}")
+
+def plot_comparative_results(gp_vs_kmeans_data, original_labels):
+    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
+
+    # Plot 1: Confusion Matrix for GP Predictions vs Original Labels
+    gp_predictions = [pair[0] for pair in gp_vs_kmeans_data]
+    gp_predictions = np.concatenate(gp_predictions)
+    cm_gp = confusion_matrix(original_labels, gp_predictions)
+    sns.heatmap(cm_gp, annot=True, ax=axes[0], fmt='g')
+    axes[0].set_title('GP Model Predictions vs Original Labels')
+    axes[0].set_xlabel('Predicted Labels')
+    axes[0].set_ylabel('True Labels')
+
+    # Plot 2: Confusion Matrix for K-Means Predictions vs Original Labels
+    kmeans_predictions = [pair[1] for pair in gp_vs_kmeans_data]
+    kmeans_predictions = np.concatenate(kmeans_predictions)
+    cm_kmeans = confusion_matrix(original_labels, kmeans_predictions)
+    sns.heatmap(cm_kmeans, annot=True, ax=axes[1], fmt='g')
+    axes[1].set_title('K-Means Predictions vs Original Labels')
+    axes[1].set_xlabel('Predicted Labels')
+    axes[1].set_ylabel('True Labels')
+
+    plt.tight_layout()
+    plt.show()
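+
+# Usage sketch (illustrative): `results` is the dict built in ss_main.py, and
+# `gp_vs_kmeans_data` / `original_labels` come from
+# stochastic_compare_kmeans_gp_predictions.
+#
+#   plot_training_performance(results['train_loss'], results['validation_metrics'])
+#   plot_comparative_results(gp_vs_kmeans_data, original_labels)
+#   plot_results(results)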
diff --git a/pytorch_file_generation_loader_update.py b/pytorch_file_generation_loader_update.py
new file mode 100644
index 0000000..cea6242
--- /dev/null
+++ b/pytorch_file_generation_loader_update.py
@@ -0,0 +1,374 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 17:11:29 2023
+
+@author: lrm22005
+"""
+
+# import os
+# import pandas as pd
+# import numpy as np
+# import torch
+# from PIL import Image
+
+#### From first to the last
+# def preprocess_and_save_data(data_path, output_path):
+#     # Make sure the output directory exists
+#     if not os.path.exists(output_path):
+#         os.makedirs(output_path)
+
+#     # Traverse the directories for each UID
+#     for uid in os.listdir(data_path):
+#         uid_path = os.path.join(data_path, uid)
+#         if os.path.isdir(uid_path):
+#             # Make a corresponding directory in the output path
+#             uid_output_path = os.path.join(output_path, uid)
+#             if not os.path.exists(uid_output_path):
+#                 os.makedirs(uid_output_path)
+
+#             # Process each file within the UID directory
+#             for file in os.listdir(uid_path):
+#                 if file.endswith('.csv') or file.endswith('.png'):
+#                     file_path = os.path.join(uid_path, file)
+#                     if file.endswith('.csv'):
+#                         data = pd.read_csv(file_path).values
+#                     else:  # if file.endswith('.png'):
+#                         data = np.array(Image.open(file_path))
+
+#                     data_tensor = torch.tensor(data, dtype=torch.float32)
+#                     output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+#                     torch.save(data_tensor, output_file_path)
+
+# # Define your input and output paths
+# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# # Run the preprocessing function
+# preprocess_and_save_data(input_path, output_path)
+
+#### From the last to the first
+# def preprocess_and_save_data(data_path, output_path):
+#     # Make sure the output directory exists
+#     if not os.path.exists(output_path):
+#         os.makedirs(output_path)
+
+#     # Traverse the directories for each UID
+#     # Get the list of directories and sort them in descending order
+#     uids = sorted(os.listdir(data_path), reverse=True)
+#     for uid in uids:
+#         uid_path = os.path.join(data_path, uid)
+#         if os.path.isdir(uid_path):
+#             # Make a corresponding directory in the output path
+#             uid_output_path = os.path.join(output_path, uid)
+#             if not os.path.exists(uid_output_path):
+#                 os.makedirs(uid_output_path)
+
+#             # Process each file within the UID directory
+#             for file in os.listdir(uid_path):
+#                 if file.endswith('.csv') or file.endswith('.png'):
+#                     file_path = os.path.join(uid_path, file)
+#                     if file.endswith('.csv'):
+#                         data = pd.read_csv(file_path).values
+#                     else:  # if file.endswith('.png'):
+#                         data = np.array(Image.open(file_path))
+
+#                     data_tensor = torch.tensor(data, dtype=torch.float32)
+#                     output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+#                     torch.save(data_tensor, output_file_path)
+
+# # Define your input and output paths
+# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# # Run the preprocessing function
+# preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+#### First to last
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+
+    if file.endswith('.csv'):
+        data = pd.read_csv(file_path).values
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+    else:
+        return
+
+    data_tensor = torch.tensor(data, dtype=torch.float32)
+    base_name, extension = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Traverse the directories for each UID
+    for uid in os.listdir(data_path):
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            # Make a corresponding directory in the output path
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Create a ThreadPoolExecutor for parallel processing
+            with ThreadPoolExecutor() as executor:
+                files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+                for file in files_to_process:
+                    executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+#### Last to first
+import os
+import pandas as pd
+import numpy as np
+import torch
+from PIL import Image
+from concurrent.futures import ThreadPoolExecutor
+
+def process_file(uid_path, file, uid_output_path):
+    if file.endswith('.csv') or file.endswith('.png'):
+        file_path = os.path.join(uid_path, file)
+        if file.endswith('.csv'):
+            data = pd.read_csv(file_path).values
+        else:  # if file.endswith('.png'):
+            data = np.array(Image.open(file_path))
+
+        data_tensor = torch.tensor(data, dtype=torch.float32)
+        output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+        torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    uids = sorted(os.listdir(data_path), reverse=True)
+    for uid in uids:
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Use a ThreadPoolExecutor to process files in parallel
+            with ThreadPoolExecutor() as executor:
+######################################################################################################################################################
+#### Last to first
+import os
+import pandas as pd
+import numpy as np
+import torch
+from PIL import Image
+from concurrent.futures import ThreadPoolExecutor
+
+def process_file(uid_path, file, uid_output_path):
+    if file.endswith('.csv') or file.endswith('.png'):
+        file_path = os.path.join(uid_path, file)
+        if file.endswith('.csv'):
+            # header=None: the first row of these CSVs is data, not column names
+            data = pd.read_csv(file_path, header=None).values
+        else:  # file.endswith('.png')
+            data = np.array(Image.open(file_path))
+
+        data_tensor = torch.tensor(data, dtype=torch.float32)
+        output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+        torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Get the list of UID directories and sort them in descending order
+    uids = sorted(os.listdir(data_path), reverse=True)
+    for uid in uids:
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Use a ThreadPoolExecutor to process files in parallel
+            with ThreadPoolExecutor() as executor:
+                # Create a list of tasks for the executor
+                tasks = [executor.submit(process_file, uid_path, file, uid_output_path) for file in os.listdir(uid_path)]
+                # Wait for all tasks to complete
+                for task in tasks:
+                    task.result()
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+######################################################################################################################################################
+######################################################################################################################################################
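+
+# A fault-tolerant variant, sketched by the editor under the same assumptions as
+# the code above (re-uses process_file): executor.submit defers worker exceptions
+# until .result() is called, so the task.result() loop above is what actually
+# surfaces failures. as_completed lets a run log a bad file and keep going
+# instead of aborting the whole UID on the first exception.
+from concurrent.futures import as_completed
+
+def run_tasks_tolerantly(executor, uid_path, files, uid_output_path):
+    futures = {executor.submit(process_file, uid_path, f, uid_output_path): f for f in files}
+    for future in as_completed(futures):
+        try:
+            future.result()
+        except Exception as exc:
+            print(f"Failed to preprocess {futures[future]}: {exc}")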
+
+from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import StandardScaler
+
+class CustomDataset(Dataset):
+    def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False):
+        self.data_path = data_path
+        self.labels_path = labels_path
+        self.UIDs = UIDs
+        self.standardize = standardize
+        self.data_format = data_format
+        self.read_all_labels = read_all_labels
+        self.refresh_dataset()
+
+    def refresh_dataset(self):
+        self.segment_names, self.labels = self.extract_segment_names_and_labels()
+
+    def add_uids(self, new_uids):
+        unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
+        self.UIDs.extend(unique_new_uids)
+        self.refresh_dataset()
+
+    def __len__(self):
+        return len(self.segment_names)
+
+    def __getitem__(self, idx):
+        segment_name = self.segment_names[idx]
+        label = self.labels[segment_name]
+        time_freq_tensor = self.load_data(segment_name)
+        return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
+
+    def extract_segment_names_and_labels(self):
+        segment_names = []
+        labels = {}
+
+        for UID in self.UIDs:
+            label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv")
+            if os.path.exists(label_file):
+                label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label'])
+                label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0])
+                for idx, segment_name in enumerate(label_segment_names):
+                    label_val = label_data['label'].values[idx]
+                    if self.read_all_labels:
+                        # Assign -1 if the label is not in [0, 1, 2, 3]
+                        labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1
+                        if segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                    else:
+                        # Only add segments with labels in [0, 1, 2, 3]
+                        if label_val in [0, 1, 2, 3] and segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                            labels[segment_name] = label_val
+
+        return segment_names, labels
+
+    def load_data(self, segment_name):
+        data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
+        # Derive the file suffix from the configured data format ('csv', 'png', or 'pt')
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.' + self.data_format)
+
+        try:
+            if self.data_format == 'csv' and seg_path.endswith('.csv'):
+                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
+                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
+            elif self.data_format == 'png' and seg_path.endswith('.png'):
+                img = Image.open(seg_path)
+                img_data = np.array(img)
+                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
+            elif self.data_format == 'pt' and seg_path.endswith('.pt'):
+                time_freq_tensor = torch.load(seg_path)
+            else:
+                raise ValueError("Unsupported file format")
+
+            if self.standardize:
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
+
+            return time_freq_tensor.clone()
+
+        except Exception as e:
+            print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
+            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+
+    def standard_scaling(self, data):
+        scaler = StandardScaler()
+        data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
+        return torch.Tensor(data)
+
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
+    return dataloader
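+
+def demo_loader(labels_dir, uids):
+    # Hypothetical usage sketch for load_data_split_batched (labels_dir and uids
+    # are illustrative placeholders, not values from this project): builds a
+    # loader over the .pt files written above and prints one batch.
+    loader = load_data_split_batched(output_path, labels_dir, uids,
+                                     batch_size=32, data_format='pt')
+    batch = next(iter(loader))
+    print(batch['data'].shape, batch['label'][:5], batch['segment_name'][:2])
+
+# demo_loader(r'path\to\label_csvs', ['001', '002'])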
+
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+
+    if file.endswith('.csv'):
+        # Ensure that the CSV file is read without an index or header
+        data = pd.read_csv(file_path, header=None).values
+        # Check the shape of the data and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
+            return  # Skip malformed files: .view(128, 128) below would raise otherwise
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+        # Check the shape of the image and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
+            return  # Skip malformed images for the same reason
+    else:
+        return  # Skip files that are not CSV or PNG
+
+    # Convert data to a 128x128 tensor
+    data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
+    base_name, _ = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Traverse the directories for each UID
+    for uid in os.listdir(data_path):
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            # Make a corresponding directory in the output path
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Create a ThreadPoolExecutor for parallel processing
+            with ThreadPoolExecutor() as executor:
+                files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+                for file in files_to_process:
+                    executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+
+
+def preprocess_and_save_data(data_path, output_path):
+    # Reversed traversal; re-uses the shape-checked preprocess_file defined above
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    all_uids = [uid for uid in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, uid))]
+    for uid in reversed(all_uids):  # Reverse the list of directories
+        uid_path = os.path.join(data_path, uid)
+        uid_output_path = os.path.join(output_path, uid)
+        if not os.path.exists(uid_output_path):
+            os.makedirs(uid_output_path)
+        with ThreadPoolExecutor() as executor:
+            files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+            for file in files_to_process:
+                executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Top-level script execution:
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+preprocess_and_save_data(input_path, output_path)
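+
+# Minimal idempotence sketch (an assumption-laden addition: it relies on the .pt
+# output naming used above). When the forward and reversed traversals run at the
+# same time, each can skip segments the other has already converted by filtering
+# the file list with a guard like this.
+def needs_processing(file, uid_output_path):
+    out_path = os.path.join(uid_output_path, os.path.splitext(file)[0] + '.pt')
+    return not os.path.exists(out_path)
+
+# e.g. files_to_process = [f for f in os.listdir(uid_path)
+#                          if f.endswith(('.csv', '.png')) and needs_processing(f, uid_output_path)]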
diff --git a/semisupervised_method.py b/semisupervised_method.py
index 547ffcd..2608cd4 100644
--- a/semisupervised_method.py
+++ b/semisupervised_method.py
@@ -64,18 +64,13 @@ def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='
         self.standardize = standardize
         self.data_format = data_format
         self.read_all_labels = read_all_labels
-        self.transforms = ToTensor()
         self.refresh_dataset()
 
     def refresh_dataset(self):
-        # Extract unique segment names and their corresponding labels
         self.segment_names, self.labels = self.extract_segment_names_and_labels()
 
     def add_uids(self, new_uids):
-        # Ensure new UIDs are unique and not already in the dataset
         unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
-
-        # Add unique new UIDs and refresh the dataset
         self.UIDs.extend(unique_new_uids)
         self.refresh_dataset()
 
@@ -85,10 +80,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
         segment_name = self.segment_names[idx]
         label = self.labels[segment_name]
-
-        # Load data on-the-fly based on the segment_name
         time_freq_tensor = self.load_data(segment_name)
-
         return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
 
     def extract_segment_names_and_labels(self):
@@ -117,36 +109,24 @@ def extract_segment_names_and_labels(self):
 
     def load_data(self, segment_name):
         data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
-        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.csv')
-
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.pt')
         try:
-            if self.data_format == 'csv' and seg_path.endswith('.csv'):
-                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
-                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
-            elif self.data_format == 'png' and seg_path.endswith('.png'):
-                img = Image.open(seg_path)
-                img_data = np.array(img)
-                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
-            else:
-                raise ValueError("Unsupported file format")
-
+            time_freq_tensor = torch.load(seg_path)
             if self.standardize:
-                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
-
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)
             return time_freq_tensor.clone()
-
         except Exception as e:
             print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
-            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+            return torch.zeros((1, 128, 128))
 
     def standard_scaling(self, data):
         scaler = StandardScaler()
         data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
         return torch.Tensor(data)
 
-def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
-    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
-    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers)
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, read_all_labels=True, drop_last=False, num_workers=4):
+    # Pass read_all_labels by keyword; the positional slot after standardize is
+    # data_format in CustomDataset.__init__
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, read_all_labels=read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
     return dataloader
 
 # To validate the len of the dataloader
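+import math
+
+def validate_loader_length(loader, batch_size, drop_last=False):
+    # Sketch of the validation the comment above refers to (assumes the loader
+    # was built by load_data_split_batched): len(loader) must agree with the
+    # dataset size given the batch size and drop_last setting.
+    expected = len(loader.dataset) // batch_size if drop_last else math.ceil(len(loader.dataset) / batch_size)
+    assert len(loader) == expected, f"Expected {expected} batches, got {len(loader)}"
+    return expected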