From 9388a463adbefccb970dc0178d9130bc7815ceb4 Mon Sep 17 00:00:00 2001
From: Luis Roberto Mercado Diaz
Date: Thu, 18 Jan 2024 14:27:48 -0500
Subject: [PATCH] Model Gaussian Variational Process

Adding the final version of the model with semi-supervised and supervised
methods.

Co-Authored-By: Dong Han
---
 .../ss_active_learning.cpython-311.pyc        | Bin 0 -> 6942 bytes
 .../active_learning/ss_active_learning.py     | 120 ++++++
 .../__pycache__/ss_gp_model.cpython-311.pyc   | Bin 0 -> 12179 bytes
 BML_project/models/ss_gp_model.py             | 198 ++++++++++
 BML_project/ss_main.py                        |  84 ++++
 BML_project/ss_main_ss.py                     | 103 +++++
 .../__pycache__/data_loader.cpython-311.pyc   | Bin 0 -> 17950 bytes
 .../__pycache__/ss_evaluation.cpython-311.pyc | Bin 0 -> 9772 bytes
 .../__pycache__/visualization.cpython-311.pyc | Bin 0 -> 5734 bytes
 BML_project/utils_gp/data_loader.py           | 297 ++++++++++++++
 BML_project/utils_gp/ss_evaluation.py         | 167 ++++++++
 BML_project/utils_gp/visualization.py         |  81 ++++
 pytorch_file_generation_loader_update.py      | 374 ++++++++++++++++++
 semisupervised_method.py                      |  34 +-
 14 files changed, 1431 insertions(+), 27 deletions(-)
 create mode 100644 BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc
 create mode 100644 BML_project/active_learning/ss_active_learning.py
 create mode 100644 BML_project/models/__pycache__/ss_gp_model.cpython-311.pyc
 create mode 100644 BML_project/models/ss_gp_model.py
 create mode 100644 BML_project/ss_main.py
 create mode 100644 BML_project/ss_main_ss.py
 create mode 100644 BML_project/utils_gp/__pycache__/data_loader.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/__pycache__/ss_evaluation.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/__pycache__/visualization.cpython-311.pyc
 create mode 100644 BML_project/utils_gp/data_loader.py
 create mode 100644 BML_project/utils_gp/ss_evaluation.py
 create mode 100644 BML_project/utils_gp/visualization.py
 create mode 100644 pytorch_file_generation_loader_update.py

diff --git a/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45c9875deaf1bad595378c59b1e08a1296ac1ec3
Binary files /dev/null and b/BML_project/active_learning/__pycache__/ss_active_learning.cpython-311.pyc differ
diff --git a/BML_project/active_learning/ss_active_learning.py b/BML_project/active_learning/ss_active_learning.py
new file mode 100644
index 0000000..546c4ad
--- /dev/null
+++ b/BML_project/active_learning/ss_active_learning.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:23:23 2023
+
+@author: lrm22005
+"""
+import numpy as np
+import random
+import torch
+from torch.utils.data import DataLoader
+from sklearn.cluster import MiniBatchKMeans
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def label_samples(uncertain_samples, validation_data):
+    labels = [validation_data[sample_id]['label'] for sample_id in uncertain_samples]
+    return uncertain_samples, labels
+
+def stochastic_uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_batches, n_components=2):
+    gp_model.eval()
+    gp_likelihood.eval()
+    uncertain_sample_indices = []
+    sampled_batches = random.sample(list(val_loader), n_batches)  # Randomly sample n_batches from val_loader
+
+    with torch.no_grad():
+        for batch in sampled_batches:
+            # reduced_data = apply_tsne(batch['data'].reshape(batch['data'].size(0), -1), n_components=n_components)
+            # reduced_data_tensor = torch.Tensor(reduced_data).to(device)
+            reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            predictions = gp_likelihood(gp_model(reduced_data_tensor))
+            var = predictions.variance
+            top_indices = torch.argsort(-var.flatten())[:n_samples]
+            uncertain_sample_indices.extend(top_indices.cpu().numpy())
+
+    return uncertain_sample_indices[:n_samples]
+
+# def uncertainty_sampling(gp_model, gp_likelihood, val_loader, n_samples, n_components=2):
+#     gp_model.eval()
+#     gp_likelihood.eval()
+#     uncertain_sample_indices = []
+#     with torch.no_grad():
+#         for batch_idx, batch in tqdm(enumerate(val_loader), desc='Uncertainty Sampling', unit='batch'):
+#             reduced_data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+#             predictions = gp_likelihood(gp_model(reduced_data_tensor))
+#             var = predictions.variance
+#             top_indices = torch.argsort(-var.flatten())[:n_samples]
+#             batch_uncertain_indices = [batch_idx * val_loader.batch_size + idx for idx in top_indices]
+#             uncertain_sample_indices.extend(batch_uncertain_indices)
+#     return uncertain_sample_indices[:n_samples]
+
+def run_minibatch_kmeans(data_loader, n_clusters, device, batch_size=100):
+    # Initialize MiniBatchKMeans
+    minibatch_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0, batch_size=batch_size)
+
+    # Iterate through data_loader and fit MiniBatchKMeans
+    for batch in data_loader:
+        data = batch['data'].view(batch['data'].size(0), -1).to(device).cpu().numpy()
+        minibatch_kmeans.partial_fit(data)
+
+    return minibatch_kmeans
+
+# def compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, device):
+#     # Compare K-Means with GP model predictions
+#     all_data, all_labels = [], []
+#     for batch in data_loader:
+#         data = batch['data'].view(batch['data'].size(0), -1).to(device)
+#         labels = batch['label'].to(device)
+#         gp_predictions = gp_model(data).mean.argmax(dim=0).cpu().numpy()
+#         kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
+#         all_labels.append(labels.cpu().numpy())
+#         all_data.append((gp_predictions, kmeans_predictions))
+#     return all_data, np.concatenate(all_labels)
+
+def stochastic_compare_kmeans_gp_predictions(kmeans_model, gp_model, data_loader, n_batches, device):
+    all_data, all_labels = [], []
+    sampled_batches = random.sample(list(data_loader), n_batches)  # Randomly sample n_batches from data_loader
+
+    for batch in sampled_batches:
+        data = batch['data'].view(batch['data'].size(0), -1).to(device)
+        labels = batch['label'].to(device)
+        # argmax over the class dimension gives one predicted class per sample,
+        # matching the shape of the K-means predictions
+        gp_predictions = gp_model(data).mean.argmax(dim=-1).cpu().numpy()
+        kmeans_predictions = kmeans_model.predict(data.cpu().numpy())
+        all_labels.append(labels.cpu().numpy())
+        all_data.append((gp_predictions, kmeans_predictions))
+
+    return all_data, np.concatenate(all_labels)
+
+def refined_uncertainty_sampling(gp_model, gp_likelihood, kmeans_model, data_loader, n_samples, n_batches, uncertainty_threshold=0.2):
+    gp_model.eval()
+    gp_likelihood.eval()
+    uncertain_sample_indices = []
+
+    # Ensure that n_batches does not exceed the number of batches in the DataLoader
+    n_batches = min(n_batches, len(data_loader))
+
+    # Randomly sample n_batches from data_loader
+    sampled_batches = random.sample(list(data_loader), n_batches)
+
+    with torch.no_grad():
+        for batch in sampled_batches:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+
+            # Disagreement between K-means and GP predictions (1 where they differ)
+            disagreement = (gp_predictions.mean.argmax(dim=-1).cpu().numpy() != kmeans_predictions).astype(int)
+
+            # Uncertainty from the variance of the GP predictions, reduced to a
+            # single score per sample so its shape matches `disagreement`
+            uncertainty = gp_predictions.variance.cpu().numpy()
+            if uncertainty.ndim > 1:
+                uncertainty = uncertainty.mean(axis=-1)
+
+            # Select samples where the disagreement is high and the model is uncertain
+            uncertain_indices = np.where((disagreement > 0) & (uncertainty > uncertainty_threshold))[0]
+            uncertain_sample_indices.extend(uncertain_indices)
+
+    return uncertain_sample_indices[:n_samples]
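+
+if __name__ == "__main__":
+    # Quick smoke test on synthetic data -- an illustrative sketch only, not
+    # part of the pipeline. It assumes batches are dicts with a 'data'
+    # tensor, matching the project loaders; the 16x16 toy size is arbitrary.
+    from torch.utils.data import Dataset
+
+    class _ToyDataset(Dataset):
+        def __init__(self, n=256):
+            self.x = torch.randn(n, 1, 16, 16)
+        def __len__(self):
+            return len(self.x)
+        def __getitem__(self, i):
+            return {'data': self.x[i], 'label': 0}
+
+    toy_loader = DataLoader(_ToyDataset(), batch_size=32)
+    km = run_minibatch_kmeans(toy_loader, n_clusters=4, device=device)
+    print(km.cluster_centers_.shape)  # expected: (4, 256)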
zY?;cO5~?Nu*cjVC$3(q-E>TyX9k@IMDBO z0d}q%r(OX9Cs^*_zg>zs5MbbZaDYp}y`qmUs_<;qk+7R7W$~-((Zc;XY!OH49NzFL zmAOX{V8KYiqY?*1Cd`1}+^eVdIQ*VDi!_Hg~5l1{$%Nfa&W%yIQP$R3$8}QtAJ(9St6tv$oK`pDFG*Py!e$vE{u+uk991yhMC#7Y}_B2(pY=wwUAS#|% zD_1I4{VC&*Z8CjWq7RGoQA8hw&HLKS+RXhW(b|u!{mZ5d*`b3YKikRLjoWXbT`xVu ik3KIvpVJNNbd4Ee8=cTs;EJ@Q#jkT`i0wo*-TwiKZh$8M literal 0 HcmV?d00001 diff --git a/BML_project/models/ss_gp_model.py b/BML_project/models/ss_gp_model.py new file mode 100644 index 0000000..c18f06f --- /dev/null +++ b/BML_project/models/ss_gp_model.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +""" +Created on Mon Dec 18 18:01:41 2023 + +@author: lrm22005 +""" +import numpy as np +from tqdm import tqdm +import torch +import gpytorch +from sklearn.metrics import precision_recall_fscore_support, roc_auc_score +from sklearn.preprocessing import label_binarize + +num_latents = 6 # This should match the complexity of your data or the number of tasks +num_tasks = 4 # This should match the number of output classes or tasks +num_inducing_points = 50 # This is independent and should be sufficient for the input space + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +class MultitaskGPModel(gpytorch.models.ApproximateGP): + def __init__(self): + # Let's use a different set of inducing points for each latent function + inducing_points = torch.rand(num_latents, num_inducing_points, 127 * 128) # Assuming flattened 128x128 images + + # We have to mark the CholeskyVariationalDistribution as batch + # so that we learn a variational distribution for each task + variational_distribution = gpytorch.variational.CholeskyVariationalDistribution( + inducing_points.size(-2), batch_shape=torch.Size([num_latents]) + ) + + # We have to wrap the VariationalStrategy in a LMCVariationalStrategy + # so that the output will be a MultitaskMultivariateNormal rather than a batch output + variational_strategy = gpytorch.variational.LMCVariationalStrategy( + gpytorch.variational.VariationalStrategy( + self, inducing_points, variational_distribution, learn_inducing_locations=True + ), + num_tasks=num_tasks, + num_latents=num_latents, + latent_dim=-1 + ) + + super().__init__(variational_strategy) + + # The mean and covariance modules should be marked as batch + # so we learn a different set of hyperparameters + self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents])) + self.covar_module = gpytorch.kernels.ScaleKernel( + gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])), + batch_shape=torch.Size([num_latents]) + ) + + def forward(self, x): + # The forward function should be written as if we were dealing with each output + # dimension in batch + # Ensure x is correctly shaped. It should have the same last dimension size as inducing_points + # x should be reshaped or sliced to have the shape [?, 1] where ? 
can be any size + # For example, if x originally has shape [N, D], and D != 1, you need to modify x accordingly + # print(f"Input shape: {x.shape}") + # x = x.view(x.size(0), -1) # Flattening the images + # print(f"Input shape after flattening: {x.shape}") # Debugging input shape + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + + # Debugging: Print shapes of intermediate outputs + # print(f"Mean shape: {mean_x.shape}, Covariance shape: {covar_x.shape}") + latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x) + # print(f"Latent prediction shape: {latent_pred.mean.shape}, {latent_pred.covariance_matrix.shape}") + + return latent_pred + + +def train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=4, patience=10, checkpoint_path='model_checkpoint_full.pt'): + model = MultitaskGPModel().to(device) + likelihood = gpytorch.likelihoods.SoftmaxLikelihood(num_features=4, num_classes=4).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=0.1) + mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset)) + best_val_loss = float('inf') + epochs_no_improve = 0 + + metrics = { + 'precision': [], + 'recall': [], + 'f1_score': [], + 'auc_roc': [], + 'train_loss': [] # Add a list to store training losses + } + + for epoch in tqdm(range(num_iterations), desc='Training', unit='epoch', leave=False): + for train_batch in train_loader: + model.train() + likelihood.train() + optimizer.zero_grad() + train_x = train_batch['data'].reshape(train_batch['data'].size(0), -1).to(device) # Use reshape here + train_y = train_batch['label'].to(device) + output = model(train_x) + loss = -mll(output, train_y) + metrics['train_loss'].append(loss.item()) # Store the training loss + loss.backward() + optimizer.step() + + # Stochastic validation + model.eval() + likelihood.eval() + with torch.no_grad(): + val_indices = torch.randperm(len(val_loader.dataset))[:int(1 * len(val_loader.dataset))] + val_loss = 0.0 + val_labels = [] + val_predictions = [] + for idx in val_indices: + val_batch = val_loader.dataset[idx] + val_x = val_batch['data'].reshape(-1).unsqueeze(0).to(device) # Use reshape here + val_y = torch.tensor([val_batch['label']], device=device) + val_output = model(val_x) + val_loss_batch = -mll(val_output, val_y).sum() + val_loss += val_loss_batch.item() + val_labels.append(val_y.item()) + val_predictions.append(val_output.mean.argmax(dim=-1).item()) + + precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro') + # auc_roc = roc_auc_score(label_binarize(val_labels, classes=np.arange(n_classes)), + # label_binarize(val_predictions, classes=np.arange(n_classes)), + # multi_class='ovr') + + metrics['precision'].append(precision) + metrics['recall'].append(recall) + metrics['f1_score'].append(f1) + # metrics['auc_roc'].append(auc_roc) + val_loss /= len(val_indices) + + if val_loss < best_val_loss: + best_val_loss = val_loss + epochs_no_improve = 0 + torch.save({'model_state_dict': model.state_dict(), + 'likelihood_state_dict': likelihood.state_dict(), + 'optimizer_state_dict': optimizer.state_dict()}, checkpoint_path) + else: + epochs_no_improve += 1 + if epochs_no_improve >= patience: + print(f"Early stopping triggered at epoch {epoch+1}") + break + + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + likelihood.load_state_dict(checkpoint['likelihood_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + + 
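+
+# Usage sketch (illustrative, not part of the pipeline): the loaders are
+# assumed to yield dicts with a 'data' tensor of shape [B, 1, 128, 128] and
+# an integer 'label', as the utils_gp loaders do.
+#
+#   model, likelihood, metrics = train_gp_model(
+#       train_loader, val_loader, num_iterations=50, n_classes=4,
+#       patience=10, checkpoint_path='model_checkpoint_full.pt')
+#   print(metrics['f1_score'][-1])  # validation macro-F1 of the last epoch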
+
+def semi_supervised_labeling(kmeans_model, gp_model, gp_likelihood, data_loader, confidence_threshold=0.8):
+    gp_model.eval()
+    gp_likelihood.eval()
+    labeled_samples = []
+
+    with torch.no_grad():
+        for batch in data_loader:
+            data_tensor = batch['data'].view(batch['data'].size(0), -1).to(device)
+            kmeans_predictions = kmeans_model.predict(data_tensor.cpu().numpy())
+            gp_predictions = gp_likelihood(gp_model(data_tensor))
+
+            # Use the maximum class probability as the confidence score. The
+            # likelihood's output distribution carries class probabilities,
+            # possibly with a leading sample dimension that we average out.
+            probs = gp_predictions.probs
+            if probs.dim() == 3:
+                probs = probs.mean(dim=0)
+            confidence, gp_labels = probs.max(dim=-1)
+
+            # Use GP predictions where the model is confident, K-means otherwise
+            confident_mask = (confidence > confidence_threshold).cpu().numpy()
+            for i, confident in enumerate(confident_mask):
+                if confident:
+                    labeled_samples.append((data_tensor[i], gp_labels[i].item()))
+                else:
+                    labeled_samples.append((data_tensor[i], kmeans_predictions[i]))
+
+    return labeled_samples
+
+def calculate_elbo(model, likelihood, data_loader):
+    """
+    Calculates the ELBO (Evidence Lower Bound) score for the model on the given data.
+
+    Args:
+    - model: The trained Gaussian Process model.
+    - likelihood: The likelihood associated with the GP model.
+    - data_loader: DataLoader providing the data over which to calculate ELBO.
+
+    Returns:
+    - elbo_score: The calculated ELBO score.
+    """
+    model.eval()
+    likelihood.eval()
+    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(data_loader.dataset))
+
+    with torch.no_grad():
+        elbo_score = 0.0
+        for batch in data_loader:
+            train_x = batch['data'].reshape(batch['data'].size(0), -1).to(device)
+            train_y = batch['label'].to(device)
+            output = model(train_x)
+            # mll returns the ELBO itself, i.e. the negative of the training loss
+            elbo_score += mll(output, train_y).sum().item()
+
+        # Average the ELBO over all data samples
+        elbo_score /= len(data_loader.dataset)
+
+    return elbo_score
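+
+# Note: the quantity maximized during training and measured by calculate_elbo
+# is the variational evidence lower bound,
+#     ELBO = E_q[log p(y | f)] - KL(q(u) || p(u)),
+# so a larger (less negative) per-sample ELBO indicates a better variational
+# fit to the labeled data; ss_main_ss.py uses it as the signal for turning
+# on threshold-based labeling.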
diff --git a/BML_project/ss_main.py b/BML_project/ss_main.py
new file mode 100644
index 0000000..a610684
--- /dev/null
+++ b/BML_project/ss_main.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:47:27 2023
+
+@author: lrm22005
+"""
+from tqdm import tqdm
+import torch
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_uncertain_samples
+from models.ss_gp_model import MultitaskGPModel, train_gp_model
+from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data
+from active_learning.ss_active_learning import stochastic_uncertainty_sampling, run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def main():
+    # Set parameters like n_classes, batch_size, etc.
+    n_classes = 4
+    batch_size = 1024
+    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
+    data_format = 'pt'
+
+    # Preprocess data
+    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)
+
+    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device)
+
+    # Initialize result storage
+    results = {
+        'train_loss': [],
+        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
+        'test_metrics': None
+    }
+
+    # Initial model training
+    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes)
+
+    # Save the training metrics for future visualization
+    results['train_loss'].extend(training_metrics['train_loss'])
+    results['validation_metrics']['precision'].extend(training_metrics['precision'])
+    results['validation_metrics']['recall'].extend(training_metrics['recall'])
+    results['validation_metrics']['f1'].extend(training_metrics['f1_score'])
+    # results['validation_metrics']['auc_roc'].extend(training_metrics['auc_roc'])
+
+    active_learning_iterations = 10
+    # Active Learning Iterations
+    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
+        # Perform uncertainty sampling to select new samples from the validation set
+        uncertain_sample_indices = stochastic_uncertainty_sampling(model, likelihood, val_loader, n_samples=batch_size, n_batches=5)
+
+        # Update the training loader with uncertain samples
+        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
+
+        # Re-train the model with the updated training data
+        model, likelihood, val_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
+
+        # Store the validation metrics after each active learning iteration
+        # (extend with the per-epoch lists; note the 'f1_score' key used by train_gp_model)
+        results['train_loss'].extend(val_metrics['train_loss'])
+        results['validation_metrics']['precision'].extend(val_metrics['precision'])
+        results['validation_metrics']['recall'].extend(val_metrics['recall'])
+        results['validation_metrics']['f1'].extend(val_metrics['f1_score'])
+        # results['validation_metrics']['auc_roc'].extend(val_metrics['auc_roc'])
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/BML_project/ss_main_ss.py b/BML_project/ss_main_ss.py
new file mode 100644
index 0000000..0d0aed4
--- /dev/null
+++ b/BML_project/ss_main_ss.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 4 14:40:13 2024
+
+@author: lrm22005
+"""
+from tqdm import tqdm
+import torch
+from utils_gp.data_loader import preprocess_data, split_uids, update_train_loader_with_labeled_samples, update_train_loader_with_uncertain_samples
+from models.ss_gp_model import MultitaskGPModel, train_gp_model, semi_supervised_labeling, calculate_elbo
+from utils_gp.ss_evaluation import stochastic_evaluation, evaluate_model_on_all_data, threshold_based_labeling, resolve_conflicts
+from active_learning.ss_active_learning import run_minibatch_kmeans, stochastic_compare_kmeans_gp_predictions, refined_uncertainty_sampling
+from utils_gp.visualization import plot_comparative_results, plot_training_performance, plot_results
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def main():
+    # Set parameters like n_classes, batch_size, etc.
+    n_classes = 4
+    batch_size = 1024
+    clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled = split_uids()
+    data_format = 'pt'
+
+    # Preprocess data
+    train_loader, val_loader, test_loader = preprocess_data(data_format, clinical_trial_train, clinical_trial_test, clinical_trial_unlabeled, batch_size)
+
+    kmeans_model = run_minibatch_kmeans(train_loader, n_clusters=n_classes, device=device, batch_size=batch_size)
+
+    # Initialize result storage ('elbo' tracks the score used to gate
+    # threshold-based labeling below)
+    results = {
+        'train_loss': [],
+        'validation_metrics': {'precision': [], 'recall': [], 'f1': [], 'auc_roc': []},
+        'elbo': [],
+        'test_metrics': None
+    }
+
+    # Initial model training
+    model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=50, n_classes=n_classes)
+
+    # Save the training metrics for future visualization
+    results['train_loss'].extend(training_metrics['train_loss'])
+    results['validation_metrics']['precision'].extend(training_metrics['precision'])
+    results['validation_metrics']['recall'].extend(training_metrics['recall'])
+    results['validation_metrics']['f1'].extend(training_metrics['f1_score'])
+
+    elbo_threshold = -0.5  # Define a threshold for the ELBO score
+    use_threshold_labeling = False  # Initially, do not use threshold-based labeling
+
+    active_learning_iterations = 10
+    # Active Learning Iterations
+    for iteration in tqdm(range(active_learning_iterations), desc='Active Learning', unit='iteration', leave=True):
+        # Perform uncertainty sampling to select new samples from the validation set
+        uncertain_sample_indices = refined_uncertainty_sampling(model, likelihood, kmeans_model, val_loader, n_samples=batch_size, n_batches=5)
+
+        # Semi-supervised labeling with K-means and GP model
+        semi_supervised_samples = semi_supervised_labeling(kmeans_model, model, likelihood, val_loader)
+
+        labeled_samples = semi_supervised_samples  # Initially, use only semi-supervised samples
+
+        if use_threshold_labeling:
+            # Threshold-based labeling to decide when a sample's predicted label should be trusted
+            threshold_based_samples = threshold_based_labeling(kmeans_model, model, likelihood, val_loader)
+
+            # Combine the two sets of labeled samples; resolve_conflicts keeps
+            # the semi-supervised label when the two methods disagree
+            labeled_samples = resolve_conflicts(semi_supervised_samples, threshold_based_samples)
+
+        # Update the training loader with uncertain and newly labeled samples
+        train_loader = update_train_loader_with_uncertain_samples(train_loader, uncertain_sample_indices, batch_size)
+        train_loader = update_train_loader_with_labeled_samples(train_loader, labeled_samples, batch_size)
+
+        # Re-train the model with the updated training data
+        model, likelihood, training_metrics = train_gp_model(train_loader, val_loader, num_iterations=10, n_classes=n_classes, patience=10, checkpoint_path='model_checkpoint_last.pt')
+
+        # Store the ELBO score after each active learning iteration
+        current_elbo = calculate_elbo(model, likelihood, train_loader)
+        results['elbo'].append(current_elbo)
+
+        # Determine if the threshold-based labeling should be used in the next iteration based on the ELBO score
+        if current_elbo >= elbo_threshold:
+            use_threshold_labeling = True
+
+        # Compare K-Means with GP model predictions after retraining
+        gp_vs_kmeans_data, original_labels = stochastic_compare_kmeans_gp_predictions(kmeans_model, model, train_loader, n_batches=5, device=device)
+
+        plot_comparative_results(gp_vs_kmeans_data, original_labels)
+
+    # Final evaluation on test set
+    test_metrics = evaluate_model_on_all_data(model, likelihood, test_loader, device, n_classes)
+    test_kmeans_model = run_minibatch_kmeans(test_loader, n_clusters=n_classes, device=device)
+
+    results['test_metrics'] = test_metrics
+    test_gp_vs_kmeans_data, test_original_labels = stochastic_compare_kmeans_gp_predictions(test_kmeans_model, model, test_loader, n_batches=5, device=device)
+    plot_comparative_results(test_gp_vs_kmeans_data, test_original_labels)
+
+    # Visualization of results
+    plot_training_performance(results['train_loss'], results['validation_metrics'])
+    plot_results(results)
+
+    # Print final test metrics
+    print("Final Test Metrics:", results['test_metrics'])
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
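+
+# Worked example of the ELBO gate in main(): with elbo_threshold = -0.5, an
+# average per-sample ELBO of -0.3 on the training data turns threshold-based
+# labeling on for all later iterations, while a value of -0.8 keeps the loop
+# on semi-supervised labels only.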
zaT_T*NO6*gAKi^CbsaziLDg!J9?%bJHT)SVGC!rFZoLKR4ZDtSvGQbRRpAa5*$ zt9S`NEYN$JFpmQcVr=xS; zP__#I-nL)dp=kdcT`4yL??O#aLZvRfCtanV3WOmsv?N1&!hhBl++Uz^{vn#GgfPLOT7fb|xxM1@x zmZb8}%}!9d2)BXyP1LW~ZhRKJ{o`p31Mz6@h|M#+?^Q!wOG4CJWmbJ887oqg4ad7x z${W>y$UxbI9`dea{vZ6 zI2RxBS_F~I({I^HWE>SBcE{2NE!U{(dVI9;U3~M8dM~ht_Zm{O)j)&y0u9~^?%}g03K&BS#H~8 z)!4D!bJiiKN`mJ5V^BP!GO!1}h^*2@ny2b(@*Cz!6QnORCMSd!)CFdymN&@m4rN_o9 zcllI_-yC?FaT@Gxf8VyjqB9Vw{hTaA(!p@cmTw`e70!No7rvSBg2V!3@FE=e69YLkY$}tp+N=6*Rv1{7W|eG5$O7%Fg2~-` ztv5UivZL^*`3gR_c07%4{<2JOUDjKNjMgEub?EV}wTX?xI{~WmDp_v5qfi>LF*7!{ zHuo;p_4GI8IU_b?#)hC8J-V6Nl8xw9GkSIH1_8<2CI9CAGQTzOr_AdiH-p&0WBR!fqi59Y86^#^Z27o8IBB%rFk5e|-Ky9ZhTQPf)>p62zE*WK zYM@aQf$*sFwu*h%40PQ@*L8H=VI0)H{ZlM^k;a>?U@FBc=~T+{%lT|>1z&?%vD)A> zC7C-(dM6&X}>URNq!P;51eGaoH8{0dO91<#F zDKwT<_`D=s!}p<$L{W7eiX9(K(>n;!Xon(2grYlis=ac*rH<>~`z;mIYxi5~g6_R{ aP&eJOLpjB82R*PuIYn agreement_threshold: + new_labels.append(gp_predictions.mean.argmax(dim=-1)[i].item()) + else: + new_labels.append(kmeans_predictions[i]) + + return new_labels + +def resolve_conflicts(semi_supervised_samples, threshold_based_samples): + """ + Resolves conflicts between two sets of labeled samples. + + Args: + - semi_supervised_samples: Labeled samples from the semi_supervised_labeling method. + - threshold_based_samples: Labeled samples from the threshold_based_labeling method. + + Returns: + - resolved_samples: The resolved set of labeled samples. + """ + resolved_samples = [] + + # Create dictionaries for quick lookup + semi_supervised_dict = {segment_name: label for segment_name, label in semi_supervised_samples} + threshold_based_dict = {segment_name: label for segment_name, label in threshold_based_samples} + + # Combine all unique segment names + all_segments = set(semi_supervised_dict.keys()).union(set(threshold_based_dict.keys())) + + for segment_name in all_segments: + if segment_name in semi_supervised_dict and segment_name in threshold_based_dict: + # If there's a conflict, resolve it here. 
+            # For simplicity, we're taking the label from semi_supervised.
+            # You can implement other strategies like majority vote, confidence weighting, or agreement only.
+            resolved_samples.append((segment_name, semi_supervised_dict[segment_name]))
+        elif segment_name in semi_supervised_dict:
+            resolved_samples.append((segment_name, semi_supervised_dict[segment_name]))
+        elif segment_name in threshold_based_dict:
+            resolved_samples.append((segment_name, threshold_based_dict[segment_name]))
+
+    return resolved_samples
+
+def parse_classification_report(report):
+    """Parse a classification report into a dictionary of metrics."""
+    lines = report.split('\n')
+    # With sklearn's text report ending in a newline, lines[-2] is the
+    # "weighted avg" row: ['weighted', 'avg', precision, recall, f1, support]
+    main_metrics = lines[-2].split()
+
+    return {
+        'precision': float(main_metrics[2]),
+        'recall': float(main_metrics[3]),
+        'f1': float(main_metrics[4]),
+        'auc_roc': None  # AUC-ROC is not part of the classification report by default
+    }
diff --git a/BML_project/utils_gp/visualization.py b/BML_project/utils_gp/visualization.py
new file mode 100644
index 0000000..3ecf59b
--- /dev/null
+++ b/BML_project/utils_gp/visualization.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 18:20:55 2023
+
+@author: lrm22005
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import confusion_matrix
+
+def plot_training_performance(train_loss, validation_metrics):
+    # Plot training loss (recorded once per training batch)
+    plt.figure(figsize=(14, 6))
+    plt.subplot(1, 2, 1)
+    plt.plot(range(1, len(train_loss) + 1), train_loss, 'b-', label='Training Loss')
+    plt.title('Training Loss')
+    plt.xlabel('Training Batches')
+    plt.ylabel('Loss')
+    plt.legend()
+
+    # Plot validation metrics (recorded once per epoch, so they get their own
+    # x-axis); AUC-ROC is optional and plotted only if it was collected
+    plt.subplot(1, 2, 2)
+    epochs = range(1, len(validation_metrics['precision']) + 1)
+    plt.plot(epochs, validation_metrics['precision'], 'r-', label='Precision')
+    plt.plot(epochs, validation_metrics['recall'], 'g-', label='Recall')
+    plt.plot(epochs, validation_metrics['f1'], 'b-', label='F1 Score')
+    if validation_metrics.get('auc_roc'):
+        plt.plot(epochs, validation_metrics['auc_roc'], 'y-', label='AUC-ROC')
+    plt.title('Validation Metrics')
+    plt.xlabel('Epochs')
+    plt.ylabel('Metrics')
+    plt.legend()
+
+    plt.tight_layout()
+    plt.show()
+
+def plot_results(results):
+    plt.figure(figsize=(12, 5))
+    plt.subplot(1, 2, 1)
+    plt.plot(results['train_loss'], label='Train Loss')
+    plt.title('Training Loss Over Time')
+    plt.legend()
+
+    plt.subplot(1, 2, 2)
+    for metric in ['precision', 'recall', 'f1']:
+        plt.plot(results['validation_metrics'][metric], label=metric.title())
+    plt.title('Validation Metrics Over Time')
+    plt.legend()
+    plt.show()
+
+    test_metrics = results['test_metrics']
+    print("Test Metrics:")
+    print(f"Precision: {test_metrics['precision']}")
+    print(f"Recall: {test_metrics['recall']}")
+    print(f"F1 Score: {test_metrics['f1']}")
+    print(f"AUC-ROC: {test_metrics['auc_roc']}")
+
+def plot_comparative_results(gp_vs_kmeans_data, original_labels):
+    fig, axes = plt.subplots(1, 2, figsize=(14, 7))
+
+    # Plot 1: Confusion Matrix for GP Predictions vs Original Labels
+    gp_predictions = [pair[0] for pair in gp_vs_kmeans_data]
+    gp_predictions = np.concatenate(gp_predictions)
+    cm_gp = confusion_matrix(original_labels, gp_predictions)
+    sns.heatmap(cm_gp, annot=True, ax=axes[0], fmt='g')
+    axes[0].set_title('GP Model Predictions vs Original Labels')
+    axes[0].set_xlabel('Predicted Labels')
+    axes[0].set_ylabel('True Labels')
+
+    # Plot 2: Confusion Matrix for K-Means Predictions vs Original Labels
+    kmeans_predictions = [pair[1] for pair in gp_vs_kmeans_data]
+    kmeans_predictions = np.concatenate(kmeans_predictions)
+    cm_kmeans = confusion_matrix(original_labels, kmeans_predictions)
+    sns.heatmap(cm_kmeans, annot=True, ax=axes[1], fmt='g')
+    axes[1].set_title('K-Means Predictions vs Original Labels')
+    axes[1].set_xlabel('Predicted Labels')
+    axes[1].set_ylabel('True Labels')
+
+    plt.tight_layout()
+    plt.show()
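+
+# Usage sketch (illustrative): `results` is the dict built in ss_main.py, and
+# `gp_vs_kmeans_data` / `original_labels` come from
+# stochastic_compare_kmeans_gp_predictions.
+#
+#   plot_training_performance(results['train_loss'], results['validation_metrics'])
+#   plot_comparative_results(gp_vs_kmeans_data, original_labels)
+#   plot_results(results)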
diff --git a/pytorch_file_generation_loader_update.py b/pytorch_file_generation_loader_update.py
new file mode 100644
index 0000000..cea6242
--- /dev/null
+++ b/pytorch_file_generation_loader_update.py
@@ -0,0 +1,374 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Dec 18 17:11:29 2023
+
+@author: lrm22005
+"""
+
+# import os
+# import pandas as pd
+# import numpy as np
+# import torch
+# from PIL import Image
+
+#### From first to the last
+# def preprocess_and_save_data(data_path, output_path):
+#     # Make sure the output directory exists
+#     if not os.path.exists(output_path):
+#         os.makedirs(output_path)
+
+#     # Traverse the directories for each UID
+#     for uid in os.listdir(data_path):
+#         uid_path = os.path.join(data_path, uid)
+#         if os.path.isdir(uid_path):
+#             # Make a corresponding directory in the output path
+#             uid_output_path = os.path.join(output_path, uid)
+#             if not os.path.exists(uid_output_path):
+#                 os.makedirs(uid_output_path)
+
+#             # Process each file within the UID directory
+#             for file in os.listdir(uid_path):
+#                 if file.endswith('.csv') or file.endswith('.png'):
+#                     file_path = os.path.join(uid_path, file)
+#                     if file.endswith('.csv'):
+#                         data = pd.read_csv(file_path).values
+#                     else:  # if file.endswith('.png'):
+#                         data = np.array(Image.open(file_path))
+
+#                     data_tensor = torch.tensor(data, dtype=torch.float32)
+#                     output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+#                     torch.save(data_tensor, output_file_path)
+
+# # Define your input and output paths
+# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# # Run the preprocessing function
+# preprocess_and_save_data(input_path, output_path)
+
+#### From the last to the first
+# def preprocess_and_save_data(data_path, output_path):
+#     # Make sure the output directory exists
+#     if not os.path.exists(output_path):
+#         os.makedirs(output_path)
+
+#     # Traverse the directories for each UID
+#     # Get the list of directories and sort them in descending order
+#     uids = sorted(os.listdir(data_path), reverse=True)
+#     for uid in uids:
+#         uid_path = os.path.join(data_path, uid)
+#         if os.path.isdir(uid_path):
+#             # Make a corresponding directory in the output path
+#             uid_output_path = os.path.join(output_path, uid)
+#             if not os.path.exists(uid_output_path):
+#                 os.makedirs(uid_output_path)
+
+#             # Process each file within the UID directory
+#             for file in os.listdir(uid_path):
+#                 if file.endswith('.csv') or file.endswith('.png'):
+#                     file_path = os.path.join(uid_path, file)
+#                     if file.endswith('.csv'):
+#                         data = pd.read_csv(file_path).values
+#                     else:  # if file.endswith('.png'):
+#                         data = np.array(Image.open(file_path))
+
+#                     data_tensor = torch.tensor(data, dtype=torch.float32)
+#                     output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+#                     torch.save(data_tensor, output_file_path)
+
+# # Define your input and output paths
+# input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+# output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# # Run the preprocessing function
+# preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+#### First to last
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+
+    if file.endswith('.csv'):
+        data = pd.read_csv(file_path).values
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+    else:
+        return
+
+    data_tensor = torch.tensor(data, dtype=torch.float32)
+    base_name, extension = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Traverse the directories for each UID
+    for uid in os.listdir(data_path):
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            # Make a corresponding directory in the output path
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Create a ThreadPoolExecutor for parallel processing
+            with ThreadPoolExecutor() as executor:
+                files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+                for file in files_to_process:
+                    executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+#### Last to first
+import os
+import pandas as pd
+import numpy as np
+import torch
+from PIL import Image
+from concurrent.futures import ThreadPoolExecutor
+
+def process_file(uid_path, file, uid_output_path):
+    if file.endswith('.csv') or file.endswith('.png'):
+        file_path = os.path.join(uid_path, file)
+        if file.endswith('.csv'):
+            data = pd.read_csv(file_path).values
+        else:  # if file.endswith('.png'):
+            data = np.array(Image.open(file_path))
+
+        data_tensor = torch.tensor(data, dtype=torch.float32)
+        output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+        torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    uids = sorted(os.listdir(data_path), reverse=True)
+    for uid in uids:
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Use a ThreadPoolExecutor to process files in parallel
+            with ThreadPoolExecutor() as executor:
+######################################################################################################################################################
+#### Last to first
+import os
+import pandas as pd
+import numpy as np
+import torch
+from PIL import Image
+from concurrent.futures import ThreadPoolExecutor
+
+def process_file(uid_path, file, uid_output_path):
+    if file.endswith('.csv') or file.endswith('.png'):
+        file_path = os.path.join(uid_path, file)
+        if file.endswith('.csv'):
+            # header=None: the first row of these CSVs is data, not column names
+            data = pd.read_csv(file_path, header=None).values
+        else:  # file.endswith('.png')
+            data = np.array(Image.open(file_path))
+
+        data_tensor = torch.tensor(data, dtype=torch.float32)
+        output_file_path = os.path.join(uid_output_path, file.replace('.csv', '.pt').replace('.png', '.pt'))
+        torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Get the list of UID directories and sort them in descending order
+    uids = sorted(os.listdir(data_path), reverse=True)
+    for uid in uids:
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Use a ThreadPoolExecutor to process files in parallel
+            with ThreadPoolExecutor() as executor:
+                # Create a list of tasks for the executor
+                tasks = [executor.submit(process_file, uid_path, file, uid_output_path) for file in os.listdir(uid_path)]
+                # Wait for all tasks to complete
+                for task in tasks:
+                    task.result()
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+######################################################################################################################################################
+######################################################################################################################################################
+######################################################################################################################################################
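+
+# A fault-tolerant variant, sketched by the editor under the same assumptions as
+# the code above (re-uses process_file): executor.submit defers worker exceptions
+# until .result() is called, so the task.result() loop above is what actually
+# surfaces failures. as_completed lets a run log a bad file and keep going
+# instead of aborting the whole UID on the first exception.
+from concurrent.futures import as_completed
+
+def run_tasks_tolerantly(executor, uid_path, files, uid_output_path):
+    futures = {executor.submit(process_file, uid_path, f, uid_output_path): f for f in files}
+    for future in as_completed(futures):
+        try:
+            future.result()
+        except Exception as exc:
+            print(f"Failed to preprocess {futures[future]}: {exc}")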
+
+from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import StandardScaler
+
+class CustomDataset(Dataset):
+    def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='csv', read_all_labels=False):
+        self.data_path = data_path
+        self.labels_path = labels_path
+        self.UIDs = UIDs
+        self.standardize = standardize
+        self.data_format = data_format
+        self.read_all_labels = read_all_labels
+        self.refresh_dataset()
+
+    def refresh_dataset(self):
+        self.segment_names, self.labels = self.extract_segment_names_and_labels()
+
+    def add_uids(self, new_uids):
+        unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
+        self.UIDs.extend(unique_new_uids)
+        self.refresh_dataset()
+
+    def __len__(self):
+        return len(self.segment_names)
+
+    def __getitem__(self, idx):
+        segment_name = self.segment_names[idx]
+        label = self.labels[segment_name]
+        time_freq_tensor = self.load_data(segment_name)
+        return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
+
+    def extract_segment_names_and_labels(self):
+        segment_names = []
+        labels = {}
+
+        for UID in self.UIDs:
+            label_file = os.path.join(self.labels_path, UID + "_final_attemp_4_1_Dong.csv")
+            if os.path.exists(label_file):
+                label_data = pd.read_csv(label_file, sep=',', header=0, names=['segment', 'label'])
+                label_segment_names = label_data['segment'].apply(lambda x: x.split('.')[0])
+                for idx, segment_name in enumerate(label_segment_names):
+                    label_val = label_data['label'].values[idx]
+                    if self.read_all_labels:
+                        # Assign -1 if the label is not in [0, 1, 2, 3]
+                        labels[segment_name] = label_val if label_val in [0, 1, 2, 3] else -1
+                        if segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                    else:
+                        # Only add segments with labels in [0, 1, 2, 3]
+                        if label_val in [0, 1, 2, 3] and segment_name not in segment_names:
+                            segment_names.append(segment_name)
+                            labels[segment_name] = label_val
+
+        return segment_names, labels
+
+    def load_data(self, segment_name):
+        data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
+        # Derive the file suffix from the configured data format ('csv', 'png', or 'pt')
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.' + self.data_format)
+
+        try:
+            if self.data_format == 'csv' and seg_path.endswith('.csv'):
+                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
+                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
+            elif self.data_format == 'png' and seg_path.endswith('.png'):
+                img = Image.open(seg_path)
+                img_data = np.array(img)
+                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
+            elif self.data_format == 'pt' and seg_path.endswith('.pt'):
+                time_freq_tensor = torch.load(seg_path)
+            else:
+                raise ValueError("Unsupported file format")
+
+            if self.standardize:
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
+
+            return time_freq_tensor.clone()
+
+        except Exception as e:
+            print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
+            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+
+    def standard_scaling(self, data):
+        scaler = StandardScaler()
+        data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
+        return torch.Tensor(data)
+
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
+    return dataloader
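+
+def demo_loader(labels_dir, uids):
+    # Hypothetical usage sketch for load_data_split_batched (labels_dir and uids
+    # are illustrative placeholders, not values from this project): builds a
+    # loader over the .pt files written above and prints one batch.
+    loader = load_data_split_batched(output_path, labels_dir, uids,
+                                     batch_size=32, data_format='pt')
+    batch = next(iter(loader))
+    print(batch['data'].shape, batch['label'][:5], batch['segment_name'][:2])
+
+# demo_loader(r'path\to\label_csvs', ['001', '002'])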
+
+import os
+import pandas as pd
+import numpy as np
+from PIL import Image
+import torch
+from concurrent.futures import ThreadPoolExecutor
+
+def preprocess_file(uid_path, file, uid_output_path):
+    file_path = os.path.join(uid_path, file)
+
+    if file.endswith('.csv'):
+        # Ensure that the CSV file is read without an index or header
+        data = pd.read_csv(file_path, header=None).values
+        # Check the shape of the data and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: File {file_path} has shape {data.shape} instead of 128x128.")
+            return  # Skip malformed files: .view(128, 128) below would raise otherwise
+    elif file.endswith('.png'):
+        data = np.array(Image.open(file_path))
+        # Check the shape of the image and log if it's not 128x128
+        if data.shape != (128, 128):
+            print(f"Warning: Image {file_path} has shape {data.shape} instead of 128x128.")
+            return  # Skip malformed images for the same reason
+    else:
+        return  # Skip files that are not CSV or PNG
+
+    # Convert data to a 128x128 tensor
+    data_tensor = torch.tensor(data, dtype=torch.float32).view(128, 128)
+    base_name, _ = os.path.splitext(file)
+    output_file_path = os.path.join(uid_output_path, f'{base_name}.pt')
+    torch.save(data_tensor, output_file_path)
+
+def preprocess_and_save_data(data_path, output_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # Traverse the directories for each UID
+    for uid in os.listdir(data_path):
+        uid_path = os.path.join(data_path, uid)
+        if os.path.isdir(uid_path):
+            # Make a corresponding directory in the output path
+            uid_output_path = os.path.join(output_path, uid)
+            if not os.path.exists(uid_output_path):
+                os.makedirs(uid_output_path)
+
+            # Create a ThreadPoolExecutor for parallel processing
+            with ThreadPoolExecutor() as executor:
+                files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+                for file in files_to_process:
+                    executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Define your input and output paths
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+
+# Run the preprocessing function
+preprocess_and_save_data(input_path, output_path)
+
+
+def preprocess_and_save_data(data_path, output_path):
+    # Reversed traversal; re-uses the shape-checked preprocess_file defined above
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    all_uids = [uid for uid in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, uid))]
+    for uid in reversed(all_uids):  # Reverse the list of directories
+        uid_path = os.path.join(data_path, uid)
+        uid_output_path = os.path.join(output_path, uid)
+        if not os.path.exists(uid_output_path):
+            os.makedirs(uid_output_path)
+        with ThreadPoolExecutor() as executor:
+            files_to_process = [file for file in os.listdir(uid_path) if file.endswith(('.csv', '.png'))]
+            for file in files_to_process:
+                executor.submit(preprocess_file, uid_path, file, uid_output_path)
+
+# Top-level script execution:
+input_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\TFS_csv'
+output_path = r'\\grove.ad.uconn.edu\research\ENGR_Chon\Dong\MATLAB_generate_results\NIH_PulseWatch\PT_format'
+preprocess_and_save_data(input_path, output_path)
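+
+# Minimal idempotence sketch (an assumption-laden addition: it relies on the .pt
+# output naming used above). When the forward and reversed traversals run at the
+# same time, each can skip segments the other has already converted by filtering
+# the file list with a guard like this.
+def needs_processing(file, uid_output_path):
+    out_path = os.path.join(uid_output_path, os.path.splitext(file)[0] + '.pt')
+    return not os.path.exists(out_path)
+
+# e.g. files_to_process = [f for f in os.listdir(uid_path)
+#                          if f.endswith(('.csv', '.png')) and needs_processing(f, uid_output_path)]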
diff --git a/semisupervised_method.py b/semisupervised_method.py
index 547ffcd..2608cd4 100644
--- a/semisupervised_method.py
+++ b/semisupervised_method.py
@@ -64,18 +64,13 @@ def __init__(self, data_path, labels_path, UIDs, standardize=True, data_format='
         self.standardize = standardize
         self.data_format = data_format
         self.read_all_labels = read_all_labels
-        self.transforms = ToTensor()
         self.refresh_dataset()
 
     def refresh_dataset(self):
-        # Extract unique segment names and their corresponding labels
         self.segment_names, self.labels = self.extract_segment_names_and_labels()
 
     def add_uids(self, new_uids):
-        # Ensure new UIDs are unique and not already in the dataset
         unique_new_uids = [uid for uid in new_uids if uid not in self.UIDs]
-
-        # Add unique new UIDs and refresh the dataset
         self.UIDs.extend(unique_new_uids)
         self.refresh_dataset()
 
@@ -85,10 +80,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
         segment_name = self.segment_names[idx]
         label = self.labels[segment_name]
-
-        # Load data on-the-fly based on the segment_name
         time_freq_tensor = self.load_data(segment_name)
-
         return {'data': time_freq_tensor, 'label': label, 'segment_name': segment_name}
 
     def extract_segment_names_and_labels(self):
@@ -117,36 +109,24 @@ def extract_segment_names_and_labels(self):
 
     def load_data(self, segment_name):
         data_path_UID = os.path.join(self.data_path, segment_name.split('_')[0])
-        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.csv')
-
+        seg_path = os.path.join(data_path_UID, segment_name + '_filt_STFT.pt')
         try:
-            if self.data_format == 'csv' and seg_path.endswith('.csv'):
-                time_freq_plot = np.array(pd.read_csv(seg_path, header=None))
-                time_freq_tensor = torch.Tensor(time_freq_plot).reshape(1, 128, 128)
-            elif self.data_format == 'png' and seg_path.endswith('.png'):
-                img = Image.open(seg_path)
-                img_data = np.array(img)
-                time_freq_tensor = torch.Tensor(img_data).unsqueeze(0)
-            else:
-                raise ValueError("Unsupported file format")
-
+            time_freq_tensor = torch.load(seg_path)
             if self.standardize:
-                time_freq_tensor = self.standard_scaling(time_freq_tensor)  # Standardize the data
-
+                time_freq_tensor = self.standard_scaling(time_freq_tensor)
             return time_freq_tensor.clone()
-
         except Exception as e:
             print(f"Error processing segment: {segment_name}. Exception: {str(e)}")
-            return torch.zeros((1, 128, 128))  # Return zeros in case of an error
+            return torch.zeros((1, 128, 128))
 
     def standard_scaling(self, data):
         scaler = StandardScaler()
         data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
         return torch.Tensor(data)
 
-def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, data_format='csv', read_all_labels=True, drop_last=False, num_workers=4):
-    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, data_format, read_all_labels)
-    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers)
+def load_data_split_batched(data_path, labels_path, UIDs, batch_size, standardize=False, read_all_labels=True, drop_last=False, num_workers=4):
+    # Pass read_all_labels by keyword; the positional slot after standardize is
+    # data_format in CustomDataset.__init__
+    dataset = CustomDataset(data_path, labels_path, UIDs, standardize, read_all_labels=read_all_labels)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=drop_last, num_workers=num_workers, prefetch_factor=2)
     return dataloader
 
 # To validate the len of the dataloader
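+import math
+
+def validate_loader_length(loader, batch_size, drop_last=False):
+    # Sketch of the validation the comment above refers to (assumes the loader
+    # was built by load_data_split_batched): len(loader) must agree with the
+    # dataset size given the batch size and drop_last setting.
+    expected = len(loader.dataset) // batch_size if drop_last else math.ceil(len(loader.dataset) / batch_size)
+    assert len(loader) == expected, f"Expected {expected} batches, got {len(loader)}"
+    return expected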