diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5a14ecc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,152 @@
+# MicroVI
+This project consists of two parts: MicroVI-main, which runs the standard semi-supervised MicroVI model, and MicroVI-retraining, which adds the ability to retrain on an artificially generated training set sampled from the learned latent space.
+
+The sample script `myjob.sh` calls the `run.py` file, which is copied below. The regression and classification dataset settings are listed first, followed by the percent-supervision (`pct_supervised`) and l-parameter initialization (`l_loc`, `l_scale`) settings. The run number (`run_number`) loop then allows the full cycle of training followed by prediction evaluation to be repeated as many times as desired.
+
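+In the section marked by the `#----------REGRESSION----------` comment, set the final-stage model (the `regression` and `regression_model` variables) to match the selected dataset: a regression model for a regression dataset, or a classification model for a classification dataset.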
+
+def main():
+    ###Regression setting:
+    dataset = 'pomp'
+    regression = True
+    num_labels = 1
+    feature='postdiet_Cholesterol'
+    covariate_list = ['Body_weight', 'Diet', 'prediet_Cholesterol']
+    latent_dim = 150
+    alpha = 0.5 #1.0 #0.1 #0.25 # 0.5 #1.0
+    num_epochs = 200
+    covariate_ablation = False #True #drop covariates from scanvi
+    ##retrain_with_sampling = False #generates the latent sampling
+    ##retrain = False #after the above has been run, this will conduct the actual retraining
+
+    latent_visualization_x_all = False #True
+    #full_retrain_process = False
+    
+    full_retrain_process = True #doing away with above, runs with single click
+
+    #dataset = 'doma'
+    #regression = False
+    #num_labels = 3
+    #feature='Age'
+    #covariate_list = ['Body_weight', 'Gender']
+    #latent_dim = 100
+    #alpha = 1.0 #0.1 #0.25 # 0.5 #1.0
+    #num_epochs = 200
+    #covariate_ablation = False #if True: drop covariates from scanvi
+
+    load_saved_model = False  #False
+    generate_splits = False #true if need to generate data splits (first time through for dataset/feature); false to simply get scanvi portion #if true, make sure covariate_ablation is set to false, otherwise it tries to find covariates that don't exist
+    raw_no_normalization = False
+    batch_size = 42
+
+    ##------------RUN SCANVI----------------------------------------------------------
+    l_loc = 8.35
+    l_scale = 1.5
+
+    pct_supervised = 0
+
+    for run_number in range(46, 100): #Do 5-fold cross-validation 100x 0,100
+
+        seed_index_value = -1 # -1 will loop through all 10 splits; otherwise, set seed_index to particular split value, e.g., 2
+        seed_index_list = []
+        if seed_index_value == -1:
+            for i in range(0, 5): #temporarily changed
+                seed_index_list.append(i)
+        else:
+            seed_index_list.append(seed_index_value)
+            #seed_index_list.append(0) #re-do first split, for testing purposes
+
+        r2_list = [] # compile r2 over all 10 splits (or single split)
+
+        for seed_index in seed_index_list:
+            print('------------------------' + 'STARTING SPLIT ' + str(seed_index) + '------------------------')
+            if full_retrain_process == True:
+                latent_visualization_x_all = False
+                #First, train the model from scratch with covariate ablation on or off:
+                covariate_ablation = True 
+                #covariate_ablation = False #NOW RUNNING WITH COVARIATES 
+                retrain_with_sampling = False
+                load_saved_model = False
+                retrain = False
+                run(dataset, regression, num_labels, feature, alpha, num_epochs, latent_dim, covariate_list, batch_size, raw_no_normalization, seed_index, generate_splits, load_saved_model, covariate_ablation, l_loc, l_scale, pct_supervised, retrain_with_sampling, retrain, run_number)
+
+                #Then, load the trained model and sample from the latent space:
+                retrain_with_sampling = True
+                load_saved_model = True
+                retrain = False
+                run(dataset, regression, num_labels, feature, alpha, num_epochs, latent_dim, covariate_list, batch_size, raw_no_normalization, seed_index, generate_splits, load_saved_model, covariate_ablation, l_loc, l_scale, pct_supervised, retrain_with_sampling, retrain, run_number)
+
+                #Next, load the trained model again and retrain using the generated samples:
+                retrain_with_sampling = True
+                load_saved_model = True
+                retrain = True
+                run(dataset, regression, num_labels, feature, alpha, num_epochs, latent_dim, covariate_list, batch_size, raw_no_normalization, seed_index, generate_splits, load_saved_model, covariate_ablation, l_loc, l_scale, pct_supervised, retrain_with_sampling, retrain, run_number)
+
+                #Finally, load this retrained model and proceed with UMAP generation, etc.
+                retrain_with_sampling = False
+                load_saved_model = True
+                retrain = False
+                run(dataset, regression, num_labels, feature, alpha, num_epochs, latent_dim, covariate_list, batch_size, raw_no_normalization, seed_index, generate_splits, load_saved_model, covariate_ablation, l_loc, l_scale, pct_supervised, retrain_with_sampling, retrain, run_number)
+
+            else: #run particular settings as specified:
+                run(dataset, regression, num_labels, feature, alpha, num_epochs, latent_dim, covariate_list, batch_size, raw_no_normalization, seed_index, generate_splits, load_saved_model, covariate_ablation, l_loc, l_scale, pct_supervised, retrain_with_sampling, retrain, run_number)
+
+            #----------REGRESSION-------------------------------
+            setting = 'latent'
+            method = 'scanvi'
+
+            regression = 'linear' 
+            #regression = 'logistic' 
+    
+            regression_model = 'ridge'
+            #regression_model = 'mlp'
+            #regression_model = 'logistic'
+
+            X_train, X_test, y_train, y_test, save_folder = dl.get_latent_train_test(dataset, regression, method, feature, seed_index, alpha_scanvi=alpha, num_epochs=num_epochs)
+            
+            r2 = reg.do_regression(setting, dataset, feature, regression, X_train, X_test, y_train, y_test, save_folder, regression_model=regression_model) 
+            
+            #save_folder = 'G:/My Drive/Work_Local/Jinbo_Mouse_D/SCANVI/New_Dimension/Comparison_Experiments/Master_Results/'
+            #save_folder = 'C:/Users/rigel/Desktop/Backup/Work_Local/Jinbo_Mouse_D/SCANVI/New_Dimension/Comparison_Experiments/Master_Results/' #for backup path $$$
+            save_folder = 'Master_Results/' #for cluster path $$$ 'home/rim17004/micro-vi/MicroVI-retraining/Master_Results/'
+            scanvi_use = save_folder + 'scANVI_Use/' + str(dataset) + '/' + str(feature) + '/'
+
+            if regression == 'linear':
+                score_name = 'All_R2_'
+            else: #logistic, accuracy or f1 scoring instead of r2
+                #score_name = 'All_ACC_'
+                score_name = 'All_F1_'
+
+            save_path = scanvi_use
+            if full_retrain_process == True:
+                retrain_str = '_retrained'
+            else:
+                retrain_str = ''
+
+            cov_tag = ''
+            if covariate_ablation == False: #True cov ablation will mean covariates excluded
+                cov_tag = '_cov'
+            file_name = score_name + regression_model + '_' + 'Alpha' + str(alpha) + '_' + str(num_epochs) + 'epochs' + '_' + setting + '_' + str(pct_supervised) + 'supervised' + retrain_str + cov_tag + '.csv'
+
+            file_path = scanvi_use + file_name
+            print('FILE PATH= ', file_path)
+            file_exists = os.path.isfile(file_path)
+            if file_exists == True:
+                progress_df = pd.read_csv(file_path)
+            else:
+                progress_df = pd.DataFrame(columns =['Run Number'])
+                progress_df['r2'] = 0
+                #progress_df.to_csv(file_path) #saves
+                #print(progress_df)
+                #print(file_name + ' created. Continuing...')
+
+            add_new = {'Run Number': run_number, 'r2': r2}
+            progress_df.loc[len(progress_df)] = add_new
+            progress_df.to_csv(file_path, index=False) #saves
+
+        
+
+
+if __name__ == "__main__":
+    sys.exit(int(main() or 0))
+