# NOTE: GitHub web-page navigation text that was pasted here with the source has been removed; it was not part of the program.
import os
#import pathos
import pickle
import numpy as np
import pandas as pd
import multiprocessing
import math
import sys
#import seaborn as sns
#!/bin/python3
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat
from queue import Queue
from threading import Thread
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
from sklearn import metrics
import random
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.model_selection import KFold
from scipy.stats import norm
from sklearn.linear_model import LogisticRegressionCV
#import statsmodels.api as sm
from sklearn.cross_decomposition import CCA
#select training data
from sklearn.utils import shuffle
from scipy.stats import spearmanr
from scipy.stats import ranksums
from sklearn.linear_model import LinearRegression
import os
import os.path
import math
def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _dump_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file afterwards."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def generate_AUD_without_comorbidity_dataset(train_ratio=0.8, folder="", anothersession=False):
    """Build the covariate-corrected alcohol-only dataset.

    Steps, in order:

    1. Load the ``AAL_dataset_r1`` (or ``r2`` when *anothersession* is true)
       connectivity pickle and keep the subjects that also appear in the
       psychiatric CSV and in the head-motion CSV.
    2. Keep the subjects whose tobacco and marijuana labels are both False
       and whose alcohol label is True or False.
    3. First session: shuffle a train/test assignment (``random_state=0``)
       with ``int(n * train_ratio)`` training subjects. Other session: every
       subject is assigned to the training side.
    4. For every connectivity column (all columns but the first), remove the
       linear contribution of gender, age, gender*age and the total number
       of missing frames. Models are fitted on the first-session training
       set, saved to ``regress_models.p`` and re-used for the other session.

    Corrected frames are cached as pickles in *folder*; when the cache files
    exist they are loaded instead of being recomputed.

    NOTE(review): the cache lookup does not depend on *train_ratio*, while
    the returned labels always come from a fresh split. Clear the cache
    when changing the ratio.

    Args:
        train_ratio: Fraction of subjects assigned to the training set
            (first session only).
        folder: Directory holding the cache/model pickles. An empty string
            means the current working directory.
        anothersession: When true, process the ``r2`` dataset with the
            regression models saved by a previous first-session run.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` for the first session, or
        ``(train_x, train_y)`` when *anothersession* is true.
    """
    SUBJECT = 'Subject'
    # Normalise once so every branch below agrees on which session is used.
    other_session = bool(anothersession)
    dataset_suffix = "r2" if other_session else "r1"
    raw_dir = os.path.join("..", "raw_data")

    func_conn_raw = _load_pickle(
        os.path.join("..", "fmri_data_processing", "AAL_dataset_" + dataset_suffix + ".pickle")
    )
    psychiatric = pd.read_csv(os.path.join(raw_dir, "psychiatric_data_HCP.csv"))
    # Inner merge: keep only subjects that have psychiatric data, then
    # restore the original connectivity columns.
    func_conn = func_conn_raw.merge(psychiatric[SUBJECT], on=SUBJECT)[func_conn_raw.columns]

    # Head-motion preprocessing.
    # NOTE(review): the Rest1_* columns are used for both sessions; confirm
    # this is intended when anothersession is true.
    missing_frame_number = pd.read_csv(os.path.join(raw_dir, "missing_frame_number.csv"))
    kept_subject = missing_frame_number[
        ["ID", "Rest1_LR", "Rest1_RL", "Rest1_LR _num", "Rest1_RL _num"]
    ].copy()
    kept_subject.columns = [SUBJECT, "s1r", "s2r", "s1n", "s2n"]
    kept_subject["totaln"] = kept_subject["s1n"] + kept_subject["s2n"]
    func_conn = func_conn.merge(kept_subject[SUBJECT], on=SUBJECT)

    # Load SUD labels.
    labels_alc = pd.read_csv(os.path.join(raw_dir, "Full_alc_label.csv"))
    labels_tob = pd.read_csv(os.path.join(raw_dir, "full_tob_label.csv"))
    labels_mar = pd.read_csv(os.path.join(raw_dir, "Full_Marijuana_label.csv"))

    # Alcohol-only cases and fully negative controls. The three label tables
    # are combined by row index -- presumably they share the same row order;
    # verify against the label files.
    alcohol_only = (
        (labels_alc["label"] == True)
        & (labels_tob["label"] == False)
        & (labels_mar["label"] == False)
    )
    no_substance = (
        (labels_alc["label"] == False)
        & (labels_tob["label"] == False)
        & (labels_mar["label"] == False)
    )
    labels_selected = labels_alc[alcohol_only | no_substance]
    func_conn_i = func_conn.merge(labels_selected[SUBJECT], on=SUBJECT)
    labels_i = labels_selected.merge(func_conn_i[SUBJECT], on=SUBJECT)

    # Divide subjects into training set and test set.
    n_subjects = func_conn_i.shape[0]
    if other_session:
        assignment = [True] * n_subjects
    else:
        n_train = int(n_subjects * train_ratio)
        assignment = [True] * n_train + [False] * (n_subjects - n_train)
    train_test_label = pd.Series(shuffle(assignment, random_state=0))
    train_mask = (train_test_label == True).values
    test_mask = (train_test_label == False).values

    # Explicit copies: the frames are written to below.
    train_x = func_conn_i[train_mask].copy()
    train_y = labels_i[train_mask]
    test_x = func_conn_i[test_mask].copy()
    test_y = labels_i[test_mask]

    # Covariates: gender (M -> 0, F -> 1), age, their product, and the total
    # number of missing frames.
    HCP_summary = pd.read_csv(os.path.join(raw_dir, "HCP_summary_S1206.csv"))
    gender = HCP_summary[["Subject", "Gender"]].replace(["M", "F"], [0, 1])
    age = psychiatric[["Subject", "Age_in_Yrs"]]
    frames = kept_subject[[SUBJECT, "totaln"]]
    selected_cov = ["Gender", "Age_in_Yrs", "interactive", "totaln"]

    train_psychiatric = train_x[["Subject"]].merge(age, on="Subject")
    train_HCP_summary = train_x[["Subject"]].merge(gender, on="Subject")
    cov_feature = train_HCP_summary.merge(train_psychiatric, on="Subject")
    cov_feature = cov_feature.merge(frames, on="Subject")
    cov_feature["interactive"] = cov_feature["Gender"] * cov_feature["Age_in_Yrs"]
    # The covariate matrix is the same for every column, so build it once.
    train_cov = cov_feature[selected_cov].to_numpy()

    # The first column is skipped -- presumably the subject id; confirm
    # against the pickle's column order.
    feature_columns = func_conn_i.columns[1:]
    models_path = os.path.join(folder, "regress_models.p")

    if other_session:
        other_cache = os.path.join(folder, "exclude_gender_conn_train_comorbidity_other.p")
        if os.path.exists(other_cache):
            return _load_pickle(other_cache), train_y

        # Re-use the models fitted on the first session's training set.
        regression_models = _load_pickle(models_path)
        train_x = train_x.astype(float)
        for i_c, column in enumerate(feature_columns):
            print("model_correction", i_c)
            linear_regressor = regression_models[column]
            train_x.loc[:, column] = (
                train_x[column].astype(float) - linear_regressor.predict(train_cov)
            )
        _dump_pickle(train_x, other_cache)
        return train_x, train_y

    train_cache = os.path.join(folder, "exclude_gender_conn_train_comorbidity.p")
    test_cache = os.path.join(folder, "exclude_gender_conn_train_comorbidity_test.p")
    if os.path.exists(train_cache) and os.path.exists(test_cache):
        return _load_pickle(train_cache), train_y, _load_pickle(test_cache), test_y

    test_HCP_summary = test_x[["Subject"]].merge(gender, on="Subject")
    test_HCP_summary = test_HCP_summary.merge(age, on="Subject")
    test_HCP_summary = test_HCP_summary.merge(frames, on="Subject")
    test_HCP_summary["interactive"] = test_HCP_summary["Gender"] * test_HCP_summary["Age_in_Yrs"]
    test_cov = test_HCP_summary[selected_cov].to_numpy()

    regression_models = {}
    for i_c, column in enumerate(feature_columns):
        print("model_correction", i_c)
        linear_regressor = LinearRegression(fit_intercept=True)
        linear_regressor.fit(train_cov, train_x[column])
        # Zero the fitted intercept so predict() returns only the covariate
        # contribution; the column keeps its baseline level after subtraction.
        linear_regressor.intercept_ = 0
        train_x.loc[:, [column]] = train_x[column] - linear_regressor.predict(train_cov)
        test_x.loc[:, [column]] = test_x[column] - linear_regressor.predict(test_cov)
        regression_models[column] = linear_regressor

    _dump_pickle(regression_models, models_path)
    _dump_pickle(train_x, train_cache)
    _dump_pickle(test_x, test_cache)
    return train_x, train_y, test_x, test_y