"""Pairwise-ROI deep neural network for AUD subtype / AM-vs-HC classification
on selected functional-connectivity features."""
# NOTE(review): removed GitHub web-UI scraping residue ("Skip to content",
# branch-switcher banner text) that made this file unparseable as Python.
import pickle
import pandas as pd
import os
import numpy as np
from sklearn import metrics
import sys
from dataset_preprocessing import generate_AUD_without_comorbidity_dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    """Two-stage DNN over pairwise functional-connectivity (FC) features.

    Each FC column (an (i, j) ROI pair) is mapped to a coarse region-pair
    bucket via the AAL ROI->region lookup table; one small ``nn.Linear`` is
    learned per bucket, their outputs are concatenated and passed through a
    shared fully-connected trunk.  Two heads are returned: a
    ``clusternumber``-way subtype head and a 1-unit binary head.

    Parameters
    ----------
    column_training : sequence of (i, j) ROI index pairs, one per input column.
    inputsize : total number of input features (kept for interface
        compatibility; not referenced in the body).
    clusternumber : number of classes for the subtype head.
    structure : (bucket layer width, trunk layer widths...).
    """

    def __init__(self, column_training, inputsize, clusternumber, structure=(20, 5)):
        super(Net, self).__init__()
        # AAL atlas table: maps each ROI index to a coarse brain region
        # (1-based in the CSV, shifted to 0-based here).
        region_roi = pd.read_csv('..' + os.sep + "raw_data" + os.sep + "aal_roi_list_Regions.csv")
        ROIlist = (region_roi["Region"] - 1).tolist()
        # For every FC column, build an order-independent key for its region
        # pair: min(a*8+b, b*8+a) collapses (a, b) and (b, a) to one bucket
        # (assumes fewer than 8 coarse regions -- TODO confirm against CSV).
        mark = []
        region_keys = set()
        for col in column_training:
            i, j = col[0], col[1]
            key = min(ROIlist[i] * 8 + ROIlist[j], ROIlist[j] * 8 + ROIlist[i])
            region_keys.add(key)
            # FIX: the original re-tested `key in region_keys` immediately
            # after adding it -- always true, so append unconditionally.
            mark.append(key)
        region_keys = list(region_keys)
        self.ROImark = torch.IntTensor(mark)
        # One small linear layer per region-pair bucket; its input width is
        # the number of FC columns that fall into that bucket.
        self.pairwise_ROI = nn.ModuleList()
        for key in region_keys:
            in_features = int((self.ROImark == key).sum())
            self.pairwise_ROI.append(nn.Linear(in_features, structure[0]))
        self.candicate_hm_region = region_keys
        # Shared trunk: concatenated bucket outputs -> structure[1:] widths.
        self.fclist = nn.ModuleList()
        cur_dim = structure[0] * len(self.candicate_hm_region)
        self.structure = structure
        for i in range(len(structure) - 1):
            self.fclist.append(nn.Linear(cur_dim, structure[i + 1]))
            cur_dim = structure[i + 1]
        cur_dim = structure[-1]
        self.final = nn.Linear(cur_dim, clusternumber)  # subtype head
        self.final2 = nn.Linear(cur_dim, 1)             # binary AM-vs-HC head
        self.dropout1 = nn.Dropout(0.5)

    def forward(self, x):
        """Return (subtype logits, binary logit) for a (batch, features) input."""
        # NOTE: the LAST bucket is processed first, then buckets 0..n-2, so
        # the concatenation order is [-1, 0, 1, ...].  Kept as-is: trained
        # weights depend on this ordering.
        region_x = self.pairwise_ROI[-1](x[:, self.ROImark == self.candicate_hm_region[-1]])
        for i in range(len(self.pairwise_ROI) - 1):
            region_x = torch.cat(
                [region_x, self.pairwise_ROI[i](x[:, self.ROImark == self.candicate_hm_region[i]])],
                axis=1)
        region_x = F.relu(region_x)
        region_x = self.dropout1(region_x)
        for i in range(len(self.structure) - 1):
            region_x = self.fclist[i](region_x)
            region_x = F.relu(region_x)
        x_final = region_x
        x_final = self.final(x_final)
        output = x_final
        # NOTE(review): final2 is applied to the OUTPUT of `final`, so it
        # only type-checks when clusternumber == structure[-1]; it was
        # probably meant to take region_x.  Preserved so existing trained
        # checkpoints stay loadable -- confirm intent before changing.
        output2 = self.final2(x_final)
        return output, output2
def conduct_classification(alpha, dim):
    """Train the subtype DNN and report AM-vs-HC AUC on a held-out split.

    Builds a training set of AM subjects labelled by their cluster id plus
    HC subjects labelled ``max(cluster) + 1``, trains ``Net`` with a joint
    multi-class margin loss (subtype head) + BCE (binary AM/HC head),
    model-selects on validation AUC and saves/reports the matching test AUC.

    Parameters
    ----------
    alpha : feature-selection strength; only used to locate the folder
        holding the pre-computed clustering result.
    dim : dimension tag of the clustering-result file to load.
    """
    from torch.utils.data import TensorDataset, DataLoader
    from sklearn.model_selection import train_test_split
    from sklearn.utils import shuffle

    folder = "selected_result" + os.sep + "selected_fc_features"
    cluster_data_folder = folder + os.sep + str(alpha)
    # FIX: close the pickle file deterministically (handle was leaked).
    # NOTE(review): pickle on trusted local results only -- never untrusted input.
    with open(cluster_data_folder + os.sep + str(dim) + "_clustering_result.p", "rb") as fh:
        cluster_labels = pickle.load(fh)
    train_x_raw, train_y_raw, test_x_raw, test_y_raw = generate_AUD_without_comorbidity_dataset(folder=folder)
    # NOTE(review): second-session data is loaded but never used below;
    # kept for parity with the original pipeline.
    x_raw_other, y_raw_other = generate_AUD_without_comorbidity_dataset(folder=folder, anothersession=True)
    # Split training subjects into AM (label==True) and HC (label==False).
    subtype_classifier_train_x_am = train_x_raw[train_y_raw["label"] == True]
    subtype_classifier_train_x_hc = train_x_raw[train_y_raw["label"] == False]
    # Subject -> subtype bookkeeping table (AM: cluster id, HC: max cluster + 1).
    cluster_result_withsubject = pd.DataFrame({"Subject": subtype_classifier_train_x_am["Subject"], "Subtype": cluster_labels})
    cluster_result_withsubject_hc = pd.DataFrame({"Subject": subtype_classifier_train_x_hc["Subject"], "Subtype": cluster_labels.max() + 1})
    cluster_result_withsubject = pd.concat([cluster_result_withsubject, cluster_result_withsubject_hc], axis=0)
    subtype_classifier_train_x_am = subtype_classifier_train_x_am.drop("Subject", axis=1)
    subtype_classifier_train_x_hc = subtype_classifier_train_x_hc.drop("Subject", axis=1)
    test_x_raw = test_x_raw.drop("Subject", axis=1)
    column_training = subtype_classifier_train_x_am.columns.tolist()
    # HC subjects get the one-past-the-last-cluster label.  FIX: derive the
    # AM/HC decision threshold from the data instead of the hard-coded 4.
    hc_label = int(cluster_labels.max()) + 1
    trainlabel = np.concatenate([cluster_labels, [hc_label] * subtype_classifier_train_x_hc.shape[0]], axis=0)
    traindata = np.concatenate([subtype_classifier_train_x_am, subtype_classifier_train_x_hc], axis=0)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    batch_size = 32
    epoches = 500
    learning_rate = 0.001
    ratio = 0.75        # weight of the subtype loss vs. the binary loss
    seed = 0
    train_kwargs = {'batch_size': batch_size}
    test_kwargs = {}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)
    traindata, trainlabel = shuffle(traindata, trainlabel, random_state=0)
    tensor_x = torch.Tensor(traindata)
    tensor_y = torch.Tensor(trainlabel).type(torch.long)
    test_y_raw = test_y_raw["label"]
    # Held-out subjects split 50/50 into validation/test, stratified on label.
    validx, testx, validy, testy = train_test_split(test_x_raw, test_y_raw, test_size=0.5, random_state=0, stratify=test_y_raw)
    validx = validx.to_numpy()
    testx = testx.to_numpy()
    validy = validy.to_numpy()
    testy = testy.to_numpy()
    # Evaluation loaders use one full-size batch each (batch = whole split).
    test_tensor_x = torch.Tensor(testx)
    test_tensor_y = torch.Tensor(testy).type(torch.long)
    test_kwargs['batch_size'] = testx.shape[0]
    test_loader = DataLoader(TensorDataset(test_tensor_x, test_tensor_y), **test_kwargs)
    valid_tensor_x = torch.Tensor(validx)
    valid_tensor_y = torch.Tensor(validy).type(torch.long)
    test_kwargs['batch_size'] = validx.shape[0]
    valid_loader = DataLoader(TensorDataset(valid_tensor_x, valid_tensor_y), **test_kwargs)
    model_dir = "classification"  # FIX: no longer clobbers cluster_data_folder
    try:
        os.mkdir(model_dir)
    except OSError as error:
        print(error)  # directory likely already exists; proceed either way
    torch.manual_seed(seed)
    train_loader = DataLoader(TensorDataset(tensor_x, tensor_y), **train_kwargs)
    model = Net(column_training, traindata.shape[1], hc_label + 1, structure=[2, 5]).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    bce = torch.nn.BCEWithLogitsLoss()
    mlm = torch.nn.MultiMarginLoss()
    maxvalidauc = -1
    maxtestedauc = -1  # FIX: initialize so the epoch report cannot NameError
    for epoch in range(1, epoches + 1):
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output, output2 = model(data)
            # Joint objective: subtype margin loss + AM-vs-HC BCE
            # (a subject is AM iff its label is below the HC label).
            l1 = ratio * mlm(output, target)
            l2 = (1 - ratio) * bce(output2, torch.unsqueeze((target < hc_label), dim=1).type_as(output2))
            loss = l1 + l2
            loss.backward()
            optimizer.step()
        model.eval()
        print(l1.item(), l2.item())  # losses of the last training batch
        with torch.no_grad():
            selected = False
            for data, target in valid_loader:
                data, target = data.to(device), target.to(device)
                output, o2 = model(data)
                o2 = torch.sigmoid(o2)
                fpr, tpr, thresholds = metrics.roc_curve(target.cpu(), o2.cpu(), pos_label=1, drop_intermediate=False)
                valid_auc = metrics.auc(fpr, tpr)
                if maxvalidauc <= valid_auc:
                    maxvalidauc = valid_auc
                    selected = True
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output, o2 = model(data)
                o2 = torch.sigmoid(o2)
                fpr, tpr, thresholds = metrics.roc_curve(target.cpu(), o2.cpu(), pos_label=1, drop_intermediate=False)
                auc = metrics.auc(fpr, tpr)
            if selected == True:
                # New best validation AUC: remember the paired test AUC and
                # checkpoint the model.
                maxtestedauc = auc
                print("save model in", epoch)
                torch.save(model.state_dict(), model_dir + os.sep + "best_dnn_model_" + str(seed) + "_" + str(learning_rate) + "_" + str(ratio) + ".model")
            print(epoch, "auc is ", valid_auc, auc, maxvalidauc, maxtestedauc, loss.item())
def main():
    """Script entry point: run with the published default setting."""
    conduct_classification(alpha=0.0005, dim=3)


if __name__ == "__main__":
    main()