Skip to content
Permalink
d20478c430
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
110 lines (89 sloc) 6 KB
#Annotation file
#memory.limit(size=157370)
#df.Tuk2017<-read.csv("./NCCs/Tukiainen2017_Xchr_Escape.csv", row.names = 1)
#rownames(df.Tuk2017)<-t(matrix(unlist(strsplit(rownames(df.Tuk2017), "[.]")),nrow=2))[,1]
#df.Tuk2017[which(df.Tuk2017[,length(colnames(df.Tuk2017))] == "x"),"Reported_XCI_status"]<-"Variable"
# Load the X-chromosome annotation tables shipped in `pathtoAnn`.
# Each CSV is read with its first column as row names.
pathtoAnn <- "../code_DTA_NCCiPSC/"
df.Tuk2017 <- read.csv(paste0(pathtoAnn, "Tukiainen2017_Xchr_Escape.csv"), row.names = 1)
# Keep only the part of each row name before the first "." (presumably a
# version suffix on the gene ID -- confirm against the CSV). sub() leaves IDs
# without a dot untouched, whereas reshaping strsplit() output into a two-row
# matrix silently misaligns every following ID as soon as one row name does
# not contain exactly one dot.
rownames(df.Tuk2017) <- sub("[.].*$", "", rownames(df.Tuk2017))
# Rows flagged with "x" in the last column are reported as "Variable".
df.Tuk2017[which(df.Tuk2017[, ncol(df.Tuk2017)] == "x"), "Reported_XCI_status"] <- "Variable"
# Replace two gene symbols (KAL1 -> ANOS1, HDHD1 -> PUDP). which() drops NA
# comparisons; a logical subscript containing NA is rejected by data-frame
# assignment, so a single missing SYMBOL would otherwise abort the script.
df.Tuk2017[which(df.Tuk2017$SYMBOL == "KAL1"), "SYMBOL"] <- "ANOS1"
df.Tuk2017[which(df.Tuk2017$SYMBOL == "HDHD1"), "SYMBOL"] <- "PUDP"
df.Sauterad2021 <- read.csv(paste0(pathtoAnn, "Sauterad2021_tableS4.csv"), row.names = 1)
df.Katsir2019 <- read.csv(paste0(pathtoAnn, "Katsir2019_TableS5.csv"), row.names = 1)
# Read the marker tables stored under `NCCs/` inside `pathtoAnn`.
# `header = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, `TRUE` is a reserved word. The last two reads rely on
# read.csv()'s default, which is also header = TRUE.
df.Lee <- read.csv(paste0(pathtoAnn, "NCCs/Lee2007_p75.csv"), header = TRUE)
df.KarzbrunDEGs <- read.csv(paste0(pathtoAnn, "NCCs/Karzbrun_NoSE.csv"), header = TRUE)
df.Frith <- read.csv(paste0(pathtoAnn, "NCCs/FrithTsakiridis2018s2_DEGs.csv"), header = TRUE)
df.SimoesCosta <- read.csv(paste0(pathtoAnn, "NCCs/SimoesCosta_NoNPB.csv"))
df.Hsiao <- read.csv(paste0(pathtoAnn, "NCCs/HsiaoGilad2020s1.csv"))
# Build the Soldatov marker table from every file in `pathtoAnn` whose name
# matches "Soldatov_S". read_csv(id = "file") stacks the files and records the
# originating path in `file`; the set label is the "[HT]..." stem of that file
# name with the extension removed.
# dplyr verbs are namespace-qualified, as `select` already was, so that a
# same-named function from another attached package (other packages also
# export `rename`) cannot be picked up by accident.
df.Soldatov <- list.files(path = pathtoAnn, pattern = "Soldatov_S", full.names = TRUE) %>%
  read_csv(id = "file") %>%
  # The dot before "csv" is escaped so it only matches a literal ".".
  dplyr::mutate(Soldatov = tools::file_path_sans_ext(str_extract(file, "[HT][a-z2]*\\.csv"))) %>%
  dplyr::select(-file) %>%
  # "...1" is the name readr assigns to the unnamed first column.
  dplyr::rename(gene = "...1") %>%
  dplyr::mutate(SYMBOL = toupper(gene)) %>%
  as.data.frame()
# De-duplicate on the upper-cased SYMBOL, keeping the first occurrence. SYMBOL
# is the column that is kept (as `gene`) and later used as row names, so it is
# the one that has to be unique; de-duplicating on the original-case `gene`
# would let symbols that differ only in case survive as duplicates.
df.Soldatov <- df.Soldatov[!duplicated(df.Soldatov$SYMBOL), , drop = FALSE] %>%
  dplyr::select(c("SYMBOL", "Soldatov")) %>%
  dplyr::rename("gene" = "SYMBOL")
# Give every marker table its canonical column names, then keep only the first
# row per gene.
# The columns are renamed BEFORE de-duplicating so that the key is exactly the
# column that ends up named `gene` (the one later used for row names).
# De-duplicating on `$gene` first depends on the input header: `$` matches
# partially, and if no column matches, the subset silently becomes an empty
# table.
colnames(df.Lee) <- c("gene", "Lee")
df.Lee <- df.Lee[!duplicated(df.Lee[["gene"]]), , drop = FALSE]

colnames(df.KarzbrunDEGs) <- c("gene", "KarzbrunDEGs")
df.KarzbrunDEGs <- df.KarzbrunDEGs[!duplicated(df.KarzbrunDEGs[["gene"]]), , drop = FALSE]

colnames(df.Frith) <- c("gene", "Frith")
df.Frith <- df.Frith[!duplicated(df.Frith[["gene"]]), , drop = FALSE]

colnames(df.SimoesCosta) <- c("gene", "SimoesCosta", "Stage")
df.SimoesCosta <- df.SimoesCosta[!duplicated(df.SimoesCosta[["gene"]]), , drop = FALSE]

# In this table the gene column is the second one; the first is named ENSG.
colnames(df.Hsiao) <- c("ENSG", "gene", "Hsiao")
df.Hsiao <- df.Hsiao[!duplicated(df.Hsiao[["gene"]]), , drop = FALSE]
# Use each marker table's `gene` column as its row names, so that rows can be
# selected by gene name further down.
row.names(df.Soldatov) <- df.Soldatov[["gene"]]
row.names(df.Lee) <- df.Lee[["gene"]]
row.names(df.KarzbrunDEGs) <- df.KarzbrunDEGs[["gene"]]
row.names(df.Frith) <- df.Frith[["gene"]]
row.names(df.SimoesCosta) <- df.SimoesCosta[["gene"]]
row.names(df.Hsiao) <- df.Hsiao[["gene"]]
# Append a table-specific suffix to every label (presumably so that labels
# coming from different tables stay distinguishable -- confirm).
df.Soldatov[["Soldatov"]] <- paste0(df.Soldatov[["Soldatov"]], ".S")
df.Lee[["Lee"]] <- paste0(df.Lee[["Lee"]], ".L")
df.KarzbrunDEGs[["KarzbrunDEGs"]] <- paste0(df.KarzbrunDEGs[["KarzbrunDEGs"]], ".KDEGs")
df.Frith[["Frith"]] <- paste0(df.Frith[["Frith"]], ".F")
df.SimoesCosta[["SimoesCosta"]] <- paste0(df.SimoesCosta[["SimoesCosta"]], ".C")
df.Hsiao[["Hsiao"]] <- paste0(df.Hsiao[["Hsiao"]], ".H")
# Transposed copy of assay(vsd3) as a data frame: the rows of the assay become
# the columns of datExpr0.
datExpr0 <- as.data.frame(t(assay(vsd3)))
# datExpr is the same table with its columns relabelled through
# df[, "gene_name"], looked up by the original column IDs.
datExpr <- datExpr0
# colnames(datExpr0) are exactly the names that rownames(t(datExpr0)) returns,
# without transposing the whole table just to read them back.
colnames(datExpr) <- df[colnames(datExpr0), "gene_name"]
# Convert every label column to a factor; the labels are binarized per level
# further down.
df.Soldatov[["Soldatov"]] <- as.factor(df.Soldatov[["Soldatov"]])
df.Lee[["Lee"]] <- as.factor(df.Lee[["Lee"]])
df.KarzbrunDEGs[["KarzbrunDEGs"]] <- as.factor(df.KarzbrunDEGs[["KarzbrunDEGs"]])
df.Frith[["Frith"]] <- as.factor(df.Frith[["Frith"]])
df.SimoesCosta[["SimoesCosta"]] <- as.factor(df.SimoesCosta[["SimoesCosta"]])
df.Hsiao[["Hsiao"]] <- as.factor(df.Hsiao[["Hsiao"]])
# library() stops with an error if WGCNA is not installed. require() only
# returns FALSE, and the script would then fail later at the first
# binarizeCategoricalVariable() call with a less helpful message.
library(WGCNA)
# Keep the first LastMetaColumn + 1 columns of `df`.
df2 <- df[, seq_len(LastMetaColumn + 1)]
#df2$gene_name<-All.df.ENSG[df2$ENSEMBL,"SYMBOL"]
# For each marker table, look up the label of every gene in df2$gene_name
# (genes absent from a table yield NA) and expand it into level-vs-all
# indicator columns prefixed with the table name. The indicator columns are
# appended to df2 in the order listed here.
df3 <- cbind(
  df2,
  binarizeCategoricalVariable(
    df.KarzbrunDEGs[df2$gene_name, "KarzbrunDEGs"],
    namePrefix = "KarzbrunDEGs_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  ),
  binarizeCategoricalVariable(
    df.Lee[df2$gene_name, "Lee"],
    namePrefix = "Lee_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  ),
  binarizeCategoricalVariable(
    df.Frith[df2$gene_name, "Frith"],
    namePrefix = "Frith_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  ),
  binarizeCategoricalVariable(
    df.SimoesCosta[df2$gene_name, "SimoesCosta"],
    namePrefix = "SimoesCosta_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  ),
  binarizeCategoricalVariable(
    df.Soldatov[df2$gene_name, "Soldatov"],
    namePrefix = "Soldatov_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  ),
  binarizeCategoricalVariable(
    df.Hsiao[df2$gene_name, "Hsiao"],
    namePrefix = "Hsiao_", includeLevelVsAll = TRUE,
    dropUninformative = TRUE, includePairwise = FALSE, nameForAll = ""
  )
)
# Summarise the expression of every marker gene set, one summary value per
# column of assay(vsd3).
# matrixStats provides colMedians()/rowMedians(); it is loaded before the first
# use, and with library() so that a missing package stops the script here.
library(matrixStats)
# This offset is correct: gene_id is the last meaningful df.meta column (#6),
# and ENSEMBL is inserted after it. Hence the annotation columns start at 6+2.
df3.marker <- df3[, (LastMetaColumn + 2):ncol(df3), drop = FALSE]
df3.marker <- df3.marker[, order(colnames(df3.marker)), drop = FALSE]
# Per gene set (indicator column of df3.marker): the median over the member
# genes' rows of assay(vsd3), for every assay column.
# `drop = FALSE` keeps the subset a matrix when a set has exactly one member.
# Without it the subset collapses to a plain vector and colMedians() fails with
# "Argument 'dim.' must be an integer vector of length two".
df.Markers.abs <- as.data.frame(apply(df3.marker, 2, function(X) {
  member_ids <- rownames(df3.marker)[which(X == 1)]
  colMedians(assay(vsd3)[member_ids, , drop = FALSE])
}))
rownames(df.Markers.abs) <- colnames(assay(vsd3))
# Centre every assay row on its own median, then relabel the rows through
# df[, "gene_name"].
mn.mednorm <- assay(vsd3) - rowMedians(assay(vsd3))
rownames(mn.mednorm) <- df[rownames(mn.mednorm), "gene_name"]
# Per gene set: the mean of the median-centred values over the member genes.
# Same single-member guard as above (colMeans() needs a two-dimensional input).
# NOTE(review): rows are selected by gene_name here; if gene_name contains
# duplicates, name-based indexing returns the first matching row -- confirm
# that this is intended.
df.Markers.med <- as.data.frame(apply(df3.marker, 2, function(X) {
  member_ids <- rownames(df3.marker)[which(X == 1)]
  colMeans(mn.mednorm[df[member_ids, "gene_name"], , drop = FALSE])
}))
#
# df3.marker<-df3[,c((LastMetaColumn+2):length(df3))]
#
# df3.marker<-df3.marker[,order(colnames(df3.marker))]
# df.Markers.abs<-as.data.frame(apply(df3.marker, 2, function(X) {colMedians(assay(vsd3)[rownames(df3.marker[which(X==1),]),])}))
# #Error in h(simpleError(msg, call)) :
# #error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': Argument 'dim.' must be an integer vector of length two.
#
# rownames(df.Markers.abs)<-colnames(assay(vsd3))
#
# require(matrixStats)
# mn.mednorm<-assay(vsd3)-rowMedians(assay(vsd3))
# rownames(mn.mednorm)<-df[rownames(mn.mednorm),"gene_name"]
# df.Markers.med<-as.data.frame(apply(df3.marker, 2, function(X) {colMeans(mn.mednorm[df[rownames(df3.marker[which(X==1),]),"gene_name"],])}))