WGCNA_DEG02_DTA_pass.R

#memory.limit(size=157370)
library(WGCNA)
library(tidyverse)
library(DESeq2)
tf.gseaset<-F
catvars<-c("Xa","donor","simple")

outdir2<-paste0(outdir,"/",tf.gseaset,"gseaSet")
dir.create(outdir2, showWarnings = FALSE)
#Perform WGCNA on either all ENSEMBL genes in df (default), only or gene_name mapping to non-duplicate ENTREZIDs (tf.gseaset = T)
if (all(df[,c(grep(pattern = "^d", colnames(df)))] == assay(vsd3)))
{
  if(tf.gseaset){
    ensembl.counts<-df[,c(grep(pattern = "^d", colnames(df)))]
    ENSG.counts<-ensembl.counts
    rownames(ENSG.counts)<-df$ENSEMBL
    datExpr0<-t(ENSG.counts[df.gsea2$ENSEMBL,])
  } else{datExpr0 <- as.data.frame(t(assay(vsd3)))}

  gsg <- goodSamplesGenes(datExpr0, verbose = 3)
  gsg$allOK
  if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) {
      printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    }
    if (sum(!gsg$goodSamples)>0) {
      printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    }
    
    # Remove the offending genes and samples from the data:
    datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
  }
  datExpr<-datExpr0
  if(tf.gseaset){colnames(datExpr)<-df.gsea2$SYMBOL
  } else{colnames(datExpr)<-df$gene_name}
}

sampleTree <- hclust(dist(datExpr0), method = "average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)

# no need to remove samples
datTraits <- as.data.frame(colData(vsd3)[,catvars])

datTraits2<-binarizeCategoricalColumns(datTraits, includeLevelVsAll = T, nameForAll = "", includePairwise = F, dropUninformative = T, dropFirstLevelVsAll = F)
rownames(datTraits2)<-rownames(datTraits)

df.elisa<-read.csv("ELISA_hCG_PIGF_sp.csv",header = T,row.names = "RegistryID")


df.elisa<-df.elisa[rownames(datTraits2),]
if(all(rownames(df.elisa) == rownames(datTraits2))) {datTraits3<-cbind(datTraits2, df.elisa) %>% dplyr::select(-Cell_Line)}


# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr), method = "average")

traitColors = numbers2colors(datTraits3, signed = F)

# Plot the sample dendrogram and the colors underneath.
pdf(paste0(outdir2,"/Dendro_mod.pdf"))
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits3),
                    main = "Sample dendrogram and trait heatmap")
dev.off()
save(datExpr, datTraits3, file = "tbl_wgcna-dataInput.RData")


options(stringsAsFactors = FALSE)
collectGarbage()
#enableWGCNAThreads()
# Load the data saved in the first part
#lnames <- load(file = "tbl_wgcna-dataInput.RData")
#The variable lnames contains the names of loaded variables.
#lnames

# Choose a set of soft-thresholding powers
powers = c(seq(from = 1, to=30, by=1))

# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5, networkType = "signed hybrid", corFnc = bicor, dataIsExpr = T)

# Plot the results:
require(ggplot2)
ggplot(sft$fitIndices, aes(x=Power, y=SFT.R.sq, label=Power)) + geom_text(color="red") + geom_hline(yintercept=.85, color="red") + labs(title="Scale independence")

ggplot(sft$fitIndices, aes(x=Power, y=`mean.k.`, label=Power)) + geom_text(color="red") + labs(title="Mean connectivity")


net <- blockwiseModules(datExpr, power = sft$powerEstimate,
                        networkType = "signed hybrid", corType = "bicor",
                        TOMType = "signed", minModuleSize = 30,
                        maxBlockSize=30000,
                        reassignThreshold = 0, 
                        mergeCutHeight = 0.25,
                        numericLabels = TRUE, pamRespectsDendro = FALSE,
                        saveTOMs = TRUE,
                        saveTOMFileBase = "TOMTOM",
                        verbose = 3)


# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)

# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs
geneTree = net$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree,
     file = "TBL-networkConstruction-auto.RData")
grey <- df[which(moduleColors=="grey"),]

nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
#MEs = orderMEs(MEs0)
MEs = orderMEs(MEs0, order = c(1:length(MEs0)))
moduleTraitCor = cor(MEs, datTraits3, use = "pairwise.complete.obs", method = "spearman")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

pdf(paste0(outdir2,"/ModuleTrait.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
                   signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.05]<-""
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits3),
               yLabels = names(MEs),
               yColorLabels = T,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = F,
               cex.text = 0.6,
               zlim = c(-1,1),
               cex.lab.x = 0.6,
               main = paste("Module-trait relationships"))
dev.off()

# A data frame with module eigengenes can be obtained as follows 
MEsAutomatic=net$MEs 
moduleLabelsAutomatic=net$colors; 
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic) 

vc.PIGFn = as.data.frame(datTraits3$PIGF_per_ug_RNA)
vc.hCGn = as.data.frame(datTraits3$hCG_per_ug_RNA)
GS.PIGFn=as.numeric(cor(datExpr,vc.PIGFn,use="pairwise.complete.obs")) 
GS.hCGn=as.numeric(cor(datExpr,vc.hCGn,use="pairwise.complete.obs")) 

#df2<-df[order(match(df$gene_name, rownames(t(datExpr)))),]


df.GS.Markers<-as.data.frame(apply(df.Markers.med, 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))})) 

mn.GS.Markers<-numbers2colors(df.GS.Markers, signed=T, lim=c(-1,1))
colnames(mn.GS.Markers)<-colnames(df.GS.Markers)

#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR)


mn.mednorm<-assay(vsd3)-rowMedians(assay(vsd3))
rownames(mn.mednorm)<-df[rownames(mn.mednorm),"gene_name"]

df.escape<-as.data.frame(cbind(colMeans(mn.mednorm[df$gene_name[vtf.ESC2],]), 
                               colMeans(mn.mednorm[df$gene_name[vtf.PAR],]), 
                               colMeans(mn.mednorm[df$gene_name[vtf.oESC],]), 
                               colMeans(mn.mednorm[df$gene_name[vtf.PAIR],])))

colnames(df.escape)<-c("All", "PAR","X_escapee", "X_pair")
df.GS.escape<-as.data.frame(apply(df.escape, 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))

mn.GS.escape<-numbers2colors(df.GS.escape, signed=T)
colnames(mn.GS.escape)<-colnames(df.escape)

datTraits4<-cbind(datTraits3[,c(grep("simple",colnames(datTraits3)),grep("RNA", colnames(datTraits3)))], 
                  df.escape,
                  df.Markers.med[,-c(grep(".[K/L]$", colnames(df.Markers.med)),grep("early", colnames(df.Markers.med)))])
colnames(datTraits4)[c(grep("RNA", colnames(datTraits4)))]<-c("nPlGF","nhCG")

require(Hmisc)
require(corrplot)

#####
pdf(paste0(outdir2,"/Corrplot.pdf"))

cor_5<- rcorr(as.matrix(datTraits4[,-c(grep("simple",colnames(datTraits4)))]),type = "spearman")
M <- cor_5$r
p_mat <- cor_5$P
p_mat[is.na(p_mat)] <- 0
corrplot(M, type = "upper", order = "original", method="square",
         p.mat = p_mat, sig.level = 0.05, insig = "blank", 
         col=blueWhiteRed(100), tl.col="black", tl.srt = 60, na.label = "NA")
dev.off()


df.wald<-df.gsea2
if(!tf.gseaset) {
  stopifnot(all(df$gene_name == colnames(datExpr)))
  require(preprocessCore)
  vc.Wald<-c("fXO_stat", "mXO_stat", "FM_stat", "fXO2_stat", "fmO_stat")
  mn.stat_qn2<-normalize.quantiles(matrix(unlist(df[,vc.Wald]), ncol = length(vc.Wald)))
  colnames(mn.stat_qn2)<-vc.Wald
  df.wald<-as.data.frame(cbind(mn.stat_qn2, 
                               "aveiXO_stat" = rowMeans(mn.stat_qn2[,grep("XO_stat", colnames(mn.stat_qn2))]), 
                               "aveXO_stat" = rowMeans(mn.stat_qn2[,c(1:3)]))) %>% add_column("gene_name" = df$gene_name, .before = 1)
  rownames(df.wald)<-df$ENSEMBL
}

pdf(paste0(outdir2,"/DendroTrait_mod.pdf"))
blocknumber=1
mColors=moduleColorsAutomatic[net$blockGenes[[blocknumber]]] 
datColors=data.frame(mColors,
                     fXO=numbers2colors(df.wald$fXO_stat, signed=T, lim = c(-10,10)),
                     mXO=numbers2colors(df.wald$mXO_stat, signed=T, lim = c(-10,10)),
                     fmXO=numbers2colors(df.wald$FM_stat, signed=T, lim = c(-10,10)),
                     aveXO=numbers2colors(df.wald$aveXO_stat, signed=T, lim = c(-10,10)),
                     nPIGF=numbers2colors(GS.PIGFn, signed=T), 
                     nhCGB=numbers2colors(GS.hCGn, signed=T),
                     mn.GS.Markers[,-c(grep(".[K/L]$", colnames(mn.GS.Markers)),grep("early", colnames(mn.GS.Markers)))])
# Plot the dendrogram and the module colors underneath 
plotDendroAndColors(net$dendrograms[[blocknumber]]
                    ,colors=datColors, 
                    groupLabels=names(datColors),
                    dendroLabels=FALSE, hang=0.03,addGuide=TRUE,guideHang=0.05) 
dev.off()

moduleTraitCor = cor(MEs, datTraits4, use = "pairwise.complete.obs", method = "pearson")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)


pdf(paste0(outdir2,"/ModuleTrait_corr_large.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 1), "\n(",
                   signif(-log(moduleTraitPvalue, 10), 2)
                   , ")"
                   , sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.05]<-""
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits4),
               yLabels = names(MEs),
               #ySymbols = names(MEs),
               yColorLabels = T,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = F,
               cex.text = 0.5,
               zlim = c(-1,1),
               cex.lab.x = 0.6,
               main = paste("Module-trait relationships"))
dev.off()


### ENRICHMENT TESTING ###


require(msigdbr)
require(clusterProfiler)
require(org.Hs.eg.db)
library(DOSE)

collectGarbage()

#vc.ensByMod<-setNames(names(net$colors), paste("ME", net$colors, "_", sep=""))
vc.ensByMod<-setNames(names(net$colors), moduleColors)
ls.ensByMod <- split(vc.ensByMod, names(vc.ensByMod))

if(!tf.gseaset) {
  ls.test<-lapply(ls.ensByMod, function(X) unique(df.gsea2[df.gsea2$SYMBOL %in% X,"ENTREZID"]))
} else {ls.test<-lapply(ls.ensByMod, function(X) unique(All.df[which(All.df$SYMBOL %in% X),"ENTREZID"]))}


msig_wgcna<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
                            pAdjustMethod = "BH", TERM2GENE=msig_Hall_C1_CP_TFT_MOD_HP, universe=unlist(ls.test))

msig_wgcna2<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
                             pAdjustMethod = "BH", TERM2GENE=msig_CGP_C6_C7_C8, universe=unlist(ls.test))

go_wgcna<- compareCluster(ls.test, fun = enrichGO, pvalueCutoff = 0.1, OrgDb = org.Hs.eg.db, keyType = "ENTREZID", ont = "ALL",
                          pAdjustMethod = "BH", universe=unlist(ls.test))

collectGarbage()


fx.labels1<- Vectorize(function(X) {
  s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
  abbreviate(s, minlength = 30, dot = T)
})

fx.labels2<- Vectorize(function(X) {
  s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
  s<-str_replace_all(s, "_", " ")
  abbreviate(s, minlength = 30, dot = T)
})


gpfx.dotplot<-function(collections, ht.fx, th.fx, labelfx, padj, rown, coln, X){
  X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio), 
             Score = -log(p.adjust, 10), 
             Collection = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), ht.fx, 1)))
  X3<-X2 %>% filter(Collection %in% collections & p.adjust <= padj) %>%
    mutate(Category = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), th.fx, 1 )))
  clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 5) +
    facet_wrap(vars(Collection), ncol = coln, nrow = rown, scales = "free") +
    #      aes(-log(qvalue,10), fct_reorder(ID, -log(qvalue,10)))) + #facet_wrap(vars(Cluster), nrow=1) +
    # geom_segment(aes(xend=0, yend = Description)) +
    #geom_point(aes(size = as.numeric(summary(strsplit(geneID, "/"))[,1]))) +
    #geom_point(aes(size = Count)) +
    #scale_color_viridis_c(guide=guide_colorbar(reverse=F)) +
    scale_color_gradientn(colours = rainbow(5)) +
    scale_y_discrete(labels = labelfx)} 

pcut2<-0.05
vc.Collections<-c("WP")
glol.w1<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna)

pcut2<-0.05
vc.Collections<-c("KEGG")
glol.w2<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1 ,msig_wgcna)

pcut2<-0.05
vc.Collections<-c( "HALLMARK")
glol.w3<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1,1, msig_wgcna)

pcut2<-0.1
vc.Collections<-c("DESCARTES")
glol.w4<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna2)

ls.w.plots<-list(
  glol.w1 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "top"),
  glol.w2 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "top"),
 glol.w3 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "bottom")
)
vn.w.plots<-sapply(ls.w.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})


ggsave(paste0(outdir2,"/lol.wmsig.pdf"), grid.arrange(grobs = ls.w.plots,
                                                      #heights = c(9, 9, 9),
                                                      layout_matrix = rbind(c(1, 1, 2, 2),
                                                                            c(1, 1, 3, 3)
                                                                           # c(1, 1, 4, 4)
                                                      )), width = 8, height = 10,
       device = "pdf", units = "in")


fx.semsim2<-function(compClust,ontology, padj){pairwise_termsim(compClust %>% filter(ONTOLOGY == ontology & 
                                                                                       p.adjust <= padj ) %>% 
                                                                  arrange(p.adjust), method="JC", semData = dsem.sim.bp)}

ss.bp.wgcna<-fx.semsim2(go_wgcna, "BP", 0.01)
ss.cc.wgcna<-fx.semsim2(go_wgcna, "CC", 0.05)
ss.mf.wgcna<-fx.semsim2(go_wgcna, "MF", 0.05)


ego.bp.wgcna<-emapplot_cluster(ss.bp.wgcna, showCategory = min(200,dim(ss.bp.wgcna@compareClusterResult)[1]), 
                               cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 6,
                               label_format = 15, nWords = 4, min_edge = 0.1, repel = T) + 
  scale_fill_manual(values = as.character(unique(ss.bp.wgcna@compareClusterResult$Cluster)))

ego.cc.wgcna<-emapplot_cluster(ss.cc.wgcna, showCategory = min(200,dim(ss.cc.wgcna@compareClusterResult)[1]), 
                               cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 3,
                               label_format = 15, nWords = 4,  min_edge = 0.1, repel = T) + 
  scale_fill_manual(values = as.character(unique(ss.cc.wgcna@compareClusterResult$Cluster)))

ego.mf.wgcna<-emapplot_cluster(ss.mf.wgcna, showCategory = min(200,dim(ss.mf.wgcna@compareClusterResult)[1]), 
                               cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 3,
                               label_format = 15, nWords = 4,  min_edge = 0.1, repel = T) + 
  scale_fill_manual(values = as.character(unique(ss.mf.wgcna@compareClusterResult$Cluster)))


ls.ego.plots<-list(
  ego.bp.wgcna,
  ego.cc.wgcna
)
ggsave(paste0(outdir2,"/lego.wgo.pdf"), grid.arrange(grobs = ls.ego.plots, nrow=length(ls.ego.plots), heights = c(5,5)), device = "pdf", width=7.5, height=10, units = "in")

All.df.GENE<-All.df[!duplicated(All.df$SYMBOL),]
rownames(All.df.GENE)<-All.df.GENE$SYMBOL

tb.west<-as_tibble(na.omit(merge(df.west, All.df.GENE, by="row.names"))) %>% dplyr::select(West, ENTREZID) %>% dplyr::rename(Marker = West)
tb.chhabra<-as_tibble(na.omit(merge(df.chhabra, All.df.GENE, by="row.names"))) %>% dplyr::select(Chhabra, ENTREZID) %>% dplyr::rename(Marker = Chhabra)
tb.zhou<-as_tibble(na.omit(merge(df.zhou, All.df.GENE, by="row.names"))) %>% dplyr::select(Zhou, ENTREZID) %>% dplyr::rename(Marker = Zhou)
tb.xiang<-as_tibble(na.omit(merge(df.xiang, All.df.GENE, by="row.names"))) %>% dplyr::select(Xiang, ENTREZID) %>% dplyr::rename(Marker = Xiang)
tb.khan<-as_tibble(na.omit(merge(df.khan, All.df.GENE, by="row.names"))) %>% dplyr::select(Khan, ENTREZID) %>% dplyr::rename(Marker = Khan)
tb.hsiao<-as_tibble(na.omit(merge(df.hsiao, All.df.GENE, by="row.names"))) %>% dplyr::select(Hsiao, ENTREZID) %>% dplyr::rename(Marker = Hsiao)

tb_all<-bind_rows(tb.west, tb.chhabra, tb.zhou, tb.xiang, tb.hsiao, tb.khan) %>% mutate(gs_name = Marker, entrez_gene = as.integer(ENTREZID)) %>% select(gs_name, entrez_gene)

markers_wgcna<-compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
                              pAdjustMethod = "BH", TERM2GENE=tb_all, universe=unlist(ls.test, use.names = F))

ss.markers_w<-pairwise_termsim(markers_wgcna  %>% dplyr::filter(p.adjust < 0.1), method="JC",showCategory = 200)
gg.emap_w<-emapplot(ss.markers_w, pie = 'Count', min_edge = 0.01, cex_line =1, cex_category = 2,
                    label_style = "ggforce", shadowtext = F, layout = 'lgl',
                    group_category = F,  legend_n = 3, showCategory = 200, repel = T) +
  scale_fill_manual(values = unique(as.character(na.omit(droplevels(ss.markers_w@compareClusterResult$Cluster,c("")))))) 
ggsave(filename = paste0(outdir2,"/gg.emap_wmarkers1.pdf"), plot = gg.emap_w ,device = "pdf",width = 8, height = 10, units = "in")

#####

dev.off()
pdf(file="Eigengene.pdf",width = 5, height = 5)
plotEigengeneNetworks(orderMEs(cbind(MEs, datTraits3$PIGF_per_ug_RNA, datTraits3$hCG_per_ug_RNA)),
                      "Eigengene adjacency heatmap", plotDendrograms = F, letterSubPlots = F,
                      setLabels = colnames(MEs),colorLabels = T, plotAdjacency = F,
                      xLabelsAngle = 45, excludeGrey = T, signed = T, printAdjacency = F,cex.adjacency = 0.4)
dev.off()
#######
#require(data.table)

revc.ensByMod<-setNames(moduleColors, names(net$colors))
ls.mods<-list(all = table(revc.ensByMod), 
              PAR = table(revc.ensByMod[df$gene_name[vtf.PAR]]), 
              PAIR = table(revc.ensByMod[df$gene_name[vtf.PAIR]]), 
              oESC = table(revc.ensByMod[df$gene_name[vtf.oESC]]), 
              allESC = table(revc.ensByMod[df$gene_name[vtf.ESC2]]), 
              otherX = table(revc.ensByMod[df$gene_name[vtf.X & !vtf.ESC2]]), 
              Ypair = table(revc.ensByMod[df$gene_name[vtf.PAIRY]]) 
)


df.strata.tab<-t(bind_rows(ls.mods) %>% add_column(Strata = names(ls.mods)) %>% column_to_rownames('Strata'))
df.strata.tab[which(is.na(df.strata.tab),arr.ind = T)]<-0

df.noncat<-(-1)*(df.strata.tab-df.strata.tab[,1])
df.nonmod<-(-1)*(t(t(df.strata.tab)-colSums(df.strata.tab)))

#get all array index pairs, could also have used: expand.grid(seq(ncol(df.strata.tab)),seq(nrow(df.strata.tab)))[,2:1]
mn.arr.ind<-which(!is.na(df.strata.tab), arr.ind = T)

ls.mod.test<-apply(mn.arr.ind, MARGIN = 1, function(x) {
  ft<-fisher.test(matrix(c(df.strata.tab[x[1],x[2]],
                           df.noncat[x[1],x[2]],
                           df.nonmod[x[1],x[2]],
                           colSums(df.strata.tab)[1]-df.nonmod[x[1],x[2]]-df.noncat[x[1],x[2]]-df.strata.tab[x[1],x[2]]),
                         nrow = 2, ncol =2))
})

#retrieve statistic
mn.ftlp<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {-log(ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$p.value,10)}),  
                nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
mn.ftor<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$estimate}),  
                nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
rownames(mn.ftlp)<-rownames(df.strata.tab)
rownames(mn.ftor)<-rownames(df.strata.tab)
colnames(mn.ftlp)<-colnames(df.strata.tab)
colnames(mn.ftor)<-colnames(df.strata.tab)

mn.ft.lpor<-mn.ftor
mn.ft.lpor[which(mn.ftlp<=(-log(0.05,10)),arr.ind = T)]<-0
gg.group_mod_map<-pheatmap(mn.ft.lpor, cluster_rows = F, cluster_cols = F, colorpanel(50, low = "white", high = "red"))

require(Rfast)
datKME<-signedKME(datExpr, MEs)
rank.datKME<-as.data.frame(colRanks(as.matrix(datKME)))
colnames(rank.datKME)<-colnames(datKME)
rownames(rank.datKME)<-rownames(datKME)


vc.names<-rownames(mn.ft.lpor)

require(RColorBrewer)
df.pheatanno_col2<-cbind(as.data.frame(mn.ft.lpor[,-1]), module = vc.names)
df.pheatanno_row2<-tb_HmXvstDiff %>% select(gene_name, Region, Reported) %>% pivot_wider() %>% as.data.frame() %>% column_to_rownames("gene_name")


colnames(datKME)<-str_replace_all(colnames(datKME), "kME", "")
names(vc.names)<-vc.names
ls.pheatcolor2 = list(Region = c(PAR = "black", nonPAR = "lightgrey"), module = vc.names, 
                      Reported =c("Escape" = "yellow1", "Inactive" = "grey27", "Variable" = "lightgreen", "Unknown" = "grey", "VarOther" = "lightseagreen"))
                      
pheatmap(datKME[rownames(df.pheatanno_row2),], annotation_row = df.pheatanno_row2 # |> dplyr::select(-c("C6XX", "C19XX", "C23XX", "rowname"))
         , annotation_col = df.pheatanno_col2, annotation_colors = ls.pheatcolor2,
         drop_levels = T, fontsize_row = 7, fontsize_col = 7, border_color = NA, filename = paste0(outdir2,"/groupModcorr2.pdf"), 
         angle_col = 45, cellwidth = 7, cellheight = 7)

# run once (take a while)
ADJ1<-abs(cor(datExpr,use="p"))^sft$powerEstimate
#memory.limit(size=157370)
Alldegrees1<-intramodularConnectivity(ADJ1, moduleColors)

#rownames(Alldegrees1)<-colnames(datExpr)

vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M) & (vtf.ESC2 | vtf.PAIRY)]])
df.plot<-cbind(module = vc.restrictGenes, 
               (Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
               datKME[names(vc.restrictGenes),]^sft$powerEstimate)

colnames(df.plot)<-str_replace_all(colnames(df.plot), "kME", "")

df.plot2<-cbind(df.plot$module, as.data.frame(t(rbind(apply(df.plot, 1, function(X){as.numeric(c(X[2:5],X[X[1]]))})))))
colnames(df.plot2)<-c(colnames(df.plot)[1:5],"kME")

df.plot3<-df.plot2 %>% rownames_to_column(var = "Gene") %>% mutate(group = if_else(rownames(df.plot2) %in% df$gene_name[(vtf.PAR | vtf.PAIR | vtf.PAIRY)], Gene, "", ""))

gg.kim.kme<-ggplot(df.plot3, aes(x = kWithin, y = kME, col =module, 
                                 label = group)) + 
  geom_point() +  ylab(paste0("kME ^ ",sft$powerEstimate)) + 
  geom_text(hjust = 0, nudge_x = 3) + 
  scale_color_manual(values=sort(unique(df.plot2$module)))

ggsave(filename = "gg.kim.kme.pdf", plot = gg.kim.kme, width = 4, height = 5, units = "in")
ggsave(filename = "gg.group_mod_map.pdf", plot = gg.group_mod_map)

vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]

#First Trimester: Gonzalez et al.
GSE109120<-read.table("GSE109120_genecounts.txt", header = T, sep = "\t", row.names = 1)
df.GSE109120<-cbind(df$gene_name, GSE109120[vc.ENSG,])
df.GSE109120<-df.GSE109120[which(!(df.GSE109120[,1] %in% unique(df.GSE109120[duplicated(df.GSE109120[,1]),1]))),]
rownames(df.GSE109120)<-df.GSE109120[,1]
df.GSE109120<-na.omit(df.GSE109120[-1])
datExpr.GSE109120<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(df.GSE109120))))
datExpr.GSE109120.fem<-datExpr.GSE109120[c(grep("F", rownames(datExpr.GSE109120))),]
datExpr.GSE109120.male<-datExpr.GSE109120[c(grep("M", rownames(datExpr.GSE109120))),]
datExpr.GSE109120.rand2<-datExpr.GSE109120[lapply(c(1:length(rownames(datExpr.GSE109120))), "%%", 2) == 0,] 
#BAP2: Sheridan, et al.
#GSE119265<-as.data.frame(read.table(file = "../TB/GSE119265_coding.all.count.tsv", header = TRUE, row.names = 1, sep="\t"))
GSE119265<-as.data.frame(read.csv(file = "sherisan_eope_dataset.counts", header = TRUE, row.names = 1))
df.GSE119265<-cbind(df[rownames(GSE119265),"gene_name"], GSE119265[,c(7:length(colnames(GSE119265)))])
df.GSE119265<-df.GSE119265[rownames(df),] 
df.GSE119265<-df.GSE119265[which(!(df.GSE119265[,1] %in% unique(df.GSE119265[duplicated(df.GSE119265[,1]),1]))),]
rownames(df.GSE119265)<-df.GSE119265[,1]
df.GSE119265<-na.omit(df.GSE119265[-1])
datExpr.GSE119265<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(df.GSE119265))))

#Term1: Buckberry, et al.
GSE77085<-as.data.frame(read.csv(file = "GSE77085_hg19_ucsc_gene_counts.csv", header = TRUE, row.names = 1))
datExpr.GSE77085<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(GSE77085))))

#Term2: Deyssenroth, et al.
SRP095910 <- readRDS("RICHS_cpm.rds")
datExpr.SRP095910 <- as.data.frame(t(varianceStabilizingTransformation(round(SRP095910))))
RICHs.datTraits<-read.csv("RICHs.datTraits.csv",header = T,row.names = 1)

datExpr.SRP095910.male<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$Sex == "Male",]),]
datExpr.SRP095910.fem<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$Sex == "Female",]),]
datExpr.SRP095910.sga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "SGA",]),]
datExpr.SRP095910.lga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "LGA",]),]
datExpr.SRP095910.aga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "AGA",]),]
datExpr.SRP095910.rand2<-datExpr.SRP095910[rownames(RICHs.datTraits[lapply(as.numeric(rownames(RICHs.datTraits)), "%%", 2) == 0,]),]
datExpr.SRP095910.rand5<-datExpr.SRP095910[rownames(RICHs.datTraits[lapply(as.numeric(rownames(RICHs.datTraits)), "%%", 5) == 0,]),]

#datExpr.GSE109120<-as.data.frame(t(vst(as.matrix(gnn.GSE109120))))
datExpr.noSV<-as.data.frame(t(assay(vsd)))
colnames(datExpr.noSV)<-df[rownames(t(datExpr.noSV)),"gene_name"]

ls.testnets2<- list( BAP1b = datExpr.noSV,
                    BAP2 = datExpr.GSE119265,
                    FirstTrimester = datExpr.GSE109120,
                    FirstTri.male = datExpr.GSE109120.male,
                    FirstTri.fem  =datExpr.GSE109120.fem,
                    FirstTri.rand2 = datExpr.GSE109120.rand2,
                    Term1 =  datExpr.GSE77085,
                    Term2 =  datExpr.SRP095910,
                    T2.male = datExpr.SRP095910.male,
                    T2.fem = datExpr.SRP095910.fem,
                    T2.rand2 = datExpr.SRP095910.rand2,
                    T2.rand5 = datExpr.SRP095910.rand5,
                    T2.aga = datExpr.SRP095910.sga,
                    T2.sga = datExpr.SRP095910.lga,
                    T2.lga = datExpr.SRP095910.aga
)


for (i in c(1:length(ls.testnets2)))
{
  datExpr0 <- ls.testnets2[[i]]
  gsg <- goodSamplesGenes(datExpr0, verbose = 3)
  gsg$allOK
  
  if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) {
      printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    }
    if (sum(!gsg$goodSamples)>0) {
      printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    }
    
    # Remove the offending genes and samples from the data:
    datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
  }
  ls.testnets2[[i]]<-datExpr0
}


ls.testnets3<- list(BAP1.mix = datExpr[paste0("d", c(221,222,224:226,227,228, 230:232,323:328)), ],
                    BAP1.XO = datExpr[rownames(datTraits4[datTraits4$simple.XO ==1,]), ])

for (i in c(1:length(ls.testnets3)))
{
  datExpr0 <- ls.testnets3[[i]]
  gsg <- goodSamplesGenes(datExpr0, verbose = 3)
  gsg$allOK
  
  if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) {
      printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    }
    if (sum(!gsg$goodSamples)>0) {
      printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    }
    
    # Remove the offending genes and samples from the data:
    datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
  }
  ls.testnets3[[i]]<-datExpr0
}

multiExpr3 = list( BAP1 = list(data = datExpr), 
                   BAP1.rand2 = list(data = ls.testnets3[[1]]),
                   BAP1.XO = list(data = ls.testnets3[[2]]))

colorsBAP3 = moduleColors
multiColor3 = list(BAP1 = colorsBAP3);

system.time( {
  mp3 = modulePreservation(multiExpr3, 
                           multiColor3,
                           dataIsExpr = T,
                           networkType = "signed",
                           corFnc = "bicor",
                           referenceNetworks = 1,
                           maxModuleSize = 5000, 
                           nPermutations = 100,
                           randomSeed = 1,
                           quickCor = 0,
                           verbose = 3)
} )

mp<-mp3
df.Zsummary<-as.data.frame(matrix(nrow=1+length(table(colorsBAP3))))
for (i in c(1:length(ls.testnets3)))
{
  ref = 1
  test = i+1
  statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
  statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
  print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
               signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
  df.Zsummary<-as.data.frame(cbind(df.Zsummary, statsObs[, c("medianRank.pres", "medianRank.qual")],
                                   signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
  
  # Module labels and module sizes are also contained in the results
  modColors = rownames(mp$preservation$observed[[ref]][[test]])
  moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
  # leave grey and gold modules out
  plotMods = !(modColors %in% c("grey", "gold"));
  # Text labels for points
  text = modColors[plotMods];
  # Auxiliary convenience variable
  plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
  
  mains = c("Preservation Median rank", "Preservation Zsummary");
  for (p in 1:2)
  {
    min = min(plotData[, p], na.rm = TRUE);
    max = max(plotData[, p], na.rm = TRUE);
    # Adjust ploting ranges appropriately
    if (p==2)
    {
      if (min > -max/10) min = -max/10
      ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
    } else
      ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
    plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
         main = mains[p],
         cex = 2.4,
         ylab = mains[p], xlab = "Module size", log = "x",
         ylim = ylim,
         xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
    labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
    # For Zsummary, add threshold lines
    if (p==2)
    {
      abline(h=0)
      abline(h=2, col = "blue", lty = 2)
      abline(h=10, col = "darkgreen", lty = 2)
    }}
  
}

df.Zsummary3<-df.Zsummary[,-1]
colnames(df.Zsummary3)<-c(
  paste(rep(names(ls.testnets3), each = 4), colnames(df.Zsummary3), sep="_"))


pdf("Module_Pres1.pdf",width = 3, height = 5)
mn.Zs3<-as.matrix(cbind(
  df.Zsummary3[!rownames(df.Zsummary3) %in% c("gold"),grep("Zsummary.pres", colnames(df.Zsummary3))]))

barplot(rev(mn.Zs3[,2]-mn.Zs3[,1]),col=rev(rownames(mn.Zs3)), xlim=c(-100,50), 
        horiz=T, names.arg=substr(rev(rownames(mn.Zs3)),1,4), cex.names=0.5, width=rev(table(colorsBAP3)))

dev.off()


ls.testnets<-ls.testnets2
setLabels = c("BAP1", "BAP1b", "BAP2", "FirstTrimester", "FirstTri.male", "FirstTri.fem", "FirstTri.rand2", "Term1", "Term2",
              "T2.male", "T2.fem", "T2.rand2", "T2.rand5", "T2.sga", "T2.lga", "T2.aga")
multiExpr = list( BAP1 = list(data = datExpr), 
                  BAP1b = list(data = ls.testnets[[1]]),
                  BAP2 = list(data = ls.testnets[[2]]),
                  FirstTrimester = list(data = ls.testnets[[3]]),
                  FirstTri.male = list(data = ls.testnets[[4]]),
                  FirstTri.fem = list(data = ls.testnets[[5]]),
                  FirstTri.rand2 = list(data = ls.testnets[[6]]),
                  Term1 = list(data = ls.testnets[[7]]),
                  Term2 = list(data = ls.testnets[[8]]),
                  T2.male = list(data = ls.testnets[[9]]),
                  T2.fem = list(data = ls.testnets[[10]]),
                  T2.rand2 = list(data = ls.testnets[[11]]),
                  T2.rand5 = list(data = ls.testnets[[12]]),
                  T2.aga = list(data = ls.testnets[[13]]),
                  T2.sga = list(data = ls.testnets[[14]]),
                  T2.lga = list(data = ls.testnets[[15]])
)

colorsBAP = moduleColors
multiColor = list(BAP1 = colorsBAP);

system.time( {
  mp2 = modulePreservation(multiExpr, 
                           multiColor,
                           dataIsExpr = T,
                           networkType = "signed",
                           corFnc = "bicor",
                           referenceNetworks = 1,
                           maxModuleSize = 5000, 
                           nPermutations = 100,
                           randomSeed = 1,
                           quickCor = 0,
                           verbose = 3)
} )

mp<-mp2
df.Zsummary<-as.data.frame(matrix(nrow=1+length(table(colorsBAP))))
for (i in c(1:length(ls.testnets2)))
{
  ref = 1
  test = i+1
  statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
  statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
  print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
               signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
  df.Zsummary<-as.data.frame(cbind(df.Zsummary, statsObs[, c("medianRank.pres", "medianRank.qual")],
                                   signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
  
  # Module labels and module sizes are also contained in the results
  modColors = rownames(mp$preservation$observed[[ref]][[test]])
  moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
  # leave grey and gold modules out
  plotMods = !(modColors %in% c("grey", "gold"));
  # Text labels for points
  text = modColors[plotMods];
  # Auxiliary convenience variable
  plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
  
  mains = c("Preservation Median rank", "Preservation Zsummary");
  for (p in 1:2)
  {
    min = min(plotData[, p], na.rm = TRUE);
    max = max(plotData[, p], na.rm = TRUE);
    # Adjust ploting ranges appropriately
    if (p==2)
    {
      if (min > -max/10) min = -max/10
      ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
    } else
      ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
    plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
         main = mains[p],
         cex = 2.4,
         ylab = mains[p], xlab = "Module size", log = "x",
         ylim = ylim,
         xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
    labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
    # For Zsummary, add threshold lines
    if (p==2)
    {
      abline(h=0)
      abline(h=2, col = "blue", lty = 2)
      abline(h=10, col = "darkgreen", lty = 2)
    }}
  
}

df.Zsummary<-df.Zsummary[,-1]
colnames(df.Zsummary)<-c(
  paste(rep(names(ls.testnets), each = 4), colnames(df.Zsummary), sep="_"))
Breaks = c(0 ,seq(from = 2, to = 10, by = 2), seq(from = 15, 
                                                  to = max(as.matrix(df.Zsummary[,grep("Zsummary.pres", colnames(df.Zsummary[,c(5:ncol(df.Zsummary))]))+4])), 
                                                  by = 5))

mn.Zs<-as.matrix(cbind(df.Zsummary[!rownames(df.Zsummary) %in% c("gold"),grep("Zsummary.pres", colnames(df.Zsummary))]))

sizeGrWindow(10,10)
par(mar = c(0, 0, 0, 0))

mn.Zs2<-mn.Zs[,-1]


dev.off()

vc.colour<-as.vector(names(table(colorsBAP)))
names(vc.colour)<-names(table(colorsBAP))
ls.modpres_color<-list(colour = vc.colour)
df.modpres_row<-as.data.frame(cbind(
  vc.colour))
colnames(df.modpres_row)<-c("colour") #"number")

  
pheatmap(mn.Zs2, cellnote = round(mn.Zs2,digits = 1), display_numbers = T, number_format = "%.0f", 
         cluster_rows = F, cluster_cols = F, border_color = NA, cellwidth = 12, cellheight = 12, 
         color =colorpanel(length(Breaks)-1, "white", "orange", "red"), breaks=Breaks,
         annotation_row = df.modpres_row,  annotation_colors = ls.modpres_color, annotation_legend = F, annotation_names_row = T,
        labels_row = paste(rownames(df.Zsummary[!rownames(df.Zsummary) %in% c("gold"),]), table(multiColor3), sep=":  "),
        filename=paste0(outdir2,"/hm_TBLs_mp_new.pdf")
        )

unique_clusters <- go_wgcna |> 
  as.data.frame() |> 
  pull(Cluster) |> 
  unique() |> 
  as.character()

go_wgcna |> 
  as.data.frame() |> 
  mutate(
    object_file = "go_wgcna",
    ID = sprintf("GO%s_%s", ONTOLOGY, ID)
  ) |> 
  dplyr::select(-ONTOLOGY) |> 
  rbind(
    msig_wgcna |> 
      as.data.frame() |> 
      mutate(object_file = "msig_wgcna")
  ) |> 
  rbind(
    msig_wgcna2 |> 
      as.data.frame() |> 
      mutate(object_file = "msig2_wgcna")
  ) |> 
  pivot_wider(
    id_cols = c(ID, Description, object_file), 
    names_from = Cluster, 
    values_from = c(GeneRatio, BgRatio, p.adjust),
    names_glue = "{Cluster}_{.value}",
    names_sort = T
  ) |>
  dplyr::select(
    ID, 
    Description, 
    # object_file, 
    starts_with(sprintf("%s_",unique_clusters))
  ) |> 
  arrange(
    !!as.symbol(sprintf("%s_p.adjust",unique_clusters))
  ) |>
  write_csv("Table_S4.csv")