WGCNA_NCCs_v05_pass.R

rm(gene_sum_exp)
rm(sum_exp)
saveRDS(df, file=paste0(outdir,"/df.rds"))
df<-readRDS(file=paste0(outdir,"/df.rds"))
vsd3<-readRDS(file=paste0(outdir,"/vsd3.rds"))
library(WGCNA)
library(tidyverse)
require(DESeq2)
tf.gseaset<-T
catvars<-c("Xa","donor","simple")
dev.off()

if (all(df[,c(grep(pattern = "^d", colnames(df)))] == assay(vsd3)))
{
  if(tf.gseaset){
    ensembl.vst<-df[,c(colData(vsd3) |> as.data.frame() |> rownames())]
    ENSG.vst<-ensembl.vst
    rownames(ENSG.vst)<-df$ENSEMBL
    datExpr0<-t(ENSG.vst[df.gsea2$ENSEMBL,])
  } else{datExpr0 <- as.data.frame(t(assay(vsd3)))}


  gsg <- goodSamplesGenes(datExpr0, verbose = 3)
  gsg$allOK
  if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) {
      printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    }
    if (sum(!gsg$goodSamples)>0) {
      printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    }

    # Remove the offending genes and samples from the data:
    datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
  }
  datExpr<-datExpr0
  if(tf.gseaset){colnames(datExpr)<-df.gsea2$SYMBOL
  } else{
    colnames(datExpr)<-df$gene_name}
}

sampleTree <- hclust(dist(datExpr), method = "average")
#sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)

# no need to remove samples

df.PAX7SOX10_rna<-read.csv("./MS4_IF_percent_PAX7SOX10.csv") |>
  dplyr::filter(Registry_ID != "") |>
  rename(names = "Registry_ID",
         Percent.PAX7SOX10 = "perc_PAX7SOX10") |>
  dplyr::select(Percent.PAX7SOX10, names)
datTraits <- as.data.frame(colData(vsd3)[rownames(datExpr),catvars])
datTraits2<-binarizeCategoricalColumns(datTraits, includeLevelVsAll = T, nameForAll = "",
                                       includePairwise = F, dropUninformative = T, dropFirstLevelVsAll = F)
rownames(datTraits2)<-rownames(datTraits)
datTraits2<-datTraits2 |> rownames_to_column("names") |> left_join(df.PAX7SOX10_rna, by = "names" ) |> column_to_rownames("names")

# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits2, signed = F)

options(stringsAsFactors = FALSE)
collectGarbage()

# Choose a set of soft-thresholding powers
powers = c(seq(from = 1, to=20, by=1))
# Call the network topology analysis function
nwt = "signed hybrid"
maxP<-0
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5, networkType = nwt,
                        corFnc = bicor, dataIsExpr = T)#, corOptions = list(maxPOutliers =maxP))
sft$powerEstimate

# Plot the sample dendrogram and the colors underneath.
pdf(paste0(outdir,"/NCCs_Dendro_mod.pdf"))
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits2),
                    main = "Sample dendrogram and trait heatmap")
dev.off()

# Plot the results:
require(ggplot2)
ggplot(sft$fitIndices, aes(x=Power, y=SFT.R.sq, label=Power)) + geom_text(color="red") + geom_hline(yintercept=.85, color="red") + labs(title="Scale independence")

ggplot(sft$fitIndices, aes(x=Power, y=`mean.k.`, label=Power)) + geom_text(color="red") + labs(title="Mean connectivity")

tf.deepsplit<-T
vn.mergeCut<-0.25
outdir2<-paste0(outdir,"/power_",sft$powerEstimate, tf.gseaset,"_", nwt, "_deep_", tf.deepsplit, "_cut_", vn.mergeCut)#, "_maxP_", maxP)
dir.create(outdir2, showWarnings = FALSE)
net <- blockwiseModules(datExpr, power = sft$powerEstimate,
                        networkType = "signed hybrid", corType = "bicor",
                        TOMType = "signed", minModuleSize = 30,
                        maxBlockSize=30000, deepSplit = tf.deepsplit,
                        reassignThreshold = 0,
                        mergeCutHeight = vn.mergeCut,
                        numericLabels = TRUE, pamRespectsDendro = FALSE,
                        saveTOMs = F,
                        #saveTOMFileBase = paste0(outdir2,"/TOMTOM_",sft$powerEstimate),
                        verbose = 3)


saveRDS(net, file = paste0(outdir2,"/net.rds"))
net<-readRDS(file = paste0(outdir2,"/net.rds"))
# open a graphics window
#sizeGrWindow(12, 9)
# Convert labels to colors for plotting

require(scales)
vc.wgcna_col1<-c("black", "blue", "yellow", "red", "brown", "turquoise", "purple")
vc.wgcna_col2<-c("green", "navy", "orange", "salmon", "peru", "cyan", "magenta")
vc.wgcna_col3<-c("olivedrab", "skyblue", "khaki", "tomato", "tan", "darkcyan", "pink")
vc.wgcna_col4<-c("darkkhaki", "royalblue", "bisque", "maroon", "slategrey", "lightcyan", "thistle")
vc.wgcna_col5<-c("palegreen", "slateblue", "beige",  "lightsalmon", "sienna", "aquamarine", "violet")

mergedColors = labels2colors(net$colors, colorSeq = c(vc.wgcna_col1,
                                                       vc.wgcna_col2,
                                                       vc.wgcna_col3,
                                                       vc.wgcna_col4,
                                                       vc.wgcna_col5))


# Plot the dendrogram and the module colors underneath
pdf(paste0(outdir2,"/NCCs_Cluster_dendro.pdf"))
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
dev.off()
moduleLabels = net$colors
moduleColors = mergedColors

MEs = net$MEs
geneTree = net$dendrograms[[1]]
#save(MEs, moduleLabels, moduleColors, geneTree,
#     file = "NCC-networkConstruction-auto.RData")
grey <- df[which(moduleColors=="grey"),]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

 # Recalculate MEs with color labels

MEs0 = moduleEigengenes(datExpr, paste0("_",moduleLabels,"_",moduleColors))$eigengenes
vc.ME_labcol<-setNames(str_split(names(MEs0), "_", simplify = T)[,2],
                       str_split(names(MEs0), "_", simplify = T)[,3])
vc.labcol_ME<-setNames(names(vc.ME_labcol), as.numeric(vc.ME_labcol))
names(MEs0)<-paste0("ME",names(vc.ME_labcol))
MEs = orderMEs(MEs0)

# A data frame with module eigengenes can be obtained as follows
MEsAutomatic=net$MEs
moduleLabelsAutomatic=net$colors
moduleColorsAutomatic = mergedColors

vc.PAX7SOX10 = as.data.frame(datTraits2$Percent.PAX7SOX10)
GS.PAX7SOX10=as.numeric(cor(datExpr,vc.PAX7SOX10,use="pairwise.complete.obs"))

vc.XO = as.data.frame(datTraits2$simple.XO)
GS.XO=as.numeric(cor(datExpr,vc.XO,use="pairwise.complete.obs"))

vc.Euploid <- as.data.frame(datTraits2$simple.EU)
GS.Euploid=as.numeric(cor(datExpr,vc.Euploid,use="pairwise.complete.obs"))


df.GS.Markers<-as.data.frame(apply(df.Markers.med[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
mn.GS.Markers<-numbers2colors(df.GS.Markers, signed=T, lim=c(-1,1))
colnames(mn.GS.Markers)<-colnames(df.GS.Markers)

#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR)

df.escape<-as.data.frame(cbind(colMeans(mn.mednorm[na.omit(df$gene_name[vtf.ESC2]),]),
                               colMeans(mn.mednorm[na.omit(df$gene_name[vtf.PAR]),]),
                               colMeans(mn.mednorm[na.omit(df$gene_name[vtf.oESC]),]),
                               colMeans(mn.mednorm[na.omit(df$gene_name[vtf.PAIR2]),])))

colnames(df.escape)<-c("All", "PAR","OtherEsc", #"PairX", "PairY",
                       "Pair")
df.GS.escape<-as.data.frame(apply(df.escape[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))

mn.GS.escape<-numbers2colors(df.GS.escape, signed=T)
colnames(mn.GS.escape)<-colnames(df.escape)

datTraits3<-cbind(datTraits2,
                  df.escape[rownames(datExpr),], #[,-c(4,5)],
                  df.Markers.med[rownames(datExpr),c(grep(".[KDEGs/C/L/F/H]$", colnames(df.Markers.med)))])

require(Hmisc)
require(corrplot)

#remove Xa and donor columns, re-ordered columns
datTraits4 <- datTraits3[, -c(grep("simple", colnames(datTraits3)),
                                grep("Xa", colnames(datTraits3)),
                                grep("donor", colnames(datTraits3)))]

cortype = "pearson"
pdf(paste0(outdir2,"/",cortype,"_Corrplot4.pdf"))
cor_5<- rcorr(as.matrix(datTraits4),type = cortype)
M <- cor_5$r
p_mat <- cor_5$P
p_mat[is.na(p_mat)] <- 0
corrplot(M, type = "upper", order = "original", method="square",
         p.mat = p_mat, sig.level = 0.05, insig = "blank",
         col=blueWhiteRed(100), tl.col="black", tl.srt = 60)
dev.off()

df.wald<-df.gsea2
if(!tf.gseaset) {
  require(preprocessCore)
  vc.Wald<-c("fXO_stat", "mXO1_stat", "mXO2_stat")
  mn.stat_qn2<-normalize.quantiles(matrix(unlist(df[,vc.Wald]), ncol = length(vc.Wald)))
  colnames(mn.stat_qn2)<-vc.Wald
  df.wald<-as.data.frame(cbind(mn.stat_qn2,
                               "aveXO_stat" = rowMeans(mn.stat_qn2[,c(1:3)])),
                               "avemXO_stat" = rowMeans(mn.stat_qn2[,c(2:3)])) %>% add_column("gene_name" = df$gene_name, .before = 1)
  rownames(df.wald)<-df$ENSEMBL
}
df.GS.wald<-as.data.frame(apply(df.wald[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))


pdf(paste0(outdir2,"/DendroTrait_mod3.pdf"))
blocknumber=1
mColors=moduleColorsAutomatic[net$blockGenes[[blocknumber]]]
datColors=data.frame(mColors,
                     XO=numbers2colors(GS.XO, signed=T),
                     fXO=numbers2colors(df.wald$fXO, signed=T, lim = c(-5,5)),
                     mXO1=numbers2colors(df.wald$mXO1, signed=T, lim = c(-5, 5)),
                     mXO2=numbers2colors(df.wald$mXO2, signed=T, lim = c(-5,5)),
                     avemXO=numbers2colors(df.wald$avemXO, signed=T, lim = c(-5,5)),
                     mn.GS.Markers,
                     Euploid=numbers2colors(GS.Euploid, signed=T),
                     PAX7SOX10=numbers2colors(GS.PAX7SOX10, signed=T))
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[blocknumber]]
                    ,colors=datColors,
                    groupLabels=names(datColors),
                    dendroLabels=FALSE, hang=0.03,addGuide=TRUE,guideHang=0.05)
dev.off()

cortype<-"pearson"
moduleTraitCor = cor(MEs, datTraits4[,-grep("Hsiao",colnames(datTraits4))], use = "pairwise.complete.obs", method = cortype)
moduleTraitPvalue = corPvalueFisher(moduleTraitCor, nSamples)

pdf(paste0(outdir2,"/ModuleTrait_corr_large3F.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 1), "\n(",
                   signif(-log(moduleTraitPvalue, 10), 2)
                   , ")"
                   , sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.1]<-""
colnames(textMatrix)<-colnames(moduleTraitCor)
rownames(textMatrix)<-rownames(moduleTraitCor)
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor[head(names(MEs),-1),],
               xLabels = colnames(moduleTraitCor),
               yLabels = head(names(MEs),-1),
               ySymbols = vc.ME_labcol[head(gsub("^ME","",names(MEs)),-1)],
               yColorLabels = T,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix[head(names(MEs),-1),],
               setStdMargins = F,
               cex.text = 0.5,
               zlim = c(-1,1),
               cex.lab.x = 0.6,
               main = paste0("Module-trait ",cortype))
dev.off()

### ENRICHMENT TESTING ###
require(msigdbr)
require(clusterProfiler)
require(org.Hs.eg.db)
library(DOSE)

collectGarbage()

vc.ensByMod<-setNames(names(net$colors), moduleLabels)
ls.ensByMod <- split(vc.ensByMod, names(vc.ensByMod))

if(!tf.gseaset) {
  ls.test<-lapply(ls.ensByMod, function(X) unique(df.gsea2[df.gsea2$SYMBOL %in% X,"ENTREZID"]))
} else {ls.test<-lapply(ls.ensByMod, function(X) unique(All.df[which(All.df$SYMBOL %in% X),"ENTREZID"]))}

if(file.exists(paste0(outdir2,"/msig_wgcna.rds")))
  {msig_wgcna<-readRDS(paste0(outdir2,"/msig_wgcna.rds"))
}
if(!file.exists(paste0(outdir2,"/msig_wgcna.rds"))) {
msig_wgcna<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1, readable = T,
                            pAdjustMethod = "BH", TERM2GENE=msig_Hall_C1_CP_TFT_MOD_HP_C8, universe=unlist(ls.test))
saveRDS(msig_wgcna, file = paste0(outdir2,"/msig_wgcna.rds"))
}

collectGarbage()

fx.labels1<- Vectorize(function(X) {
  s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
  abbreviate(s, minlength = 25, dot = T)
})

fx.labels2<- Vectorize(function(X) {
  s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
  s<-str_replace_all(s, "_", " ")
  abbreviate(s, minlength = 25, dot = T)
})

fx.labels3<- Vectorize(function(X) {
  #s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
  #s<-str_replace_all(s, "_", " ")
  abbreviate(X, minlength = 25, dot = T)
})

require(viridis)
require(grid)
require(gridExtra)

gpfx.dotplot<-function(collections, ht.fx, th.fx, labelfx, padj, rown, coln, X){
  X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio),
             Score = pmin(-log(p.adjust, 10), 30),
             Collection = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), ht.fx, 1)))
  X3<-X2 %>% filter(Collection %in% collections & p.adjust <= padj) %>%
    mutate(Category = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), th.fx, 1 )))
  clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 6) +
    facet_wrap(vars(Collection), ncol = coln, nrow = rown, scales = "free_y") +
    scale_color_gradientn(colours = turbo(5)) +
    scale_y_discrete(labels = labelfx, position = "right")}

require(viridis)
pcut2<-0.1
vc.Collections<-c("HALLMARK","KEGG")
glol.w1<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 2, 1, msig_wgcna)
ggsave(paste0(outdir2,"/supp_wgcna.pdf"), plot = glol.w1 + theme(legend.position = "top", legend.direction = "horizontal"), width = 4, height =7)

pcut2<-0.1
vc.Collections<-c("HP")
glol.w2<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna
                      %>%filter(Description %in% toupper(vc.ORPHA881))) +
  theme(text = element_text(size = 6), legend.position="top", legend.box = "horizontal")
ggsave(paste0(outdir2,"/glol.wgcna.hp.pdf"), plot = glol.w2,width = 4, height = 5)

pcut2<-0.01
vc.Collections<-c("WP")
glol.w3<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna)

pcut2<-0.01
vc.Collections<-c("REACTOME")
glol.w4<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1 ,msig_wgcna)

pcut2<-0.1
vc.Collections<-c( "TARGET_GENES")
glol.w5<-gpfx.dotplot(vc.Collections, tail, head, fx.labels1, pcut2, 1,1, msig_wgcna)

pcut2<-0.1
vc.Collections<-c("HALLMARK")
glol.w6<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1,1, msig_wgcna)

ls.w.plots<-list(
  #glol.w4 +theme(legend.position="top", legend.box = "horizontal", legend.text=element_text(size=5)),
  glol.w2 +theme(legend.text=element_text(size=5)),
  glol.w1 +guides(color = 'none', size = 'none')
)

vn.w.plots<-sapply(ls.w.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})


ggsave(paste0(outdir2,"/lol.wmsig.pdf"), grid.arrange(grobs = ls.w.plots#,
                                                      #heights = c(5, 9, 5),
                                                      #layout_matrix = rbind(c(2, 2, 1, 1),
                                                      #                      c(2, 2, 3, 3),
                                                      #                      c(2, 2, 3, 3))
                                                      ), width = 10, height = 20,
       device = "pdf", units = "in")

if(file.exists(paste0(outdir2,"/go_wgcna.rds")))
{go_wgcna<-readRDS(paste0(outdir2,"/go_wgcna.rds"))
}
if(!file.exists(paste0(outdir2,"/go_wgcna.rds"))) {
go_wgcna<- compareCluster(ls.test, fun = enrichGO, pvalueCutoff = 0.1, pAdjustMethod = "BH",
                          universe=df.gsea2$ENTREZID, OrgDb = org.Hs.eg.db, ont = "ALL",readable = T)
saveRDS(go_wgcna, paste0(outdir2,"/go_wgcna.rds"))
}


gpfx.goplot<-function(ontology, ht.fx, th.fx, labelfx, padj, rown, coln, X){
  X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio),
             Score = -log(p.adjust, 10))#,
  X3<-X2 %>% filter(ONTOLOGY %in% c("BP") & p.adjust <= pcut2) #%>%
  clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 5) +
    scale_color_gradientn(colours = turbo(5)) +
    scale_y_discrete(labels = labelfx)}

pcut2<-0.0001
vc.Collections<-c("BP")
glol.w5<-gpfx.goplot(vc.Collections, head, tail, fx.labels3, pcut2, 1, 1, go_wgcna)


#
require(GOSemSim)
require(enrichplot)
fx.semsim2<-function(compClust,ontology, padj){pairwise_termsim(compClust %>% filter(ONTOLOGY == ontology &
                                                                                       p.adjust <= padj ) %>%
                                                                  arrange(p.adjust), method="JC", semData = dsem.sim.bp)}

ss.bp.wgcna2<-fx.semsim2(go_wgcna |>
                        filter(Cluster %in% as.character(c(1,6,7,13,19,20,26,27,3,5,8,9,11,15)))
                           , "BP", 0.01)
ss.cc.wgcna<-fx.semsim2(go_wgcna, "CC", 0.05)
ss.mf.wgcna<-fx.semsim2(go_wgcna, "MF", 0.05)

fx.semsim3<-function(compClust,collection, padj){pairwise_termsim(compClust %>% filter(str_detect(Description, collection) &
                                                                                       p.adjust <= padj ) %>% arrange(p.adjust) %>%
                mutate(Description = str_to_sentence(toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), tail, 1 ))))
                                                                  , method="JC", semData = dsem.sim.bp)}
ss.HP.wgcna<-fx.semsim3(msig_wgcna %>%
                     filter(Description %in% toupper(vc.ORPHA881))
                       ,"HP" , 0.1)
ego.HP.wgcna<-emapplot(ss.HP.wgcna, showCategory = dim(ss.HP.wgcna@compareClusterResult)[1],
                               ellipse_style = "polygon", ellipse_pro = 0.9, alpha =0.2, shadowtext = F,
                               cex_label_group = 1, label_style = "shadowtext", cex_category = 1, pie =  "Count",
                               cex_pie2axis =1, #nWords = 10, label_format = 20,
                               min_edge = 0.1, repel = T, #group_category= T,
                               legend_n =4) +
  scale_fill_manual(values = vc.labcol_ME[as.character(unique(ss.HP.wgcna@compareClusterResult$Cluster))])
ggsave(paste0(outdir2,"/wgcna_HP_emap.pdf"),device = 'pdf', width = 6, height = 6, unit = "in")

ego.bp.wgcna<-emapplot_cluster(ss.bp.wgcna2, showCategory = dim(ss.bp.wgcna2@compareClusterResult)[1],
                             ellipse_style = "polygon", ellipse_pro = 0.9, alpha =0.2, shadowtext = F,
                               cex_label_group = 1, label_style = "shadowtext", cex_category = 1, pie =  "Count",
                                cex_pie2axis =10, #nWords = 10, label_format = 20,
                              min_edge = 0.05, repel = T, #group_category= T,
                              legend_n =4) +
  scale_fill_manual(values = vc.labcol_ME[as.character(unique(ss.bp.wgcna2@compareClusterResult$Cluster))])
ggsave(paste0(outdir2,"/wgcna_bp2_emap.pdf"),device = 'pdf', width = 6, height = 6, unit = "in")


pdf(file=paste0(outdir2,"/Eigengene3.pdf"),width = 4, height = 4)
MEsPAX7SOX10<-orderMEs(cbind(MEs, datTraits3$Percent.PAX7SOX10))
plotEigengeneNetworks(MEsPAX7SOX10,plotHeatmaps = T,setMargins = T,greyLabel = 0,
                        "Eigengene adjacency heatmap", plotDendrograms = F, letterSubPlots = F,
                      ySymbols = vc.ME_labcol[gsub("^ME","",names(MEsPAX7SOX10))],
                      colorLabels = T, plotAdjacency = F,
                      xLabelsAngle = 45, excludeGrey = T, signed = T, printAdjacency = F,cex.adjacency = 0.4)
dev.off()


require(Rfast)
datKME<-signedKME(datExpr, MEs)
rank.datKME<-as.data.frame(colRanks(as.matrix(datKME)))
colnames(rank.datKME)<-colnames(datKME)
rownames(rank.datKME)<-rownames(datKME)


revc.ensByMod<-setNames(moduleColors, names(net$colors))
ls.mods<-list(all = table(revc.ensByMod),
              PAR = table(revc.ensByMod[df$gene_name[vtf.PAR]]),
              PAIR = table(revc.ensByMod[df$gene_name[vtf.PAIR2]]),
              oESC = table(revc.ensByMod[df$gene_name[vtf.oESC]]),
              allESC = table(revc.ensByMod[df$gene_name[vtf.ESC2]]),
              onlyX = table(revc.ensByMod[df$gene_name[vtf.X]]),
              onlyY = table(revc.ensByMod[df$gene_name[vtf.Y]])
)


df.strata.tab<-t(bind_rows(ls.mods) %>% add_column(Strata = names(ls.mods)) %>% column_to_rownames('Strata'))
df.strata.tab[which(is.na(df.strata.tab),arr.ind = T)]<-0

df.noncat<-(-1)*(df.strata.tab-df.strata.tab[,1])
df.nonmod<-(-1)*(t(t(df.strata.tab)-colSums(df.strata.tab)))

#get all array index pairs, could also have used: expand.grid(seq(ncol(df.strata.tab)),seq(nrow(df.strata.tab)))[,2:1]
mn.arr.ind<-which(!is.na(df.strata.tab), arr.ind = T)

ls.mod.test<-apply(mn.arr.ind, MARGIN = 1, function(x) {
  ft<-fisher.test(matrix(c(df.strata.tab[x[1],x[2]],
                           df.noncat[x[1],x[2]],
                           df.nonmod[x[1],x[2]],
                           colSums(df.strata.tab)[1]-df.nonmod[x[1],x[2]]-df.noncat[x[1],x[2]]-df.strata.tab[x[1],x[2]]),
                         nrow = 2, ncol =2))
})

#retrieve statistic
mn.ftlp<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {-log(ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$p.value,10)}),
                nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
mn.ftor<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$estimate}),
                nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
rownames(mn.ftlp)<-rownames(df.strata.tab)
rownames(mn.ftor)<-rownames(df.strata.tab)
colnames(mn.ftlp)<-colnames(df.strata.tab)
colnames(mn.ftor)<-colnames(df.strata.tab)

#require(pheatmap)
#require(gplots)
mn.ft.lpor<-mn.ftlp
mn.ft.lpor[which(mn.ftlp<=(-log(0.05,10)),arr.ind = T)]<-0

vc.names<-rownames(mn.ft.lpor)

require(RColorBrewer)
require(pheatmap)
df.pheatanno_col2<-cbind(as.data.frame(mn.ft.lpor[,-1]), module = vc.ME_labcol[vc.names])
df.pheatanno_row2<-tb_Hm_allelicX %>% dplyr::select(gene_name, Region, Reported) %>%
  unique() %>% as.data.frame() %>% filter(!is.na(gene_name)) %>% column_to_rownames("gene_name") %>%
  mutate(Region = case_when(is.na(Region) ~ "nonPAR", T ~ Region))


colnames(datKME)<-str_replace_all(colnames(datKME), "kME", "")
names(vc.names)<-vc.names
ls.pheatcolor2 = list(Region = c(PAR = "black", nonPAR = "lightgrey"), module = vc.labcol_ME, Reported = col.pal3[names(col.pal3) != "Unknown"])
colnames(datKME)<-vc.ME_labcol[colnames(datKME)]
colnames(datKME)<-vc.labcol_ME[colnames(datKME)]


require(circlize)
require(ComplexHeatmap)
fs = 6

hm.modcorr<-Heatmap(datKME[rownames(df.pheatanno_row2),],
          col = colorRamp2(c(-1, 0, 1), c("blue", "white","red")),
          cluster_rows = T, cluster_columns = T,
          column_names_gp = grid::gpar(fontsize = fs),
          row_names_gp = grid::gpar(fontsize = fs),
         top_annotation = HeatmapAnnotation(moduleL = anno_text(vc.ME_labcol[colnames(datKME)],
                                                                gp = gpar(fontsize = fs)),
                                            module = vc.ME_labcol[colnames(datKME)], col = ls.pheatcolor2,
                                             PAR = anno_barplot(df.pheatanno_col2[colnames(datKME),"PAR"], height = unit(0.5, "cm")),
                                             PAIR = anno_barplot(df.pheatanno_col2[colnames(datKME),"PAIR"], height = unit(0.5, "cm")),
                                             oESC = anno_barplot(df.pheatanno_col2[colnames(datKME),"oESC"], height = unit(0.5, "cm")),
                                             ALL = anno_barplot(df.pheatanno_col2[colnames(datKME),"allESC"], height = unit(0.5, "cm")),
                                       which = "column", height = unit(2.5, "cm")),
          bottom_annotation = HeatmapAnnotation(module = anno_text(vc.ME_labcol[colnames(datKME)],
                                                gp = gpar(fontsize = fs))),
                                         # col = ls.pheatcolor2),
          left_annotation = rowAnnotation(Reported = df.pheatanno_row2$Reported,
                                            Region = df.pheatanno_row2$Region,
                                          module = vc.ME_labcol[revc.ensByMod[rownames(df.pheatanno_row2)]],
                                           col = ls.pheatcolor2),
         width = unit(5, "cm"), height = unit(11, "cm")
          )
  pdf(file=paste0(outdir2,"/modcorr2.pdf"), pointsize = fs)
  draw(hm.modcorr)
  dev.off()

# run once (take a while)
ADJ1<-abs(cor(datExpr,use="p"))^sft$powerEstimate
Alldegrees1<-intramodularConnectivity(ADJ1, moduleColors)

#rownames(Alldegrees1)<-colnames(datExpr)
head(Alldegrees1)
  chooseTopHubInEachModule(
  datExpr,
  moduleColors,
  omitColors = "grey",
  power = sft$powerEstimate,
  type = "signed")

vcn.GS.PAX7SOX10<-setNames(GS.PAX7SOX10, colnames(datExpr))

vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M1 | vtf.M2) & (vtf.PAR | vtf.PAIR2) & !duplicated(df$gene_name)]])
sort.datKME<-as.matrix(apply(datKME, 2, function(X){cbind(rownames(datKME |> arrange(desc(X))))}))
top15<-setNames(as.vector(sort.datKME[c(1:20),]),rep(colnames(sort.datKME[c(1:20),]),each = 20))

df.plot<-cbind(names(top15),
               vcn.GS.PAX7SOX10[top15],
               Alldegrees1[top15,],
               datKME[top15,]^sft$powerEstimate)
colnames(df.plot)[c(1,2)]<-c("module","PAX7SOX10")

df.plot<- cbind(module = vc.restrictGenes,
                                     PAX7SOX10 = vcn.GS.PAX7SOX10[names(vc.restrictGenes)],
               (Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
               datKME[names(vc.restrictGenes),]^2)
s7a<-read_csv(file="s7a.csv")
s7b<-read_csv(file="s7b.csv")

df.plotG<-df.plot |> rownames_to_column("Gene") |>
  left_join(rbind(s7a,s7b[,colnames(s7a)]), by="Gene") |>
  column_to_rownames("Gene")

df.plotG$maxmodule<-colnames(df.plotG[,vc.labcol_ME])[rowMaxs(as.matrix(df.plotG[,vc.labcol_ME]))]
df.plotG[df.plotG$module == "grey","module"]<-df.plotG[df.plotG$module == "grey","maxmodule"]

df.plot2<-cbind(df.plotG$module, as.data.frame(t(rbind(apply(
  df.plotG, 1, function(X){as.numeric(c(X[c(2,
                                            grep("^k[TWOD]",colnames(df.plotG)),
                                            grep("rank$",colnames(df.plotG)))],X[X[1]]))})))))

colnames(df.plot2)<-c(colnames(df.plotG)[c(1,2)],
                      colnames(df.plotG)[grep("^k[TWOD]",colnames(df.plotG))],
                      colnames(df.plotG)[grep("rank$",colnames(df.plotG))],
                      "kME")

df.plot3<-df.plot2 %>% rownames_to_column(var = "Gene") %>%
  mutate(moduleN = vc.ME_labcol[module]) %>%
  mutate(moduleL = paste0(vc.ME_labcol[module],"-",module))

vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M1 | vtf.M2) & (vtf.PAR | vtf.PAIR2) & !duplicated(df$gene_name)]])
df.plot3[!(df.plot3$Gene %in% c(names(vc.restrictGenes))),"Gene"]<-""
vc.combined_label<-setNames(names(vc.ME_labcol), paste0(vc.ME_labcol,"-",names(vc.ME_labcol)))

df.plot3[is.na(df.plot3)]<-1
gg.kim.kme<-ggplot(df.plot3 |> dplyr::filter(Gene != "")
                   , aes(x = PAX7SOX10, # size = kTotal,
                         size = `Average gene constraint % rank`,
                         col = moduleL, y = kME,
                         label = Gene)) +
  geom_point(alpha = 0.8) +
  geom_text(hjust = 1, vjust = -1, size =2) +
  scale_color_manual(values= vc.combined_label, limits = force)+
  geom_vline(xintercept = 0.25, linetype="dotted",
             color = "black", linewidth=1) +
  geom_hline(yintercept = 0.5, linetype="dotted",
             color = "black", linewidth=1) +
  theme(legend.position = "bottom")#, legend.direction = "horizontal")
ggsave(filename = paste0(outdir2,"/gg.kim.kme_pHI.pdf"), plot = gg.kim.kme, width = 3.5, height = 4, units = "in")


require(ggpubr)
require(ggbeeswarm)
vc.top<-c("RPL10","RPL10A","RPL11","RPL12","RPL13","RPL13A","RPL14","RPL15","RPL17",
          "RPL18","RPL18A","RPL19","RPL21","RPL22","RPL23","RPL23A","RPL24","RPL26",
          "RPL27","RPL27A","RPL28","RPL29","RPL3","RPL30","RPL31","RPL32","RPL34",
          "RPL35","RPL35A","RPL36","RPL36A","RPL37","RPL37A","RPL38","RPL39","RPL3L",
          "RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL8","RPL9","RPLP0","RPLP1",
          "RPLP2","RPSA","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A",
          "RPS16","RPS17","RPS18","RPS19","RPS2","RPS20","RPS21","RPS23","RPS24",
          "RPS25","RPS26","RPS27","RPS27A","RPS28","RPS29","RPS3","RPS3A","RPS4X",
          "RPS4Y1","RPS4Y2","RPS5","RPS6","RPS7","RPS8","RPS9")
vc.top2<-c("FAU", "UBF",
           "EIF3A","EIF3E","EIF3F","EIF3H","EIF4B","PABPC1","EEF1A1","EEF1B2","EEF1D",
           "EEF1G","EEF2","TCTP","HNRNPA1","NAP1L1","VIM", "NPM1", "RACK1")

df.TOPs<-read.csv("./pnas.1912864117.sd05.csv", header = T)
vc.restrictGenes<-na.omit(revc.ensByMod[df.TOPs$gene])

df.plot4<-cbind(module = vc.restrictGenes,
        PAX7SOX10 = vcn.GS.PAX7SOX10[names(vc.restrictGenes)],
        (Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
        datKME[names(vc.restrictGenes),]^2)#sft$powerEstimate)

df.plot4a<-df.plot4 |> rownames_to_column("gene") |> left_join(df.TOPs, by="gene") |>
  left_join(df.plot4 |> group_by(module)|> tally(name = "size"), by="module") |>
  left_join(vc.labcol_ME |> as.data.frame() |> rownames_to_column("mod_numb") |> rename(module = "vc.labcol_ME"), by="module")

df.plot4b<-df.plot4a |> left_join(df |> add_column(vtf.F, vtf.M1, vtf.M2) |>
                                   rename(gene = "gene_name"), by = "gene")

df.plot4c<- df.plot4a |> left_join(df |> rename(gene = "gene_name") |>
                                    dplyr::select("gene" | contains("XO")) |>
                                    pivot_longer(!c(gene), names_to = c("contrast", ".value"), names_sep="_")
                                  , by= "gene" ) |>
  mutate(deg = case_when(padj <= 0.1 & log2FoldChange < 0 ~ "down",
                         padj <= 0.1 & log2FoldChange > 0 ~ "up",
                         T ~ "ns")) |>
  mutate(RPLS = case_when(gene %in% c(vc.top) ~ "ribosomal",
                          #gene %in% c(vc.top2) ~ "non-RP translation",
                          T ~ "other")) |>
  group_by(contrast, gene) |> filter(n()==1) |> ungroup() #|>

ggplot(df.plot4c, aes(x = deg, y = topscore, fill = deg)) +
  #geom_jitter(alpha=0.2) +
  scale_fill_manual(values = c("royalblue","white","red")) + #geom_contour() +
  geom_boxplot(color = "black", varwidth = T, notch = T, outlier.alpha = 0) +
  facet_wrap(ncol = 1, vars(contrast), strip.position = "top") +
  scale_y_continuous(position = "left") +
  coord_cartesian(ylim = c(0,4)) +
  stat_compare_means(ref.group = "ns", hide.ns = T,
                     method = "wilcox",
                     label.y = 4, size = 2, label = "p.format") +
  theme(legend.position = "top", legend.direction = "horizontal",
        legend.key.height = unit(0.1, 'in'),
        legend.key.width = unit(0.2, 'in')) +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
    text = element_text(size=6))
ggsave("./test_4h.pdf",width = 2, height = 3)

tb.sigmod <- df.plot4c |> dplyr::select(gene, mod_numb, module, size, contrast, stat, PAX7SOX10, topscore) |>
 filter(size > 4) |>
  group_by(mod_numb, contrast) |> dplyr::summarize(topcorR = as.vector(rcorr(stat, topscore)$r),
                                        topcorP = as.vector(rcorr(stat, topscore)$P)) |>
  filter(!is.na(topcorP)) |> unique() |>
  filter(topcorP <= 0.01 & contrast %in% c("mXO1","mXO2") & mod_numb != 0) |>
  ungroup() |> arrange(contrast, topcorP)

tb.sigmod.PS <- df.plot4c |> dplyr::select(gene, mod_numb, module, size, contrast, stat, PAX7SOX10, topscore) |>
  filter(size > 4) |>
 group_by(mod_numb, contrast) |> dplyr::summarize(topcorR = as.vector(rcorr(PAX7SOX10, topscore)$r),
                                                 topcorP = as.vector(rcorr(PAX7SOX10, topscore)$P)) |>
  filter(!is.na(topcorP)) |> unique() |>
  filter(topcorP <= 0.05 & contrast %in% c("mXO1","mXO2") & mod_numb != 0) |>
  ungroup() |> arrange(contrast, topcorP)

ggscatter(df.plot4c |> filter(mod_numb %in% tb.sigmod.PS$mod_numb) |>
           mutate(mod_numb = fct_relevel(mod_numb, as.character(sort(as.numeric(unique(tb.sigmod.PS$mod_numb))))))
         , x = "PAX7SOX10", y = "topscore", color = rgb(0,0,0,1), shape="RPLS",
          point = T,
          add = "reg.line", conf.int = T, ggtheme = theme_minimal(), size = 0,
          add.params = list(linecolor = "black"),
          facet.by = "mod_numb") +
  geom_density2d(inherit.aes = T, aes(color = module), contour_var = "ndensity") +
  stat_cor(label.sep = " ") +
  scale_color_identity() +
  theme(legend.position = "none", text = element_text(size=6))
ggsave("./test_panel2.pdf",width = 4, height = 3)


dfp.ncc<-compare_means(data = df.plot4c |> filter(!(gene %in% vc.top)), formula = PAX7SOX10 ~ module, ref.group = "grey", hide.ns = T,
                            method = "wilcox", label.y = 0, label = "p.signif") |> mutate(module = group2)  |> filter(p.adj < 0.05) |>
  left_join(df.plot4c |> group_by(module) |> summarise(mean_top = mean(topscore), mean_ncc= mean(PAX7SOX10)), by = "module")

ggplot(df.plot4c
       , aes(x = PAX7SOX10, color = module)) +
  theme(text = element_text(size = 6)) +
  stat_ecdf(geom = "smooth", pad = F) +
  scale_colour_identity()
ggsave("./ncc_ecdf.pdf",width = 1.8, height = 1.8)

dfp.topscore<-compare_means(data = df.plot4c |> filter((gene %in% vc.top) | module == "grey")
                            , formula = topscore ~ module, ref.group = "grey", hide.ns = T,
              method = "wilcox", label.y = 0, label = "p.signif") |> mutate(module = group2)  |> filter(p.adj < 0.05) |>
  left_join(df.plot4c |> group_by(module) |> summarise(mean_top = median(topscore), mean_ncc= median(PAX7SOX10)), by = "module")

ggplot(df.plot4c |> mutate(module2 = case_when(gene %in% vc.top ~ NA, T ~ module)) |>
                  mutate(RPLS = case_when(gene %in% vc.top ~ "wo", T ~ "w"))
       , aes(x = topscore, colour = module)) +
  stat_ecdf(geom = "smooth", pad = F, show.legend = T, inherit.aes = TRUE, mapping = aes(linetype = RPLS)) +
  scale_colour_identity(aesthetics = "colour") + #guides(colour = guide_legend()) +
  geom_text(data = dfp.topscore |> mutate(module = group2, corr = sign(mean_ncc)), size = 2,
          aes(x = 3.5,  y = abs(mean_ncc),
              label = paste(" p ",signif(p.adj,2),
                            " T ",signif(mean_top,2),
                            " R ",signif(mean_ncc,2),sep =","))) +
theme(text = element_text(size = 6), legend.position = "none")

ggsave("./top_ecdf2.pdf",width = 1.8, height = 1.8)

top_wald<- ggscatter(data = df.plot4c |> filter(module != "grey") |>
                      group_by(module, mod_numb, contrast) |>
                      summarise(
                        genes = n(),
                        med_y = median(topscore),
                        sderr_y = sd(topscore)/sqrt(n()),
                        med_x= median(stat),
                        sderr_x = sd(stat)/sqrt(n()))
                    , color = "module", size = "genes",
                    label = "mod_numb", font.label = c(6, "bold"), repel = T,
                    add = "reg.line", add.params = list(color="grey"),
                    x="med_x", y="med_y", #cor.coef.size = 6, cor.coef = T,
                    conf.int = T, show.legend.text = T) +  facet_grid(rows=vars(contrast)) +
  scale_colour_identity() +
  geom_errorbar(aes(ymin = med_y-sderr_y, ymax = med_y+sderr_y)) +
  geom_errorbar(aes(xmin = med_x-sderr_x, xmax = med_x+sderr_x)) +
  stat_cor(label.y = 1.45, label.x = -0.5, mapping = aes(size=6)) +
  stat_regline_equation(label.y = 1.3, label.x = -0.5, mapping = aes(size=6)) +
  coord_cartesian(xlim=c(-5,3), ylim = c(0.5,1.5)) +
  theme(text = element_text(size = 6), legend.position = "none")
ggsave("./wald_top.pdf",width = 3, height = 2.7)

#PRESERVATION ANALYSIS
#vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]

GSE77513<-read.table("./WGCNA_NCC_sets/GSE77513_Proj_03498_D_htseq_all_samples.txt2", header=T, row.names = 1)
dat.GSE77513 <- GSE77513 %>% mutate(ENSEMBL = str_extract(rownames(GSE77513), "ENSG[0-9]*")) %>%
  left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  select(-starts_with("Gene"),-starts_with("EN")) %>% column_to_rownames("SYMBOL")
dat.Zeltner<- t(varianceStabilizingTransformation(as.matrix(dat.GSE77513),blind=T))

GSE120200<-read.table("./WGCNA_NCC_sets/GSE120200_ExpressionMatrix.txt", header=T)
GSE127268<-read.table("./WGCNA_NCC_sets/GSE127268_RawCountMatrix.txt", header=T) |>
  rename(Geneid = "FEATURE_ID") |> select(-starts_with("GENE_SYMBOL"))
dat.GSE120200<-GSE120200 |> left_join(GSE127268, by="Geneid") |>
  rename(ENSEMBL = "Geneid") |>
  left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  select(-starts_with("EN")) %>% column_to_rownames("SYMBOL") %>%
  as.data.frame()
dat.Matheus<-t(varianceStabilizingTransformation(as.matrix(dat.GSE120200),blind=T))


GSE108521<-read.csv("./WGCNA_NCC_sets/GSE108521.csv",header = T)
GSE121428<-read.csv("./WGCNA_NCC_sets/GSE121428.csv",header = T)
dat.Laugsch<- GSE108521 |> left_join(GSE121428, by="EnsemblID") |>
  rename(ENSEMBL = "EnsemblID") |>
  left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  column_to_rownames("SYMBOL") %>% select(contains("FPKM")) %>%
  as.data.frame() |> t()

GSE134532<-read.table("./WGCNA_NCC_sets/GSE134532.counts.txt", header = T, row.names = 1)
dat.GSE134532<-GSE134532 |> rownames_to_column("SYMBOL") |>
  left_join(All.df,by="SYMBOL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
  column_to_rownames("SYMBOL") |> select(contains("CNCCdiff")) %>%
  as.data.frame()
dat.Greenberg<- t(varianceStabilizingTransformation(as.matrix(GSE134532),blind = T))

GSE138799<-read.table("./WGCNA_NCC_sets/GSE138799_embryonic_heart_counts.txt", header = T)
dat.GSE138799<-GSE138799 |> left_join(All.df,by="ENSEMBL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
  column_to_rownames("SYMBOL") %>% select(starts_with("CS")) %>%
  as.data.frame()
dat.VanOudenhove<- t(varianceStabilizingTransformation(as.matrix(dat.GSE138799),blind = T))

GSE197513<-read.csv("./WGCNA_NCC_sets/GSE197513_NCC_CF_FB_scaled_counts.csv", header = T)
dat.GSE197513<-GSE197513 |> left_join(All.df,by="ENSEMBL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
  column_to_rownames("SYMBOL") %>% select(starts_with("CS")) %>%
  as.data.frame()
dat.Yankee<- t(varianceStabilizingTransformation(as.matrix(dat.GSE197513),blind = T))

GSE145327<-read.table("./WGCNA_NCC_sets/GSE145327_H9ESC_CNCC_chond_RNAseq_counts.txt",header=T)
dat.GSE145327<- GSE145327 |>
  rename(SYMBOL = "Geneid") |>
  left_join(All.df,by="SYMBOL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  column_to_rownames("SYMBOL") %>% select(contains("NCC"))
dat.Long<-t(varianceStabilizingTransformation(as.matrix(dat.GSE145327),blind = T))

GSE176101<-read.table("./WGCNA_NCC_sets/GSE176101_Raw_gene_counts_matrix.txt", header=T)
dat.GSE176101<-GSE176101 %>% rename(ENSEMBL = "gene_id") %>% left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  column_to_rownames("SYMBOL") |> select(-starts_with("EN"),-starts_with("ips")) |> as.data.frame()
dat.Wen<-t(varianceStabilizingTransformation(as.matrix(dat.GSE176101),blind = T))

GSE207112<-read.table("./WGCNA_NCC_sets/GSE207112_counts_TBLs.txt", header = T)
dat.GSE207112<- GSE207112 |> mutate(ENSEMBL = str_extract(Geneid, "ENSG[0-9]*")) |>
  left_join(All.df,by="ENSEMBL")|> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
  column_to_rownames("SYMBOL") |> select(starts_with("X")) |> as.data.frame()
dat.Ahern<-t(varianceStabilizingTransformation(as.matrix(dat.GSE207112),blind = T))

GSE164665<-read.table("./WGCNA_NCC_sets/GSE164665_Stroma.counts.txt", header = T)
dat.GSE164665<-GSE164665 %>% left_join(All.df,by="SYMBOL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
  column_to_rownames("SYMBOL") |> select(-starts_with("EN")) |> as.data.frame()
dat.Birnbaum<- t(varianceStabilizingTransformation(as.matrix(dat.GSE164665),blind = T))

ls.testnets<- list(
 fem = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                    filter(Sex == "Male") |> ungroup() |> pull(RegistryID),],
  male = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                    filter(Sex == "Female") |> ungroup() |> pull(RegistryID),]
)

ls.testnetsA<- list(
                    Mix15 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                                      filter(Sex == "Male" & row_number() %% 3 == 0) |> ungroup() |> pull(RegistryID),],
                    MixF20 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                                       filter(Sex == "Female") |> ungroup() |> pull(RegistryID),],
                    OnlyXOs  = datExpr[colData(vsd3) |> as_tibble() |> filter(simple == "XO" & Sex == "Male") |> pull(RegistryID),],
                    MonosomyX_TBLs = dat.Ahern
                    )
ls.testnetsB<-list(MixM15 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                                      filter(Sex == "Male" & row_number() %% 2 == 0) |> ungroup() |> pull(RegistryID),],
                   MixF20 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
                                      filter(Sex == "Female") |> ungroup() |> pull(RegistryID),],
                   OnlyXOs  = datExpr[colData(vsd3) |> as_tibble() |> filter(simple == "XO" & Sex == "Male") |> pull(RegistryID),],
                    CS_Face = dat.Yankee,
                    CS_Heart = dat.VanOudenhove,
                    BranchioOculofacial_TFAP2A = dat.Laugsch,
                    BohringOpitz_ASXL1 = dat.Matheus,
                    FamDysAutonomia_IKBKAP = dat.Zeltner,
                    FloatingHarbor_SRCAP = dat.Greenberg,
                    PierreRobin_SOX9 = dat.Long,
                    Waardenburg_SOX10 = dat.Wen,
                    MonosomyX_TBLs = dat.Ahern,
                    Control_Stroma = dat.Birnbaum
)

ls.testnetsC<-list(
  CS_Face = dat.Yankee,
  TFAP2A_WT = t(as.matrix(t(dat.Laugsch) |> as.data.frame() |> select(starts_with("WT")))),
  ASXL1_HOM = t(as.matrix(t(dat.Matheus) |> as.data.frame() |> select(starts_with("HOM")))),
  FHS_HET = t(as.matrix(t(dat.Greenberg) |> as.data.frame() |> select(contains("_FHS")))),
  LONG_WT = t(as.matrix(t(dat.Long) |> as.data.frame() |> select(contains("_WT_"))))
)

fx.clean_nets<-function(ls.testnets)
{
for (i in c(1:length(ls.testnets)))
{
  datExpr0 <- ls.testnets[[i]]
  gsg <- goodSamplesGenes(datExpr0, verbose = 3)
  gsg$allOK
  if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) {
      printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
    }
    if (sum(!gsg$goodSamples)>0) {
      printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
    }

    # Remove the offending genes and samples from the data:
    datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
  }
  ls.testnets[[i]]<-datExpr0
}
  return(ls.testnets)
}

ls.testnets<-fx.clean_nets(ls.testnets)
ls.testnetsB<-fx.clean_nets(ls.testnetsB)
ls.testnetsC<-fx.clean_nets(ls.testnetsC)

colorsFull = moduleColors
multiColor = list(Full_set = colorsFull);


multiExpr = list(
                  FEM = list(data = ls.testnets[[1]]),
                  MALE = list(data = ls.testnets[[2]])
)


multiExprA = list(Full_set = list(data = datExpr),
                 Mix27 = list(data = ls.testnetsA[[1]]),
                 OnlyXOs = list(data = ls.testnetsA[[2]]),
                 XO_TBLs = list(data = ls.testnetsA[[3]])
)

multiExprB = list(Full_set = list(data = datExpr),
                 MixF20 = list(data = ls.testnetsB$MixF20),
                 MixM15 = list(data = ls.testnetsB$MixM15),
                 Only_XO = list(data = ls.testnetsB$OnlyXOs),
                 CS_Face = list(data = ls.testnetsB$CS_Face),
                 CS_Heart = list(data = ls.testnetsB$CS_Heart),
                 BranchioOculofacial = list(data = ls.testnetsB$BranchioOculofacial_TFAP2A),
                 BohringOpitz = list(data = ls.testnetsB$BohringOpitz_ASXL1),
                 FamDysAutonomia = list(data = ls.testnetsB$FamDysAutonomia_IKBKAP),
                 FloatingHarbor = list(data = ls.testnetsB$FloatingHarbor_SRCAP),
                 PierreRobin = list(data = ls.testnetsB$PierreRobin_SOX9),
                 Waardenburg = list(data = ls.testnetsB$Waardenburg_SOX10),
                 TBL = list(data = ls.testnetsB$MonosomyX_TBLs),
                 Control_Stroma = list(data = ls.testnetsB$Control_Stroma)
)

multiExprC = list(Full_set = list(data = datExpr),
                  CS_Face = list(data = ls.testnetsC[[1]]),
                  BOFS_WT = list(data = ls.testnetsC[[2]]),
                  BohringOpitz_MUT = list(data = ls.testnetsC[[3]]),
                  FHS_MUT = list(data = ls.testnetsC[[4]]),
                  PRS_WT = list(data = ls.testnetsC[[5]])
)

system.time( {
  mp2 = modulePreservation(multiExprA,
                           multiColor,
                           dataIsExpr = T,
                           networkType = "signed",
                           corFnc = "bicor",
                           corOptions = "use = 'p', maxPOutliers = 0.1",
                           referenceNetworks = 1,
                           maxModuleSize = max(table(colorsFull)),
                           nPermutations = 50,
                           randomSeed = 1,
                           quickCor = 0,
                           verbose = 3)
} )

saveRDS(mp2, file= paste0(outdir2,"/mp2a.rds"))

system.time( {
  mp3 = modulePreservation(multiExprB,
                           multiColor,
                           dataIsExpr = T,
                           networkType = "signed",
                           corOptions = "use = 'p', maxPOutliers = 0.1",
                           corFnc = "bicor",
                           referenceNetworks = 1,
                           maxModuleSize = max(table(colorsFull)),
                           nPermutations = 50,
                           randomSeed = 1,
                           quickCor = 0,
                           verbose = 3)
} )

saveRDS(mp3, file= paste0(outdir2,"/",tf.deepsplit,vn.mergeCut,"_mp3.rds"))

system.time( {
  mp4 = modulePreservation(multiExprC,
                           multiColor,
                           dataIsExpr = T,
                           networkType = "signed",
                           corFnc = "bicor",
                           referenceNetworks = 1,
                           maxModuleSize = max(table(colorsFull)),
                           nPermutations = 100,
                           randomSeed = 1,
                           quickCor = 0,
                           verbose = 3)
} )


mp3<-readRDS(file = paste0(outdir2,"/mp3_25.rds"))


#mp<-mp1
#mp<-mp2
#mp<-mp3
#mp<-mp4

pdf(file="Zsummary-medianRank.pdf", wi=10, h=5)
par(mfrow = c(1,2))
par(mar = c(4.5,4.5,2.5,1))


df.Zsummary3<-as.data.frame(matrix(nrow=1+length(table(colorsFull))))
for (i in c(1:(length(multiExprB)-1)))
{
  ref = 1
  test = i+1
  statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
  statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
  print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
               signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
  df.Zsummary3<-as.data.frame(cbind(df.Zsummary3, statsObs[, c("medianRank.pres", "medianRank.qual")],
                                   signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))

  # Module labels and module sizes are also contained in the results
  modColors = rownames(mp$preservation$observed[[ref]][[test]])
  moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
  # leave grey and gold modules out
  plotMods = !(modColors %in% c("grey", "gold"));
  # Text labels for points
  text = modColors[plotMods];
  # Auxiliary convenience variable
  plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
  mains = c("Preservation Median rank", "Preservation Zsummary");
  for (p in 1:2)
  {
    min = min(plotData[, p], na.rm = TRUE);
    max = max(plotData[, p], na.rm = TRUE);
    # Adjust ploting ranges appropriately
    if (p==2)
    {
      if (min > -max/10) min = -max/10
      ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
    } else
      ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
    plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
         main = mains[p],
         cex = 2.4,
         ylab = mains[p], xlab = "Module size", log = "x",
         ylim = ylim,
         xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
    labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
    # For Zsummary, add threshold lines
    if (p==2)
    {
      abline(h=0)
      abline(h=2, col = "blue", lty = 2)
      abline(h=10, col = "darkgreen", lty = 2)
    }}

}
dev.off()

df.Zsummary3<-df.Zsummary3[,!(colnames(df.Zsummary3) %in% "V1")]
colnames(df.Zsummary3)<-c(
  paste(rep(names(multiExprB)[-1], each = 4), colnames(df.Zsummary3), sep="_"))


mn.Zs<-as.matrix(cbind(df.Zsummary3[!rownames(df.Zsummary3) %in%
                                      c("gold","grey"),grep("Zsummary.pres", colnames(df.Zsummary3))]))

colnames(mn.Zs)<-gsub("_Zsummary.pres.*", "", colnames(mn.Zs))
mn.Zs<-mn.Zs[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]

df.Zs<-mn.Zs |> as.data.frame() |> select(!starts_with("Mix27"))

#mn.Zs2<-mn.Zs[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]
sizeGrWindow(10,10)
par(mar = c(0, 0, 0, 0))
pdf(paste0(outdir2,"/modpres.bar3c.pdf"),width = 3, height = 9)
barplot(rev(mn.Zs[,grep("Only_XO", colnames(mn.Zs))]-mn.Zs[,grep("MixM15",colnames(mn.Zs))])#/rev(mn.Zs2[,grep("MixM15", colnames(mn.Zs2))])
        ,col=rev(rownames(mn.Zs)), #xlim=c(-100,50),
        xlab = "fraction deltaZ module preservation", xpd =F,
        horiz=T, names.arg=rev(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)]),
        cex.names=0.2, width=rev(table(colorsFull)[head(gsub(x=colnames(MEs),pattern = "^ME",""), -1)])
)
abline(v = c(-5,5))
dev.off()


mn.Zs2<-as.matrix(df.Zs |> select(order(colnames(df.Zs))) |> select(-Only_XO)) #[,c(1:length(colnames(mn.Zs)))]
mn.Zs2[mn.Zs2<0]<-0

Breaks = c(2 ,seq(from = 3, to = 9, by = 1),
           seq(from = 10, to = max(mn.Zs2), by = 5))

require(gplots)
require(pheatmap)
vc.colour<-as.vector(names(table(colorsFull)))
names(vc.colour)<-names(table(colorsFull))
ls.modpres_color<-list(colour = vc.colour)
df.modpres_row<-as.data.frame(cbind(
 vc.colour))
colnames(df.modpres_row)<-c("colour") #"number")

#df.modpres_row<-cbind(vc.labcol_ME, vc.ME_labcol)
#colnames(df.modpres_row)<-c("colour", "number")

pheatmap(mn.Zs2[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),], cellnote = round(mn.Zs2,digits = 1), display_numbers = T, number_format = "%.0f",
         cluster_rows = F, cluster_cols = T, border_color = NA, cellwidth = 12, cellheight = 12,
         color =colorpanel(length(Breaks)-1, "white", "orange", "red"), breaks=Breaks, angle_col = 90,
         annotation_row = df.modpres_row,  annotation_colors = ls.modpres_color, annotation_legend = F, annotation_names_row = T,
         labels_row = paste0(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],"-",
                            rownames(df.Zsummary3[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]), "  (",
                            table(multiColor)[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)], ")" ),
         filename=paste0(outdir2,"/MP_NCCs_newA2.pdf")
)

dev.off()

require(circlize)
df.Zs2<-df.Zs[,-grep("Mix|Onl", colnames(df.Zs))]
df.summary<-as.data.frame(cbind(df.Zs2,
                                #"maxpres" = apply(df.Zs2, 1, max),
                                "maxset" = colnames(df.Zs2)[apply(df.Zs2, 1, which.max)],
                                "moderate" = apply(df.Zs2, 1, function(X){max(X) > 5}),
                                "strongly" = apply(df.Zs2, 1, function(X){max(X) >= 10})
                                ))
rownames(df.summary)<-vc.ME_labcol[rownames(df.summary)]


df.corr<-as.data.frame(moduleTraitCor[head(names(MEs),-1),])
df.corr[moduleTraitPvalue[head(names(MEs),-1),]>0.05]<-0
rownames(df.corr)<-gsub("^ME","", rownames(df.corr))
rownames(df.corr)<-vc.ME_labcol[rownames(df.corr)]


df.corrP<-signif(log(as.data.frame(moduleTraitPvalue[head(names(MEs),-1),]),10)*(-1),2)
df.corrP[moduleTraitPvalue[head(names(MEs),-1),]>0.05]<-0
rownames(df.corrP)<-gsub("^ME","", rownames(df.corrP))
rownames(df.corrP)<-vc.ME_labcol[rownames(df.corrP)]

df.msig<-as.data.frame(cbind(
  "TS" = msig_wgcna@compareClusterResult |> filter(Description %in% toupper(vc.ORPHA881)) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
  "development" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "HEART|CREST|CARDIO")) |> dplyr::select(Cluster) |> table() |>
   setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
  "cholesterol" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "CHOLESTEROL|LIPID")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
  "mitochondrial" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "MITOCHON|OXIDATIVE|ELECTRONTRANSPORT")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
  "signalling" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "SIGNALLING")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
  "ribosomal" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "RIBOSOM|TRANSLAT")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)])
  ))
rownames(df.msig)<-vc.ME_labcol[rownames(df.msig)]

df.go<-as.data.frame(cbind(
  "development" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "development")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
  "cholesterol" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "cholesterol|lipid")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
  "mitochondrial" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "mitochon|oxidative|electrontransport")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
  "signalling" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "signalling")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
  "ribosomal" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "ribosom|translat")) |> dplyr::select(Cluster) |> table() |>
    setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)])
))
rownames(df.go)<-vc.ME_labcol[rownames(df.go)]

require(circlize)
require(gplots)
df.mod_wald<-t(lapply(ls.ensByMod, function(X)
  colMeans(df.gsea2[df.gsea2$SYMBOL %in% X, grep("XO",colnames(df.gsea2))])) |> as.data.frame())
rownames(df.mod_wald)<-gsub("X","",rownames(df.mod_wald))

mn.wald<-df.mod_wald[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],]
mn.pres<-df.summary[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
mn.msig<-df.msig[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric)  |> as.matrix()
mn.corr<-df.corr[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
mn.corrP<-df.corrP[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()

require(ComplexHeatmap)
fs = 6
chm<-Heatmap(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],
        col = vc.labcol_ME[rownames(mn.pres)], show_heatmap_legend = F,
        column_names_gp = grid::gpar(fontsize = fs)) +
  Heatmap(mn.corr, col = colorRamp2(c(-1, 0, 1), c("blue", "white","red")),
          cluster_columns = T,
          heatmap_legend_param = list(title = "Pearson R"),#, direction = "horizontal", position = "topcenter"),
          column_names_gp = grid::gpar(fontsize = fs),
          left_annotation = rowAnnotation(genes = anno_barplot(
            setNames(as.vector(table(moduleColors)),names(table(moduleColors)))[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)])),
          cell_fun = function(j, i, x, y, width, height, fill)
          {
            if(mn.corrP[i, j] > 0)
              grid.text(sprintf("%.0f", mn.corrP[i, j]), x, y, gp = gpar(fontsize = fs))
          })  +
Heatmap(mn.msig,
        cluster_rows = F, cluster_columns = F,
        col = colorRamp2(c(0, 1, 10), c("white","honeydew2","honeydew4")),
        column_names_gp = grid::gpar(fontsize = fs),
        heatmap_legend_param = list(title = "mSigDB hits"),# direction = "horizontal", position = "topcenter"),
        cell_fun = function(j, i, x, y, width, height, fill)
        {
          if(mn.msig[i, j] > 0)
            grid.text(sprintf("%.0f", mn.msig[i, j]), x, y, gp = gpar(fontsize = fs))
        }) +
  Heatmap(mn.pres,
          col = colorRamp2(Breaks, colorpanel(length(Breaks), "white", "orange", "red")),
          cluster_rows = F, cluster_columns = T,
          heatmap_legend_param = list(title = "Preservation"),# direction = "horizontal", position = "topcenter"),
          column_names_gp = grid::gpar(fontsize = fs),
          cell_fun = function(j, i, x, y, width, height, fill)
          {
            if(mn.pres[i, j] >= 5)
              grid.text(sprintf("%.0f", mn.pres[i, j]), x, y, gp = gpar(fontsize = fs))
          }) +
  Heatmap(mn.wald, col = colorRamp2(c(-2, 0, 2), c("cyan", "white","magenta")),
          cluster_columns = T, cluster_rows = F,
          heatmap_legend_param = list(title = "avg. Wald"),# direction = "horizontal", position = "topcenter"),
          column_names_gp = grid::gpar(fontsize = fs),
          row_names_gp = grid::gpar(fontsize = fs))

pdf(file=paste0(outdir2,"/summary.pdf"), pointsize = 6, width = 6.5, height = 4.5)
draw(chm)
dev.off()

go_wgcna |>
  as.data.frame() |>
  mutate(
    object_file = "go_wgcna",
    ID = sprintf("GO%s_%s", ONTOLOGY, ID)
  ) |>
  dplyr::select(-ONTOLOGY) |>
  rbind(
    setReadable(msig_wgcna, OrgDb = org.Hs.eg.db, keyType = "ENTREZID") |>
      as.data.frame() |>
      mutate(object_file = "msig_all")
  ) |>
  dplyr::select(-object_file) |>
  rename(Module = "Cluster") |>
  arrange(qvalue) |>
  write_csv(paste0(outdir,"/Table_S3.csv"))