Skip to content
Permalink
479373775c
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
886 lines (722 sloc) 38.2 KB
#memory.limit(size=157370)
library(WGCNA)
library(tidyverse)
library(DESeq2)
tf.gseaset<-F
catvars<-c("Xa","donor","simple")
outdir2<-paste0(outdir,"/",tf.gseaset,"gseaSet")
dir.create(outdir2, showWarnings = FALSE)
#Perform WGCNA on either all ENSEMBL genes in df (default), only or gene_name mapping to non-duplicate ENTREZIDs (tf.gseaset = T)
if (all(df[,c(grep(pattern = "^d", colnames(df)))] == assay(vsd3)))
{
if(tf.gseaset){
ensembl.counts<-df[,c(grep(pattern = "^d", colnames(df)))]
ENSG.counts<-ensembl.counts
rownames(ENSG.counts)<-df$ENSEMBL
datExpr0<-t(ENSG.counts[df.gsea2$ENSEMBL,])
} else{datExpr0 <- as.data.frame(t(assay(vsd3)))}
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) {
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples)>0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
datExpr<-datExpr0
if(tf.gseaset){colnames(datExpr)<-df.gsea2$SYMBOL
} else{colnames(datExpr)<-df$gene_name}
}
sampleTree <- hclust(dist(datExpr0), method = "average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
# no need to remove samples
datTraits <- as.data.frame(colData(vsd3)[,catvars])
datTraits2<-binarizeCategoricalColumns(datTraits, includeLevelVsAll = T, nameForAll = "", includePairwise = F, dropUninformative = T, dropFirstLevelVsAll = F)
rownames(datTraits2)<-rownames(datTraits)
df.elisa<-read.csv("ELISA_hCG_PIGF_sp.csv",header = T,row.names = "RegistryID")
df.elisa<-df.elisa[rownames(datTraits2),]
if(all(rownames(df.elisa) == rownames(datTraits2))) {datTraits3<-cbind(datTraits2, df.elisa) %>% dplyr::select(-Cell_Line)}
# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr), method = "average")
traitColors = numbers2colors(datTraits3, signed = F)
# Plot the sample dendrogram and the colors underneath.
pdf(paste0(outdir2,"/Dendro_mod.pdf"))
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits3),
main = "Sample dendrogram and trait heatmap")
dev.off()
save(datExpr, datTraits3, file = "tbl_wgcna-dataInput.RData")
options(stringsAsFactors = FALSE)
collectGarbage()
#enableWGCNAThreads()
# Load the data saved in the first part
#lnames <- load(file = "tbl_wgcna-dataInput.RData")
#The variable lnames contains the names of loaded variables.
#lnames
# Choose a set of soft-thresholding powers
powers = c(seq(from = 1, to=30, by=1))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5, networkType = "signed hybrid", corFnc = bicor, dataIsExpr = T)
# Plot the results:
require(ggplot2)
ggplot(sft$fitIndices, aes(x=Power, y=SFT.R.sq, label=Power)) + geom_text(color="red") + geom_hline(yintercept=.85, color="red") + labs(title="Scale independence")
ggplot(sft$fitIndices, aes(x=Power, y=`mean.k.`, label=Power)) + geom_text(color="red") + labs(title="Mean connectivity")
net <- blockwiseModules(datExpr, power = sft$powerEstimate,
networkType = "signed hybrid", corType = "bicor",
TOMType = "signed", minModuleSize = 30,
maxBlockSize=30000,
reassignThreshold = 0,
mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "TOMTOM",
verbose = 3)
# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs
geneTree = net$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree,
file = "TBL-networkConstruction-auto.RData")
grey <- df[which(moduleColors=="grey"),]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
#MEs = orderMEs(MEs0)
MEs = orderMEs(MEs0, order = c(1:length(MEs0)))
moduleTraitCor = cor(MEs, datTraits3, use = "pairwise.complete.obs", method = "spearman")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
pdf(paste0(outdir2,"/ModuleTrait.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.05]<-""
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits3),
yLabels = names(MEs),
yColorLabels = T,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = F,
cex.text = 0.6,
zlim = c(-1,1),
cex.lab.x = 0.6,
main = paste("Module-trait relationships"))
dev.off()
# A data frame with module eigengenes can be obtained as follows
MEsAutomatic=net$MEs
moduleLabelsAutomatic=net$colors;
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
vc.PIGFn = as.data.frame(datTraits3$PIGF_per_ug_RNA)
vc.hCGn = as.data.frame(datTraits3$hCG_per_ug_RNA)
GS.PIGFn=as.numeric(cor(datExpr,vc.PIGFn,use="pairwise.complete.obs"))
GS.hCGn=as.numeric(cor(datExpr,vc.hCGn,use="pairwise.complete.obs"))
#df2<-df[order(match(df$gene_name, rownames(t(datExpr)))),]
df.GS.Markers<-as.data.frame(apply(df.Markers.med, 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
mn.GS.Markers<-numbers2colors(df.GS.Markers, signed=T, lim=c(-1,1))
colnames(mn.GS.Markers)<-colnames(df.GS.Markers)
#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR)
mn.mednorm<-assay(vsd3)-rowMedians(assay(vsd3))
rownames(mn.mednorm)<-df[rownames(mn.mednorm),"gene_name"]
df.escape<-as.data.frame(cbind(colMeans(mn.mednorm[df$gene_name[vtf.ESC2],]),
colMeans(mn.mednorm[df$gene_name[vtf.PAR],]),
colMeans(mn.mednorm[df$gene_name[vtf.oESC],]),
colMeans(mn.mednorm[df$gene_name[vtf.PAIR],])))
colnames(df.escape)<-c("All", "PAR","X_escapee", "X_pair")
df.GS.escape<-as.data.frame(apply(df.escape, 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
mn.GS.escape<-numbers2colors(df.GS.escape, signed=T)
colnames(mn.GS.escape)<-colnames(df.escape)
datTraits4<-cbind(datTraits3[,c(grep("simple",colnames(datTraits3)),grep("RNA", colnames(datTraits3)))],
df.escape,
df.Markers.med[,-c(grep(".[K/L]$", colnames(df.Markers.med)),grep("early", colnames(df.Markers.med)))])
colnames(datTraits4)[c(grep("RNA", colnames(datTraits4)))]<-c("nPlGF","nhCG")
require(Hmisc)
require(corrplot)
#####
pdf(paste0(outdir2,"/Corrplot.pdf"))
cor_5<- rcorr(as.matrix(datTraits4[,-c(grep("simple",colnames(datTraits4)))]),type = "spearman")
M <- cor_5$r
p_mat <- cor_5$P
p_mat[is.na(p_mat)] <- 0
corrplot(M, type = "upper", order = "original", method="square",
p.mat = p_mat, sig.level = 0.05, insig = "blank",
col=blueWhiteRed(100), tl.col="black", tl.srt = 60, na.label = "NA")
dev.off()
df.wald<-df.gsea2
if(!tf.gseaset) {
stopifnot(all(df$gene_name == colnames(datExpr)))
require(preprocessCore)
vc.Wald<-c("fXO_stat", "mXO_stat", "FM_stat", "fXO2_stat", "fmO_stat")
mn.stat_qn2<-normalize.quantiles(matrix(unlist(df[,vc.Wald]), ncol = length(vc.Wald)))
colnames(mn.stat_qn2)<-vc.Wald
df.wald<-as.data.frame(cbind(mn.stat_qn2,
"aveiXO_stat" = rowMeans(mn.stat_qn2[,grep("XO_stat", colnames(mn.stat_qn2))]),
"aveXO_stat" = rowMeans(mn.stat_qn2[,c(1:3)]))) %>% add_column("gene_name" = df$gene_name, .before = 1)
rownames(df.wald)<-df$ENSEMBL
}
pdf(paste0(outdir2,"/DendroTrait_mod.pdf"))
blocknumber=1
mColors=moduleColorsAutomatic[net$blockGenes[[blocknumber]]]
datColors=data.frame(mColors,
fXO=numbers2colors(df.wald$fXO_stat, signed=T, lim = c(-10,10)),
mXO=numbers2colors(df.wald$mXO_stat, signed=T, lim = c(-10,10)),
fmXO=numbers2colors(df.wald$FM_stat, signed=T, lim = c(-10,10)),
aveXO=numbers2colors(df.wald$aveXO_stat, signed=T, lim = c(-10,10)),
nPIGF=numbers2colors(GS.PIGFn, signed=T),
nhCGB=numbers2colors(GS.hCGn, signed=T),
mn.GS.Markers[,-c(grep(".[K/L]$", colnames(mn.GS.Markers)),grep("early", colnames(mn.GS.Markers)))])
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[blocknumber]]
,colors=datColors,
groupLabels=names(datColors),
dendroLabels=FALSE, hang=0.03,addGuide=TRUE,guideHang=0.05)
dev.off()
moduleTraitCor = cor(MEs, datTraits4, use = "pairwise.complete.obs", method = "pearson")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
pdf(paste0(outdir2,"/ModuleTrait_corr_large.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 1), "\n(",
signif(-log(moduleTraitPvalue, 10), 2)
, ")"
, sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.05]<-""
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits4),
yLabels = names(MEs),
#ySymbols = names(MEs),
yColorLabels = T,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = F,
cex.text = 0.5,
zlim = c(-1,1),
cex.lab.x = 0.6,
main = paste("Module-trait relationships"))
dev.off()
### ENRICHMENT TESTING ###
require(msigdbr)
require(clusterProfiler)
require(org.Hs.eg.db)
library(DOSE)
collectGarbage()
#vc.ensByMod<-setNames(names(net$colors), paste("ME", net$colors, "_", sep=""))
vc.ensByMod<-setNames(names(net$colors), moduleColors)
ls.ensByMod <- split(vc.ensByMod, names(vc.ensByMod))
if(!tf.gseaset) {
ls.test<-lapply(ls.ensByMod, function(X) unique(df.gsea2[df.gsea2$SYMBOL %in% X,"ENTREZID"]))
} else {ls.test<-lapply(ls.ensByMod, function(X) unique(All.df[which(All.df$SYMBOL %in% X),"ENTREZID"]))}
msig_wgcna<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
pAdjustMethod = "BH", TERM2GENE=msig_Hall_C1_CP_TFT_MOD_HP, universe=unlist(ls.test))
msig_wgcna2<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
pAdjustMethod = "BH", TERM2GENE=msig_CGP_C6_C7_C8, universe=unlist(ls.test))
go_wgcna<- compareCluster(ls.test, fun = enrichGO, pvalueCutoff = 0.1, OrgDb = org.Hs.eg.db, keyType = "ENTREZID", ont = "ALL",
pAdjustMethod = "BH", universe=unlist(ls.test))
collectGarbage()
fx.labels1<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
abbreviate(s, minlength = 30, dot = T)
})
fx.labels2<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
s<-str_replace_all(s, "_", " ")
abbreviate(s, minlength = 30, dot = T)
})
gpfx.dotplot<-function(collections, ht.fx, th.fx, labelfx, padj, rown, coln, X){
X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio),
Score = -log(p.adjust, 10),
Collection = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), ht.fx, 1)))
X3<-X2 %>% filter(Collection %in% collections & p.adjust <= padj) %>%
mutate(Category = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), th.fx, 1 )))
clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 5) +
facet_wrap(vars(Collection), ncol = coln, nrow = rown, scales = "free") +
# aes(-log(qvalue,10), fct_reorder(ID, -log(qvalue,10)))) + #facet_wrap(vars(Cluster), nrow=1) +
# geom_segment(aes(xend=0, yend = Description)) +
#geom_point(aes(size = as.numeric(summary(strsplit(geneID, "/"))[,1]))) +
#geom_point(aes(size = Count)) +
#scale_color_viridis_c(guide=guide_colorbar(reverse=F)) +
scale_color_gradientn(colours = rainbow(5)) +
scale_y_discrete(labels = labelfx)}
pcut2<-0.05
vc.Collections<-c("WP")
glol.w1<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna)
pcut2<-0.05
vc.Collections<-c("KEGG")
glol.w2<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1 ,msig_wgcna)
pcut2<-0.05
vc.Collections<-c( "HALLMARK")
glol.w3<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1,1, msig_wgcna)
pcut2<-0.1
vc.Collections<-c("DESCARTES")
glol.w4<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna2)
ls.w.plots<-list(
glol.w1 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "top"),
glol.w2 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "top"),
glol.w3 +theme(legend.text=element_text(size=5)) + theme(legend.direction = "horizontal", legend.position = "bottom")
)
vn.w.plots<-sapply(ls.w.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})
ggsave(paste0(outdir2,"/lol.wmsig.pdf"), grid.arrange(grobs = ls.w.plots,
#heights = c(9, 9, 9),
layout_matrix = rbind(c(1, 1, 2, 2),
c(1, 1, 3, 3)
# c(1, 1, 4, 4)
)), width = 8, height = 10,
device = "pdf", units = "in")
fx.semsim2<-function(compClust,ontology, padj){pairwise_termsim(compClust %>% filter(ONTOLOGY == ontology &
p.adjust <= padj ) %>%
arrange(p.adjust), method="JC", semData = dsem.sim.bp)}
ss.bp.wgcna<-fx.semsim2(go_wgcna, "BP", 0.01)
ss.cc.wgcna<-fx.semsim2(go_wgcna, "CC", 0.05)
ss.mf.wgcna<-fx.semsim2(go_wgcna, "MF", 0.05)
ego.bp.wgcna<-emapplot_cluster(ss.bp.wgcna, showCategory = min(200,dim(ss.bp.wgcna@compareClusterResult)[1]),
cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 6,
label_format = 15, nWords = 4, min_edge = 0.1, repel = T) +
scale_fill_manual(values = as.character(unique(ss.bp.wgcna@compareClusterResult$Cluster)))
ego.cc.wgcna<-emapplot_cluster(ss.cc.wgcna, showCategory = min(200,dim(ss.cc.wgcna@compareClusterResult)[1]),
cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 3,
label_format = 15, nWords = 4, min_edge = 0.1, repel = T) +
scale_fill_manual(values = as.character(unique(ss.cc.wgcna@compareClusterResult$Cluster)))
ego.mf.wgcna<-emapplot_cluster(ss.mf.wgcna, showCategory = min(200,dim(ss.mf.wgcna@compareClusterResult)[1]),
cex_label_group = 1, label_style = "shadowtext", cex_line = 0.2, cex_category = 3,
label_format = 15, nWords = 4, min_edge = 0.1, repel = T) +
scale_fill_manual(values = as.character(unique(ss.mf.wgcna@compareClusterResult$Cluster)))
ls.ego.plots<-list(
ego.bp.wgcna,
ego.cc.wgcna
)
ggsave(paste0(outdir2,"/lego.wgo.pdf"), grid.arrange(grobs = ls.ego.plots, nrow=length(ls.ego.plots), heights = c(5,5)), device = "pdf", width=7.5, height=10, units = "in")
All.df.GENE<-All.df[!duplicated(All.df$SYMBOL),]
rownames(All.df.GENE)<-All.df.GENE$SYMBOL
tb.west<-as_tibble(na.omit(merge(df.west, All.df.GENE, by="row.names"))) %>% dplyr::select(West, ENTREZID) %>% dplyr::rename(Marker = West)
tb.chhabra<-as_tibble(na.omit(merge(df.chhabra, All.df.GENE, by="row.names"))) %>% dplyr::select(Chhabra, ENTREZID) %>% dplyr::rename(Marker = Chhabra)
tb.zhou<-as_tibble(na.omit(merge(df.zhou, All.df.GENE, by="row.names"))) %>% dplyr::select(Zhou, ENTREZID) %>% dplyr::rename(Marker = Zhou)
tb.xiang<-as_tibble(na.omit(merge(df.xiang, All.df.GENE, by="row.names"))) %>% dplyr::select(Xiang, ENTREZID) %>% dplyr::rename(Marker = Xiang)
tb.khan<-as_tibble(na.omit(merge(df.khan, All.df.GENE, by="row.names"))) %>% dplyr::select(Khan, ENTREZID) %>% dplyr::rename(Marker = Khan)
tb.hsiao<-as_tibble(na.omit(merge(df.hsiao, All.df.GENE, by="row.names"))) %>% dplyr::select(Hsiao, ENTREZID) %>% dplyr::rename(Marker = Hsiao)
tb_all<-bind_rows(tb.west, tb.chhabra, tb.zhou, tb.xiang, tb.hsiao, tb.khan) %>% mutate(gs_name = Marker, entrez_gene = as.integer(ENTREZID)) %>% select(gs_name, entrez_gene)
markers_wgcna<-compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1,
pAdjustMethod = "BH", TERM2GENE=tb_all, universe=unlist(ls.test, use.names = F))
ss.markers_w<-pairwise_termsim(markers_wgcna %>% dplyr::filter(p.adjust < 0.1), method="JC",showCategory = 200)
gg.emap_w<-emapplot(ss.markers_w, pie = 'Count', min_edge = 0.01, cex_line =1, cex_category = 2,
label_style = "ggforce", shadowtext = F, layout = 'lgl',
group_category = F, legend_n = 3, showCategory = 200, repel = T) +
scale_fill_manual(values = unique(as.character(na.omit(droplevels(ss.markers_w@compareClusterResult$Cluster,c(""))))))
ggsave(filename = paste0(outdir2,"/gg.emap_wmarkers1.pdf"), plot = gg.emap_w ,device = "pdf",width = 8, height = 10, units = "in")
#####
dev.off()
pdf(file="Eigengene.pdf",width = 5, height = 5)
plotEigengeneNetworks(orderMEs(cbind(MEs, datTraits3$PIGF_per_ug_RNA, datTraits3$hCG_per_ug_RNA)),
"Eigengene adjacency heatmap", plotDendrograms = F, letterSubPlots = F,
setLabels = colnames(MEs),colorLabels = T, plotAdjacency = F,
xLabelsAngle = 45, excludeGrey = T, signed = T, printAdjacency = F,cex.adjacency = 0.4)
dev.off()
#######
#require(data.table)
revc.ensByMod<-setNames(moduleColors, names(net$colors))
ls.mods<-list(all = table(revc.ensByMod),
PAR = table(revc.ensByMod[df$gene_name[vtf.PAR]]),
PAIR = table(revc.ensByMod[df$gene_name[vtf.PAIR]]),
oESC = table(revc.ensByMod[df$gene_name[vtf.oESC]]),
allESC = table(revc.ensByMod[df$gene_name[vtf.ESC2]]),
otherX = table(revc.ensByMod[df$gene_name[vtf.X & !vtf.ESC2]]),
Ypair = table(revc.ensByMod[df$gene_name[vtf.PAIRY]])
)
df.strata.tab<-t(bind_rows(ls.mods) %>% add_column(Strata = names(ls.mods)) %>% column_to_rownames('Strata'))
df.strata.tab[which(is.na(df.strata.tab),arr.ind = T)]<-0
df.noncat<-(-1)*(df.strata.tab-df.strata.tab[,1])
df.nonmod<-(-1)*(t(t(df.strata.tab)-colSums(df.strata.tab)))
#get all array index pairs, could also have used: expand.grid(seq(ncol(df.strata.tab)),seq(nrow(df.strata.tab)))[,2:1]
mn.arr.ind<-which(!is.na(df.strata.tab), arr.ind = T)
ls.mod.test<-apply(mn.arr.ind, MARGIN = 1, function(x) {
ft<-fisher.test(matrix(c(df.strata.tab[x[1],x[2]],
df.noncat[x[1],x[2]],
df.nonmod[x[1],x[2]],
colSums(df.strata.tab)[1]-df.nonmod[x[1],x[2]]-df.noncat[x[1],x[2]]-df.strata.tab[x[1],x[2]]),
nrow = 2, ncol =2))
})
#retrieve statistic
mn.ftlp<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {-log(ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$p.value,10)}),
nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
mn.ftor<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$estimate}),
nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
rownames(mn.ftlp)<-rownames(df.strata.tab)
rownames(mn.ftor)<-rownames(df.strata.tab)
colnames(mn.ftlp)<-colnames(df.strata.tab)
colnames(mn.ftor)<-colnames(df.strata.tab)
mn.ft.lpor<-mn.ftor
mn.ft.lpor[which(mn.ftlp<=(-log(0.05,10)),arr.ind = T)]<-0
gg.group_mod_map<-pheatmap(mn.ft.lpor, cluster_rows = F, cluster_cols = F, colorpanel(50, low = "white", high = "red"))
require(Rfast)
datKME<-signedKME(datExpr, MEs)
rank.datKME<-as.data.frame(colRanks(as.matrix(datKME)))
colnames(rank.datKME)<-colnames(datKME)
rownames(rank.datKME)<-rownames(datKME)
vc.names<-rownames(mn.ft.lpor)
require(RColorBrewer)
df.pheatanno_col2<-cbind(as.data.frame(mn.ft.lpor[,-1]), module = vc.names)
df.pheatanno_row2<-tb_HmXvstDiff %>% select(gene_name, Region, Reported) %>% pivot_wider() %>% as.data.frame() %>% column_to_rownames("gene_name")
colnames(datKME)<-str_replace_all(colnames(datKME), "kME", "")
names(vc.names)<-vc.names
ls.pheatcolor2 = list(Region = c(PAR = "black", nonPAR = "lightgrey"), module = vc.names,
Reported =c("Escape" = "yellow1", "Inactive" = "grey27", "Variable" = "lightgreen", "Unknown" = "grey", "VarOther" = "lightseagreen"))
pheatmap(datKME[rownames(df.pheatanno_row2),], annotation_row = df.pheatanno_row2 # |> dplyr::select(-c("C6XX", "C19XX", "C23XX", "rowname"))
, annotation_col = df.pheatanno_col2, annotation_colors = ls.pheatcolor2,
drop_levels = T, fontsize_row = 7, fontsize_col = 7, border_color = NA, filename = paste0(outdir2,"/groupModcorr2.pdf"),
angle_col = 45, cellwidth = 7, cellheight = 7)
# run once (take a while)
ADJ1<-abs(cor(datExpr,use="p"))^sft$powerEstimate
#memory.limit(size=157370)
Alldegrees1<-intramodularConnectivity(ADJ1, moduleColors)
#rownames(Alldegrees1)<-colnames(datExpr)
vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M) & (vtf.ESC2 | vtf.PAIRY)]])
df.plot<-cbind(module = vc.restrictGenes,
(Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
datKME[names(vc.restrictGenes),]^sft$powerEstimate)
colnames(df.plot)<-str_replace_all(colnames(df.plot), "kME", "")
df.plot2<-cbind(df.plot$module, as.data.frame(t(rbind(apply(df.plot, 1, function(X){as.numeric(c(X[2:5],X[X[1]]))})))))
colnames(df.plot2)<-c(colnames(df.plot)[1:5],"kME")
df.plot3<-df.plot2 %>% rownames_to_column(var = "Gene") %>% mutate(group = if_else(rownames(df.plot2) %in% df$gene_name[(vtf.PAR | vtf.PAIR | vtf.PAIRY)], Gene, "", ""))
gg.kim.kme<-ggplot(df.plot3, aes(x = kWithin, y = kME, col =module,
label = group)) +
geom_point() + ylab(paste0("kME ^ ",sft$powerEstimate)) +
geom_text(hjust = 0, nudge_x = 3) +
scale_color_manual(values=sort(unique(df.plot2$module)))
ggsave(filename = "gg.kim.kme.pdf", plot = gg.kim.kme, width = 4, height = 5, units = "in")
ggsave(filename = "gg.group_mod_map.pdf", plot = gg.group_mod_map)
vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]
#First Trimester: Gonzalez et al.
GSE109120<-read.table("GSE109120_genecounts.txt", header = T, sep = "\t", row.names = 1)
df.GSE109120<-cbind(df$gene_name, GSE109120[vc.ENSG,])
df.GSE109120<-df.GSE109120[which(!(df.GSE109120[,1] %in% unique(df.GSE109120[duplicated(df.GSE109120[,1]),1]))),]
rownames(df.GSE109120)<-df.GSE109120[,1]
df.GSE109120<-na.omit(df.GSE109120[-1])
datExpr.GSE109120<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(df.GSE109120))))
datExpr.GSE109120.fem<-datExpr.GSE109120[c(grep("F", rownames(datExpr.GSE109120))),]
datExpr.GSE109120.male<-datExpr.GSE109120[c(grep("M", rownames(datExpr.GSE109120))),]
datExpr.GSE109120.rand2<-datExpr.GSE109120[lapply(c(1:length(rownames(datExpr.GSE109120))), "%%", 2) == 0,]
#BAP2: Sheridan, et al.
#GSE119265<-as.data.frame(read.table(file = "../TB/GSE119265_coding.all.count.tsv", header = TRUE, row.names = 1, sep="\t"))
GSE119265<-as.data.frame(read.csv(file = "sherisan_eope_dataset.counts", header = TRUE, row.names = 1))
df.GSE119265<-cbind(df[rownames(GSE119265),"gene_name"], GSE119265[,c(7:length(colnames(GSE119265)))])
df.GSE119265<-df.GSE119265[rownames(df),]
df.GSE119265<-df.GSE119265[which(!(df.GSE119265[,1] %in% unique(df.GSE119265[duplicated(df.GSE119265[,1]),1]))),]
rownames(df.GSE119265)<-df.GSE119265[,1]
df.GSE119265<-na.omit(df.GSE119265[-1])
datExpr.GSE119265<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(df.GSE119265))))
#Term1: Buckberry, et al.
GSE77085<-as.data.frame(read.csv(file = "GSE77085_hg19_ucsc_gene_counts.csv", header = TRUE, row.names = 1))
datExpr.GSE77085<-as.data.frame(t(varianceStabilizingTransformation(as.matrix(GSE77085))))
#Term2: Deyssenroth, et al.
SRP095910 <- readRDS("RICHS_cpm.rds")
datExpr.SRP095910 <- as.data.frame(t(varianceStabilizingTransformation(round(SRP095910))))
RICHs.datTraits<-read.csv("RICHs.datTraits.csv",header = T,row.names = 1)
datExpr.SRP095910.male<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$Sex == "Male",]),]
datExpr.SRP095910.fem<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$Sex == "Female",]),]
datExpr.SRP095910.sga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "SGA",]),]
datExpr.SRP095910.lga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "LGA",]),]
datExpr.SRP095910.aga<-datExpr.SRP095910[rownames(RICHs.datTraits[RICHs.datTraits$F13.BW.group == "AGA",]),]
datExpr.SRP095910.rand2<-datExpr.SRP095910[rownames(RICHs.datTraits[lapply(as.numeric(rownames(RICHs.datTraits)), "%%", 2) == 0,]),]
datExpr.SRP095910.rand5<-datExpr.SRP095910[rownames(RICHs.datTraits[lapply(as.numeric(rownames(RICHs.datTraits)), "%%", 5) == 0,]),]
#datExpr.GSE109120<-as.data.frame(t(vst(as.matrix(gnn.GSE109120))))
datExpr.noSV<-as.data.frame(t(assay(vsd)))
colnames(datExpr.noSV)<-df[rownames(t(datExpr.noSV)),"gene_name"]
ls.testnets2<- list( BAP1b = datExpr.noSV,
BAP2 = datExpr.GSE119265,
FirstTrimester = datExpr.GSE109120,
FirstTri.male = datExpr.GSE109120.male,
FirstTri.fem =datExpr.GSE109120.fem,
FirstTri.rand2 = datExpr.GSE109120.rand2,
Term1 = datExpr.GSE77085,
Term2 = datExpr.SRP095910,
T2.male = datExpr.SRP095910.male,
T2.fem = datExpr.SRP095910.fem,
T2.rand2 = datExpr.SRP095910.rand2,
T2.rand5 = datExpr.SRP095910.rand5,
T2.aga = datExpr.SRP095910.sga,
T2.sga = datExpr.SRP095910.lga,
T2.lga = datExpr.SRP095910.aga
)
for (i in c(1:length(ls.testnets2)))
{
datExpr0 <- ls.testnets2[[i]]
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) {
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples)>0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
ls.testnets2[[i]]<-datExpr0
}
ls.testnets3<- list(BAP1.mix = datExpr[paste0("d", c(221,222,224:226,227,228, 230:232,323:328)), ],
BAP1.XO = datExpr[rownames(datTraits4[datTraits4$simple.XO ==1,]), ])
for (i in c(1:length(ls.testnets3)))
{
datExpr0 <- ls.testnets3[[i]]
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) {
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples)>0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
ls.testnets3[[i]]<-datExpr0
}
multiExpr3 = list( BAP1 = list(data = datExpr),
BAP1.rand2 = list(data = ls.testnets3[[1]]),
BAP1.XO = list(data = ls.testnets3[[2]]))
colorsBAP3 = moduleColors
multiColor3 = list(BAP1 = colorsBAP3);
system.time( {
mp3 = modulePreservation(multiExpr3,
multiColor3,
dataIsExpr = T,
networkType = "signed",
corFnc = "bicor",
referenceNetworks = 1,
maxModuleSize = 5000,
nPermutations = 100,
randomSeed = 1,
quickCor = 0,
verbose = 3)
} )
mp<-mp3
df.Zsummary<-as.data.frame(matrix(nrow=1+length(table(colorsBAP3))))
for (i in c(1:length(ls.testnets3)))
{
ref = 1
test = i+1
statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
df.Zsummary<-as.data.frame(cbind(df.Zsummary, statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
# Module labels and module sizes are also contained in the results
modColors = rownames(mp$preservation$observed[[ref]][[test]])
moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
# leave grey and gold modules out
plotMods = !(modColors %in% c("grey", "gold"));
# Text labels for points
text = modColors[plotMods];
# Auxiliary convenience variable
plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
mains = c("Preservation Median rank", "Preservation Zsummary");
for (p in 1:2)
{
min = min(plotData[, p], na.rm = TRUE);
max = max(plotData[, p], na.rm = TRUE);
# Adjust ploting ranges appropriately
if (p==2)
{
if (min > -max/10) min = -max/10
ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
} else
ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
main = mains[p],
cex = 2.4,
ylab = mains[p], xlab = "Module size", log = "x",
ylim = ylim,
xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
# For Zsummary, add threshold lines
if (p==2)
{
abline(h=0)
abline(h=2, col = "blue", lty = 2)
abline(h=10, col = "darkgreen", lty = 2)
}}
}
df.Zsummary3<-df.Zsummary[,-1]
colnames(df.Zsummary3)<-c(
paste(rep(names(ls.testnets3), each = 4), colnames(df.Zsummary3), sep="_"))
pdf("Module_Pres1.pdf",width = 3, height = 5)
mn.Zs3<-as.matrix(cbind(
df.Zsummary3[!rownames(df.Zsummary3) %in% c("gold"),grep("Zsummary.pres", colnames(df.Zsummary3))]))
barplot(rev(mn.Zs3[,2]-mn.Zs3[,1]),col=rev(rownames(mn.Zs3)), xlim=c(-100,50),
horiz=T, names.arg=substr(rev(rownames(mn.Zs3)),1,4), cex.names=0.5, width=rev(table(colorsBAP3)))
dev.off()
ls.testnets<-ls.testnets2
setLabels = c("BAP1", "BAP1b", "BAP2", "FirstTrimester", "FirstTri.male", "FirstTri.fem", "FirstTri.rand2", "Term1", "Term2",
"T2.male", "T2.fem", "T2.rand2", "T2.rand5", "T2.sga", "T2.lga", "T2.aga")
multiExpr = list( BAP1 = list(data = datExpr),
BAP1b = list(data = ls.testnets[[1]]),
BAP2 = list(data = ls.testnets[[2]]),
FirstTrimester = list(data = ls.testnets[[3]]),
FirstTri.male = list(data = ls.testnets[[4]]),
FirstTri.fem = list(data = ls.testnets[[5]]),
FirstTri.rand2 = list(data = ls.testnets[[6]]),
Term1 = list(data = ls.testnets[[7]]),
Term2 = list(data = ls.testnets[[8]]),
T2.male = list(data = ls.testnets[[9]]),
T2.fem = list(data = ls.testnets[[10]]),
T2.rand2 = list(data = ls.testnets[[11]]),
T2.rand5 = list(data = ls.testnets[[12]]),
T2.aga = list(data = ls.testnets[[13]]),
T2.sga = list(data = ls.testnets[[14]]),
T2.lga = list(data = ls.testnets[[15]])
)
colorsBAP = moduleColors
multiColor = list(BAP1 = colorsBAP);
system.time( {
mp2 = modulePreservation(multiExpr,
multiColor,
dataIsExpr = T,
networkType = "signed",
corFnc = "bicor",
referenceNetworks = 1,
maxModuleSize = 5000,
nPermutations = 100,
randomSeed = 1,
quickCor = 0,
verbose = 3)
} )
mp<-mp2
df.Zsummary<-as.data.frame(matrix(nrow=1+length(table(colorsBAP))))
for (i in c(1:length(ls.testnets2)))
{
ref = 1
test = i+1
statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
df.Zsummary<-as.data.frame(cbind(df.Zsummary, statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
# Module labels and module sizes are also contained in the results
modColors = rownames(mp$preservation$observed[[ref]][[test]])
moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
# leave grey and gold modules out
plotMods = !(modColors %in% c("grey", "gold"));
# Text labels for points
text = modColors[plotMods];
# Auxiliary convenience variable
plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
mains = c("Preservation Median rank", "Preservation Zsummary");
for (p in 1:2)
{
min = min(plotData[, p], na.rm = TRUE);
max = max(plotData[, p], na.rm = TRUE);
# Adjust ploting ranges appropriately
if (p==2)
{
if (min > -max/10) min = -max/10
ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
} else
ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
main = mains[p],
cex = 2.4,
ylab = mains[p], xlab = "Module size", log = "x",
ylim = ylim,
xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
# For Zsummary, add threshold lines
if (p==2)
{
abline(h=0)
abline(h=2, col = "blue", lty = 2)
abline(h=10, col = "darkgreen", lty = 2)
}}
}
df.Zsummary<-df.Zsummary[,-1]
colnames(df.Zsummary)<-c(
paste(rep(names(ls.testnets), each = 4), colnames(df.Zsummary), sep="_"))
Breaks = c(0 ,seq(from = 2, to = 10, by = 2), seq(from = 15,
to = max(as.matrix(df.Zsummary[,grep("Zsummary.pres", colnames(df.Zsummary[,c(5:ncol(df.Zsummary))]))+4])),
by = 5))
mn.Zs<-as.matrix(cbind(df.Zsummary[!rownames(df.Zsummary) %in% c("gold"),grep("Zsummary.pres", colnames(df.Zsummary))]))
sizeGrWindow(10,10)
par(mar = c(0, 0, 0, 0))
mn.Zs2<-mn.Zs[,-1]
dev.off()
vc.colour<-as.vector(names(table(colorsBAP)))
names(vc.colour)<-names(table(colorsBAP))
ls.modpres_color<-list(colour = vc.colour)
df.modpres_row<-as.data.frame(cbind(
vc.colour))
colnames(df.modpres_row)<-c("colour") #"number")
pheatmap(mn.Zs2, cellnote = round(mn.Zs2,digits = 1), display_numbers = T, number_format = "%.0f",
cluster_rows = F, cluster_cols = F, border_color = NA, cellwidth = 12, cellheight = 12,
color =colorpanel(length(Breaks)-1, "white", "orange", "red"), breaks=Breaks,
annotation_row = df.modpres_row, annotation_colors = ls.modpres_color, annotation_legend = F, annotation_names_row = T,
labels_row = paste(rownames(df.Zsummary[!rownames(df.Zsummary) %in% c("gold"),]), table(multiColor3), sep=": "),
filename=paste0(outdir2,"/hm_TBLs_mp_new.pdf")
)
unique_clusters <- go_wgcna |>
as.data.frame() |>
pull(Cluster) |>
unique() |>
as.character()
go_wgcna |>
as.data.frame() |>
mutate(
object_file = "go_wgcna",
ID = sprintf("GO%s_%s", ONTOLOGY, ID)
) |>
dplyr::select(-ONTOLOGY) |>
rbind(
msig_wgcna |>
as.data.frame() |>
mutate(object_file = "msig_wgcna")
) |>
rbind(
msig_wgcna2 |>
as.data.frame() |>
mutate(object_file = "msig2_wgcna")
) |>
pivot_wider(
id_cols = c(ID, Description, object_file),
names_from = Cluster,
values_from = c(GeneRatio, BgRatio, p.adjust),
names_glue = "{Cluster}_{.value}",
names_sort = T
) |>
dplyr::select(
ID,
Description,
# object_file,
starts_with(sprintf("%s_",unique_clusters))
) |>
arrange(
!!as.symbol(sprintf("%s_p.adjust",unique_clusters))
) |>
write_csv("Table_S4.csv")