Skip to content
Permalink
d20478c430
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1240 lines (1051 sloc) 59.3 KB
rm(gene_sum_exp)
rm(sum_exp)
saveRDS(df, file=paste0(outdir,"/df.rds"))
df<-readRDS(file=paste0(outdir,"/df.rds"))
vsd3<-readRDS(file=paste0(outdir,"/vsd3.rds"))
library(WGCNA)
library(tidyverse)
require(DESeq2)
tf.gseaset<-T
catvars<-c("Xa","donor","simple")
dev.off()
if (all(df[,c(grep(pattern = "^d", colnames(df)))] == assay(vsd3)))
{
if(tf.gseaset){
ensembl.vst<-df[,c(colData(vsd3) |> as.data.frame() |> rownames())]
ENSG.vst<-ensembl.vst
rownames(ENSG.vst)<-df$ENSEMBL
datExpr0<-t(ENSG.vst[df.gsea2$ENSEMBL,])
} else{datExpr0 <- as.data.frame(t(assay(vsd3)))}
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) {
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples)>0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
datExpr<-datExpr0
if(tf.gseaset){colnames(datExpr)<-df.gsea2$SYMBOL
} else{
colnames(datExpr)<-df$gene_name}
}
sampleTree <- hclust(dist(datExpr), method = "average")
#sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
# no need to remove samples
df.PAX7SOX10_rna<-read.csv("./MS4_IF_percent_PAX7SOX10.csv") |>
dplyr::filter(Registry_ID != "") |>
rename(names = "Registry_ID",
Percent.PAX7SOX10 = "perc_PAX7SOX10") |>
dplyr::select(Percent.PAX7SOX10, names)
datTraits <- as.data.frame(colData(vsd3)[rownames(datExpr),catvars])
datTraits2<-binarizeCategoricalColumns(datTraits, includeLevelVsAll = T, nameForAll = "",
includePairwise = F, dropUninformative = T, dropFirstLevelVsAll = F)
rownames(datTraits2)<-rownames(datTraits)
datTraits2<-datTraits2 |> rownames_to_column("names") |> left_join(df.PAX7SOX10_rna, by = "names" ) |> column_to_rownames("names")
# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits2, signed = F)
options(stringsAsFactors = FALSE)
collectGarbage()
# Choose a set of soft-thresholding powers
powers = c(seq(from = 1, to=20, by=1))
# Call the network topology analysis function
nwt = "signed hybrid"
maxP<-0
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5, networkType = nwt,
corFnc = bicor, dataIsExpr = T)#, corOptions = list(maxPOutliers =maxP))
sft$powerEstimate
# Plot the sample dendrogram and the colors underneath.
pdf(paste0(outdir,"/NCCs_Dendro_mod.pdf"))
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits2),
main = "Sample dendrogram and trait heatmap")
dev.off()
# Plot the results:
require(ggplot2)
ggplot(sft$fitIndices, aes(x=Power, y=SFT.R.sq, label=Power)) + geom_text(color="red") + geom_hline(yintercept=.85, color="red") + labs(title="Scale independence")
ggplot(sft$fitIndices, aes(x=Power, y=`mean.k.`, label=Power)) + geom_text(color="red") + labs(title="Mean connectivity")
tf.deepsplit<-T
vn.mergeCut<-0.25
outdir2<-paste0(outdir,"/power_",sft$powerEstimate, tf.gseaset,"_", nwt, "_deep_", tf.deepsplit, "_cut_", vn.mergeCut)#, "_maxP_", maxP)
dir.create(outdir2, showWarnings = FALSE)
net <- blockwiseModules(datExpr, power = sft$powerEstimate,
networkType = "signed hybrid", corType = "bicor",
TOMType = "signed", minModuleSize = 30,
maxBlockSize=30000, deepSplit = tf.deepsplit,
reassignThreshold = 0,
mergeCutHeight = vn.mergeCut,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = F,
#saveTOMFileBase = paste0(outdir2,"/TOMTOM_",sft$powerEstimate),
verbose = 3)
saveRDS(net, file = paste0(outdir2,"/net.rds"))
net<-readRDS(file = paste0(outdir2,"/net.rds"))
# open a graphics window
#sizeGrWindow(12, 9)
# Convert labels to colors for plotting
require(scales)
vc.wgcna_col1<-c("black", "blue", "yellow", "red", "brown", "turquoise", "purple")
vc.wgcna_col2<-c("green", "navy", "orange", "salmon", "peru", "cyan", "magenta")
vc.wgcna_col3<-c("olivedrab", "skyblue", "khaki", "tomato", "tan", "darkcyan", "pink")
vc.wgcna_col4<-c("darkkhaki", "royalblue", "bisque", "maroon", "slategrey", "lightcyan", "thistle")
vc.wgcna_col5<-c("palegreen", "slateblue", "beige", "lightsalmon", "sienna", "aquamarine", "violet")
mergedColors = labels2colors(net$colors, colorSeq = c(vc.wgcna_col1,
vc.wgcna_col2,
vc.wgcna_col3,
vc.wgcna_col4,
vc.wgcna_col5))
# Plot the dendrogram and the module colors underneath
pdf(paste0(outdir2,"/NCCs_Cluster_dendro.pdf"))
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
moduleLabels = net$colors
moduleColors = mergedColors
MEs = net$MEs
geneTree = net$dendrograms[[1]]
#save(MEs, moduleLabels, moduleColors, geneTree,
# file = "NCC-networkConstruction-auto.RData")
grey <- df[which(moduleColors=="grey"),]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, paste0("_",moduleLabels,"_",moduleColors))$eigengenes
vc.ME_labcol<-setNames(str_split(names(MEs0), "_", simplify = T)[,2],
str_split(names(MEs0), "_", simplify = T)[,3])
vc.labcol_ME<-setNames(names(vc.ME_labcol), as.numeric(vc.ME_labcol))
names(MEs0)<-paste0("ME",names(vc.ME_labcol))
MEs = orderMEs(MEs0)
# A data frame with module eigengenes can be obtained as follows
MEsAutomatic=net$MEs
moduleLabelsAutomatic=net$colors
moduleColorsAutomatic = mergedColors
vc.PAX7SOX10 = as.data.frame(datTraits2$Percent.PAX7SOX10)
GS.PAX7SOX10=as.numeric(cor(datExpr,vc.PAX7SOX10,use="pairwise.complete.obs"))
vc.XO = as.data.frame(datTraits2$simple.XO)
GS.XO=as.numeric(cor(datExpr,vc.XO,use="pairwise.complete.obs"))
vc.Euploid <- as.data.frame(datTraits2$simple.EU)
GS.Euploid=as.numeric(cor(datExpr,vc.Euploid,use="pairwise.complete.obs"))
df.GS.Markers<-as.data.frame(apply(df.Markers.med[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
mn.GS.Markers<-numbers2colors(df.GS.Markers, signed=T, lim=c(-1,1))
colnames(mn.GS.Markers)<-colnames(df.GS.Markers)
#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR)
df.escape<-as.data.frame(cbind(colMeans(mn.mednorm[na.omit(df$gene_name[vtf.ESC2]),]),
colMeans(mn.mednorm[na.omit(df$gene_name[vtf.PAR]),]),
colMeans(mn.mednorm[na.omit(df$gene_name[vtf.oESC]),]),
colMeans(mn.mednorm[na.omit(df$gene_name[vtf.PAIR2]),])))
colnames(df.escape)<-c("All", "PAR","OtherEsc", #"PairX", "PairY",
"Pair")
df.GS.escape<-as.data.frame(apply(df.escape[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
mn.GS.escape<-numbers2colors(df.GS.escape, signed=T)
colnames(mn.GS.escape)<-colnames(df.escape)
datTraits3<-cbind(datTraits2,
df.escape[rownames(datExpr),], #[,-c(4,5)],
df.Markers.med[rownames(datExpr),c(grep(".[KDEGs/C/L/F/H]$", colnames(df.Markers.med)))])
require(Hmisc)
require(corrplot)
#remove Xa and donor columns, re-ordered columns
datTraits4 <- datTraits3[, -c(grep("simple", colnames(datTraits3)),
grep("Xa", colnames(datTraits3)),
grep("donor", colnames(datTraits3)))]
cortype = "pearson"
pdf(paste0(outdir2,"/",cortype,"_Corrplot4.pdf"))
cor_5<- rcorr(as.matrix(datTraits4),type = cortype)
M <- cor_5$r
p_mat <- cor_5$P
p_mat[is.na(p_mat)] <- 0
corrplot(M, type = "upper", order = "original", method="square",
p.mat = p_mat, sig.level = 0.05, insig = "blank",
col=blueWhiteRed(100), tl.col="black", tl.srt = 60)
dev.off()
df.wald<-df.gsea2
if(!tf.gseaset) {
require(preprocessCore)
vc.Wald<-c("fXO_stat", "mXO1_stat", "mXO2_stat")
mn.stat_qn2<-normalize.quantiles(matrix(unlist(df[,vc.Wald]), ncol = length(vc.Wald)))
colnames(mn.stat_qn2)<-vc.Wald
df.wald<-as.data.frame(cbind(mn.stat_qn2,
"aveXO_stat" = rowMeans(mn.stat_qn2[,c(1:3)])),
"avemXO_stat" = rowMeans(mn.stat_qn2[,c(2:3)])) %>% add_column("gene_name" = df$gene_name, .before = 1)
rownames(df.wald)<-df$ENSEMBL
}
df.GS.wald<-as.data.frame(apply(df.wald[rownames(datExpr),], 2, function(X) {as.numeric(cor(datExpr,X,use="pairwise.complete.obs"))}))
pdf(paste0(outdir2,"/DendroTrait_mod3.pdf"))
blocknumber=1
mColors=moduleColorsAutomatic[net$blockGenes[[blocknumber]]]
datColors=data.frame(mColors,
XO=numbers2colors(GS.XO, signed=T),
fXO=numbers2colors(df.wald$fXO, signed=T, lim = c(-5,5)),
mXO1=numbers2colors(df.wald$mXO1, signed=T, lim = c(-5, 5)),
mXO2=numbers2colors(df.wald$mXO2, signed=T, lim = c(-5,5)),
avemXO=numbers2colors(df.wald$avemXO, signed=T, lim = c(-5,5)),
mn.GS.Markers,
Euploid=numbers2colors(GS.Euploid, signed=T),
PAX7SOX10=numbers2colors(GS.PAX7SOX10, signed=T))
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[blocknumber]]
,colors=datColors,
groupLabels=names(datColors),
dendroLabels=FALSE, hang=0.03,addGuide=TRUE,guideHang=0.05)
dev.off()
cortype<-"pearson"
moduleTraitCor = cor(MEs, datTraits4[,-grep("Hsiao",colnames(datTraits4))], use = "pairwise.complete.obs", method = cortype)
moduleTraitPvalue = corPvalueFisher(moduleTraitCor, nSamples)
pdf(paste0(outdir2,"/ModuleTrait_corr_large3F.pdf"))
#sizeGrWindow(10,6)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 1), "\n(",
signif(-log(moduleTraitPvalue, 10), 2)
, ")"
, sep = "")
dim(textMatrix) = dim(moduleTraitCor)
textMatrix[moduleTraitPvalue>0.1]<-""
colnames(textMatrix)<-colnames(moduleTraitCor)
rownames(textMatrix)<-rownames(moduleTraitCor)
#par(mar = c(6, 35, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor[head(names(MEs),-1),],
xLabels = colnames(moduleTraitCor),
yLabels = head(names(MEs),-1),
ySymbols = vc.ME_labcol[head(gsub("^ME","",names(MEs)),-1)],
yColorLabels = T,
colors = blueWhiteRed(50),
textMatrix = textMatrix[head(names(MEs),-1),],
setStdMargins = F,
cex.text = 0.5,
zlim = c(-1,1),
cex.lab.x = 0.6,
main = paste0("Module-trait ",cortype))
dev.off()
### ENRICHMENT TESTING ###
require(msigdbr)
require(clusterProfiler)
require(org.Hs.eg.db)
library(DOSE)
collectGarbage()
vc.ensByMod<-setNames(names(net$colors), moduleLabels)
ls.ensByMod <- split(vc.ensByMod, names(vc.ensByMod))
if(!tf.gseaset) {
ls.test<-lapply(ls.ensByMod, function(X) unique(df.gsea2[df.gsea2$SYMBOL %in% X,"ENTREZID"]))
} else {ls.test<-lapply(ls.ensByMod, function(X) unique(All.df[which(All.df$SYMBOL %in% X),"ENTREZID"]))}
if(file.exists(paste0(outdir2,"/msig_wgcna.rds")))
{msig_wgcna<-readRDS(paste0(outdir2,"/msig_wgcna.rds"))
}
if(!file.exists(paste0(outdir2,"/msig_wgcna.rds"))) {
msig_wgcna<- compareCluster(ls.test, fun = enricher, pvalueCutoff = 0.1, readable = T,
pAdjustMethod = "BH", TERM2GENE=msig_Hall_C1_CP_TFT_MOD_HP_C8, universe=unlist(ls.test))
saveRDS(msig_wgcna, file = paste0(outdir2,"/msig_wgcna.rds"))
}
collectGarbage()
fx.labels1<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
abbreviate(s, minlength = 25, dot = T)
})
fx.labels2<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
s<-str_replace_all(s, "_", " ")
abbreviate(s, minlength = 25, dot = T)
})
fx.labels3<- Vectorize(function(X) {
#s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
#s<-str_replace_all(s, "_", " ")
abbreviate(X, minlength = 25, dot = T)
})
require(viridis)
require(grid)
require(gridExtra)
gpfx.dotplot<-function(collections, ht.fx, th.fx, labelfx, padj, rown, coln, X){
X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio),
Score = pmin(-log(p.adjust, 10), 30),
Collection = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), ht.fx, 1)))
X3<-X2 %>% filter(Collection %in% collections & p.adjust <= padj) %>%
mutate(Category = toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), th.fx, 1 )))
clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 6) +
facet_wrap(vars(Collection), ncol = coln, nrow = rown, scales = "free_y") +
scale_color_gradientn(colours = turbo(5)) +
scale_y_discrete(labels = labelfx, position = "right")}
require(viridis)
pcut2<-0.1
vc.Collections<-c("HALLMARK","KEGG")
glol.w1<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 2, 1, msig_wgcna)
ggsave(paste0(outdir2,"/supp_wgcna.pdf"), plot = glol.w1 + theme(legend.position = "top", legend.direction = "horizontal"), width = 4, height =7)
pcut2<-0.1
vc.Collections<-c("HP")
glol.w2<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna
%>%filter(Description %in% toupper(vc.ORPHA881))) +
theme(text = element_text(size = 6), legend.position="top", legend.box = "horizontal")
ggsave(paste0(outdir2,"/glol.wgcna.hp.pdf"), plot = glol.w2,width = 4, height = 5)
pcut2<-0.01
vc.Collections<-c("WP")
glol.w3<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1, msig_wgcna)
pcut2<-0.01
vc.Collections<-c("REACTOME")
glol.w4<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1, 1 ,msig_wgcna)
pcut2<-0.1
vc.Collections<-c( "TARGET_GENES")
glol.w5<-gpfx.dotplot(vc.Collections, tail, head, fx.labels1, pcut2, 1,1, msig_wgcna)
pcut2<-0.1
vc.Collections<-c("HALLMARK")
glol.w6<-gpfx.dotplot(vc.Collections, head, tail, fx.labels2, pcut2, 1,1, msig_wgcna)
ls.w.plots<-list(
#glol.w4 +theme(legend.position="top", legend.box = "horizontal", legend.text=element_text(size=5)),
glol.w2 +theme(legend.text=element_text(size=5)),
glol.w1 +guides(color = 'none', size = 'none')
)
vn.w.plots<-sapply(ls.w.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})
ggsave(paste0(outdir2,"/lol.wmsig.pdf"), grid.arrange(grobs = ls.w.plots#,
#heights = c(5, 9, 5),
#layout_matrix = rbind(c(2, 2, 1, 1),
# c(2, 2, 3, 3),
# c(2, 2, 3, 3))
), width = 10, height = 20,
device = "pdf", units = "in")
if(file.exists(paste0(outdir2,"/go_wgcna.rds")))
{go_wgcna<-readRDS(paste0(outdir2,"/go_wgcna.rds"))
}
if(!file.exists(paste0(outdir2,"/go_wgcna.rds"))) {
go_wgcna<- compareCluster(ls.test, fun = enrichGO, pvalueCutoff = 0.1, pAdjustMethod = "BH",
universe=df.gsea2$ENTREZID, OrgDb = org.Hs.eg.db, ont = "ALL",readable = T)
saveRDS(go_wgcna, paste0(outdir2,"/go_wgcna.rds"))
}
gpfx.goplot<-function(ontology, ht.fx, th.fx, labelfx, padj, rown, coln, X){
X2<-mutate(X, Enrichment = parse_ratio(GeneRatio)/parse_ratio(BgRatio),
Score = -log(p.adjust, 10))#,
X3<-X2 %>% filter(ONTOLOGY %in% c("BP") & p.adjust <= pcut2) #%>%
clusterProfiler::dotplot(X3, showCategory = dim(X3)[1], color = "Score", font.size = 5) +
scale_color_gradientn(colours = turbo(5)) +
scale_y_discrete(labels = labelfx)}
pcut2<-0.0001
vc.Collections<-c("BP")
glol.w5<-gpfx.goplot(vc.Collections, head, tail, fx.labels3, pcut2, 1, 1, go_wgcna)
#
require(GOSemSim)
require(enrichplot)
fx.semsim2<-function(compClust,ontology, padj){pairwise_termsim(compClust %>% filter(ONTOLOGY == ontology &
p.adjust <= padj ) %>%
arrange(p.adjust), method="JC", semData = dsem.sim.bp)}
ss.bp.wgcna2<-fx.semsim2(go_wgcna |>
filter(Cluster %in% as.character(c(1,6,7,13,19,20,26,27,3,5,8,9,11,15)))
, "BP", 0.01)
ss.cc.wgcna<-fx.semsim2(go_wgcna, "CC", 0.05)
ss.mf.wgcna<-fx.semsim2(go_wgcna, "MF", 0.05)
fx.semsim3<-function(compClust,collection, padj){pairwise_termsim(compClust %>% filter(str_detect(Description, collection) &
p.adjust <= padj ) %>% arrange(p.adjust) %>%
mutate(Description = str_to_sentence(toupper(sapply(strsplit(str_replace(Description, "_", ":"), "[:]"), tail, 1 ))))
, method="JC", semData = dsem.sim.bp)}
ss.HP.wgcna<-fx.semsim3(msig_wgcna %>%
filter(Description %in% toupper(vc.ORPHA881))
,"HP" , 0.1)
ego.HP.wgcna<-emapplot(ss.HP.wgcna, showCategory = dim(ss.HP.wgcna@compareClusterResult)[1],
ellipse_style = "polygon", ellipse_pro = 0.9, alpha =0.2, shadowtext = F,
cex_label_group = 1, label_style = "shadowtext", cex_category = 1, pie = "Count",
cex_pie2axis =1, #nWords = 10, label_format = 20,
min_edge = 0.1, repel = T, #group_category= T,
legend_n =4) +
scale_fill_manual(values = vc.labcol_ME[as.character(unique(ss.HP.wgcna@compareClusterResult$Cluster))])
ggsave(paste0(outdir2,"/wgcna_HP_emap.pdf"),device = 'pdf', width = 6, height = 6, unit = "in")
ego.bp.wgcna<-emapplot_cluster(ss.bp.wgcna2, showCategory = dim(ss.bp.wgcna2@compareClusterResult)[1],
ellipse_style = "polygon", ellipse_pro = 0.9, alpha =0.2, shadowtext = F,
cex_label_group = 1, label_style = "shadowtext", cex_category = 1, pie = "Count",
cex_pie2axis =10, #nWords = 10, label_format = 20,
min_edge = 0.05, repel = T, #group_category= T,
legend_n =4) +
scale_fill_manual(values = vc.labcol_ME[as.character(unique(ss.bp.wgcna2@compareClusterResult$Cluster))])
ggsave(paste0(outdir2,"/wgcna_bp2_emap.pdf"),device = 'pdf', width = 6, height = 6, unit = "in")
pdf(file=paste0(outdir2,"/Eigengene3.pdf"),width = 4, height = 4)
MEsPAX7SOX10<-orderMEs(cbind(MEs, datTraits3$Percent.PAX7SOX10))
plotEigengeneNetworks(MEsPAX7SOX10,plotHeatmaps = T,setMargins = T,greyLabel = 0,
"Eigengene adjacency heatmap", plotDendrograms = F, letterSubPlots = F,
ySymbols = vc.ME_labcol[gsub("^ME","",names(MEsPAX7SOX10))],
colorLabels = T, plotAdjacency = F,
xLabelsAngle = 45, excludeGrey = T, signed = T, printAdjacency = F,cex.adjacency = 0.4)
dev.off()
require(Rfast)
datKME<-signedKME(datExpr, MEs)
rank.datKME<-as.data.frame(colRanks(as.matrix(datKME)))
colnames(rank.datKME)<-colnames(datKME)
rownames(rank.datKME)<-rownames(datKME)
revc.ensByMod<-setNames(moduleColors, names(net$colors))
ls.mods<-list(all = table(revc.ensByMod),
PAR = table(revc.ensByMod[df$gene_name[vtf.PAR]]),
PAIR = table(revc.ensByMod[df$gene_name[vtf.PAIR2]]),
oESC = table(revc.ensByMod[df$gene_name[vtf.oESC]]),
allESC = table(revc.ensByMod[df$gene_name[vtf.ESC2]]),
onlyX = table(revc.ensByMod[df$gene_name[vtf.X]]),
onlyY = table(revc.ensByMod[df$gene_name[vtf.Y]])
)
df.strata.tab<-t(bind_rows(ls.mods) %>% add_column(Strata = names(ls.mods)) %>% column_to_rownames('Strata'))
df.strata.tab[which(is.na(df.strata.tab),arr.ind = T)]<-0
df.noncat<-(-1)*(df.strata.tab-df.strata.tab[,1])
df.nonmod<-(-1)*(t(t(df.strata.tab)-colSums(df.strata.tab)))
#get all array index pairs, could also have used: expand.grid(seq(ncol(df.strata.tab)),seq(nrow(df.strata.tab)))[,2:1]
mn.arr.ind<-which(!is.na(df.strata.tab), arr.ind = T)
ls.mod.test<-apply(mn.arr.ind, MARGIN = 1, function(x) {
ft<-fisher.test(matrix(c(df.strata.tab[x[1],x[2]],
df.noncat[x[1],x[2]],
df.nonmod[x[1],x[2]],
colSums(df.strata.tab)[1]-df.nonmod[x[1],x[2]]-df.noncat[x[1],x[2]]-df.strata.tab[x[1],x[2]]),
nrow = 2, ncol =2))
})
#retrieve statistic
mn.ftlp<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {-log(ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$p.value,10)}),
nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
mn.ftor<-matrix(apply(mn.arr.ind, MARGIN = 1, function(y) {ls.mod.test[[(y[1]+(y[2]-1)*colMaxs(mn.arr.ind)[1])]]$estimate}),
nrow = nrow(df.strata.tab), ncol = ncol(df.strata.tab), byrow = F)
rownames(mn.ftlp)<-rownames(df.strata.tab)
rownames(mn.ftor)<-rownames(df.strata.tab)
colnames(mn.ftlp)<-colnames(df.strata.tab)
colnames(mn.ftor)<-colnames(df.strata.tab)
#require(pheatmap)
#require(gplots)
mn.ft.lpor<-mn.ftlp
mn.ft.lpor[which(mn.ftlp<=(-log(0.05,10)),arr.ind = T)]<-0
vc.names<-rownames(mn.ft.lpor)
require(RColorBrewer)
require(pheatmap)
df.pheatanno_col2<-cbind(as.data.frame(mn.ft.lpor[,-1]), module = vc.ME_labcol[vc.names])
df.pheatanno_row2<-tb_Hm_allelicX %>% dplyr::select(gene_name, Region, Reported) %>%
unique() %>% as.data.frame() %>% filter(!is.na(gene_name)) %>% column_to_rownames("gene_name") %>%
mutate(Region = case_when(is.na(Region) ~ "nonPAR", T ~ Region))
colnames(datKME)<-str_replace_all(colnames(datKME), "kME", "")
names(vc.names)<-vc.names
ls.pheatcolor2 = list(Region = c(PAR = "black", nonPAR = "lightgrey"), module = vc.labcol_ME, Reported = col.pal3[names(col.pal3) != "Unknown"])
colnames(datKME)<-vc.ME_labcol[colnames(datKME)]
colnames(datKME)<-vc.labcol_ME[colnames(datKME)]
require(circlize)
require(ComplexHeatmap)
fs = 6
hm.modcorr<-Heatmap(datKME[rownames(df.pheatanno_row2),],
col = colorRamp2(c(-1, 0, 1), c("blue", "white","red")),
cluster_rows = T, cluster_columns = T,
column_names_gp = grid::gpar(fontsize = fs),
row_names_gp = grid::gpar(fontsize = fs),
top_annotation = HeatmapAnnotation(moduleL = anno_text(vc.ME_labcol[colnames(datKME)],
gp = gpar(fontsize = fs)),
module = vc.ME_labcol[colnames(datKME)], col = ls.pheatcolor2,
PAR = anno_barplot(df.pheatanno_col2[colnames(datKME),"PAR"], height = unit(0.5, "cm")),
PAIR = anno_barplot(df.pheatanno_col2[colnames(datKME),"PAIR"], height = unit(0.5, "cm")),
oESC = anno_barplot(df.pheatanno_col2[colnames(datKME),"oESC"], height = unit(0.5, "cm")),
ALL = anno_barplot(df.pheatanno_col2[colnames(datKME),"allESC"], height = unit(0.5, "cm")),
which = "column", height = unit(2.5, "cm")),
bottom_annotation = HeatmapAnnotation(module = anno_text(vc.ME_labcol[colnames(datKME)],
gp = gpar(fontsize = fs))),
# col = ls.pheatcolor2),
left_annotation = rowAnnotation(Reported = df.pheatanno_row2$Reported,
Region = df.pheatanno_row2$Region,
module = vc.ME_labcol[revc.ensByMod[rownames(df.pheatanno_row2)]],
col = ls.pheatcolor2),
width = unit(5, "cm"), height = unit(11, "cm")
)
pdf(file=paste0(outdir2,"/modcorr2.pdf"), pointsize = fs)
draw(hm.modcorr)
dev.off()
# run once (take a while)
ADJ1<-abs(cor(datExpr,use="p"))^sft$powerEstimate
Alldegrees1<-intramodularConnectivity(ADJ1, moduleColors)
#rownames(Alldegrees1)<-colnames(datExpr)
head(Alldegrees1)
chooseTopHubInEachModule(
datExpr,
moduleColors,
omitColors = "grey",
power = sft$powerEstimate,
type = "signed")
vcn.GS.PAX7SOX10<-setNames(GS.PAX7SOX10, colnames(datExpr))
vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M1 | vtf.M2) & (vtf.PAR | vtf.PAIR2) & !duplicated(df$gene_name)]])
sort.datKME<-as.matrix(apply(datKME, 2, function(X){cbind(rownames(datKME |> arrange(desc(X))))}))
top15<-setNames(as.vector(sort.datKME[c(1:20),]),rep(colnames(sort.datKME[c(1:20),]),each = 20))
df.plot<-cbind(names(top15),
vcn.GS.PAX7SOX10[top15],
Alldegrees1[top15,],
datKME[top15,]^sft$powerEstimate)
colnames(df.plot)[c(1,2)]<-c("module","PAX7SOX10")
df.plot<- cbind(module = vc.restrictGenes,
PAX7SOX10 = vcn.GS.PAX7SOX10[names(vc.restrictGenes)],
(Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
datKME[names(vc.restrictGenes),]^2)
s7a<-read_csv(file="s7a.csv")
s7b<-read_csv(file="s7b.csv")
df.plotG<-df.plot |> rownames_to_column("Gene") |>
left_join(rbind(s7a,s7b[,colnames(s7a)]), by="Gene") |>
column_to_rownames("Gene")
df.plotG$maxmodule<-colnames(df.plotG[,vc.labcol_ME])[rowMaxs(as.matrix(df.plotG[,vc.labcol_ME]))]
df.plotG[df.plotG$module == "grey","module"]<-df.plotG[df.plotG$module == "grey","maxmodule"]
df.plot2<-cbind(df.plotG$module, as.data.frame(t(rbind(apply(
df.plotG, 1, function(X){as.numeric(c(X[c(2,
grep("^k[TWOD]",colnames(df.plotG)),
grep("rank$",colnames(df.plotG)))],X[X[1]]))})))))
colnames(df.plot2)<-c(colnames(df.plotG)[c(1,2)],
colnames(df.plotG)[grep("^k[TWOD]",colnames(df.plotG))],
colnames(df.plotG)[grep("rank$",colnames(df.plotG))],
"kME")
df.plot3<-df.plot2 %>% rownames_to_column(var = "Gene") %>%
mutate(moduleN = vc.ME_labcol[module]) %>%
mutate(moduleL = paste0(vc.ME_labcol[module],"-",module))
vc.restrictGenes=na.omit(revc.ensByMod[df$gene_name[(vtf.F | vtf.M1 | vtf.M2) & (vtf.PAR | vtf.PAIR2) & !duplicated(df$gene_name)]])
df.plot3[!(df.plot3$Gene %in% c(names(vc.restrictGenes))),"Gene"]<-""
vc.combined_label<-setNames(names(vc.ME_labcol), paste0(vc.ME_labcol,"-",names(vc.ME_labcol)))
df.plot3[is.na(df.plot3)]<-1
gg.kim.kme<-ggplot(df.plot3 |> dplyr::filter(Gene != "")
, aes(x = PAX7SOX10, # size = kTotal,
size = `Average gene constraint % rank`,
col = moduleL, y = kME,
label = Gene)) +
geom_point(alpha = 0.8) +
geom_text(hjust = 1, vjust = -1, size =2) +
scale_color_manual(values= vc.combined_label, limits = force)+
geom_vline(xintercept = 0.25, linetype="dotted",
color = "black", linewidth=1) +
geom_hline(yintercept = 0.5, linetype="dotted",
color = "black", linewidth=1) +
theme(legend.position = "bottom")#, legend.direction = "horizontal")
ggsave(filename = paste0(outdir2,"/gg.kim.kme_pHI.pdf"), plot = gg.kim.kme, width = 3.5, height = 4, units = "in")
require(ggpubr)
require(ggbeeswarm)
vc.top<-c("RPL10","RPL10A","RPL11","RPL12","RPL13","RPL13A","RPL14","RPL15","RPL17",
"RPL18","RPL18A","RPL19","RPL21","RPL22","RPL23","RPL23A","RPL24","RPL26",
"RPL27","RPL27A","RPL28","RPL29","RPL3","RPL30","RPL31","RPL32","RPL34",
"RPL35","RPL35A","RPL36","RPL36A","RPL37","RPL37A","RPL38","RPL39","RPL3L",
"RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL8","RPL9","RPLP0","RPLP1",
"RPLP2","RPSA","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A",
"RPS16","RPS17","RPS18","RPS19","RPS2","RPS20","RPS21","RPS23","RPS24",
"RPS25","RPS26","RPS27","RPS27A","RPS28","RPS29","RPS3","RPS3A","RPS4X",
"RPS4Y1","RPS4Y2","RPS5","RPS6","RPS7","RPS8","RPS9")
vc.top2<-c("FAU", "UBF",
"EIF3A","EIF3E","EIF3F","EIF3H","EIF4B","PABPC1","EEF1A1","EEF1B2","EEF1D",
"EEF1G","EEF2","TCTP","HNRNPA1","NAP1L1","VIM", "NPM1", "RACK1")
df.TOPs<-read.csv("./pnas.1912864117.sd05.csv", header = T)
vc.restrictGenes<-na.omit(revc.ensByMod[df.TOPs$gene])
df.plot4<-cbind(module = vc.restrictGenes,
PAX7SOX10 = vcn.GS.PAX7SOX10[names(vc.restrictGenes)],
(Alldegrees1[which(colnames(datExpr) %in% names(vc.restrictGenes)),]),
datKME[names(vc.restrictGenes),]^2)#sft$powerEstimate)
df.plot4a<-df.plot4 |> rownames_to_column("gene") |> left_join(df.TOPs, by="gene") |>
left_join(df.plot4 |> group_by(module)|> tally(name = "size"), by="module") |>
left_join(vc.labcol_ME |> as.data.frame() |> rownames_to_column("mod_numb") |> rename(module = "vc.labcol_ME"), by="module")
df.plot4b<-df.plot4a |> left_join(df |> add_column(vtf.F, vtf.M1, vtf.M2) |>
rename(gene = "gene_name"), by = "gene")
df.plot4c<- df.plot4a |> left_join(df |> rename(gene = "gene_name") |>
dplyr::select("gene" | contains("XO")) |>
pivot_longer(!c(gene), names_to = c("contrast", ".value"), names_sep="_")
, by= "gene" ) |>
mutate(deg = case_when(padj <= 0.1 & log2FoldChange < 0 ~ "down",
padj <= 0.1 & log2FoldChange > 0 ~ "up",
T ~ "ns")) |>
mutate(RPLS = case_when(gene %in% c(vc.top) ~ "ribosomal",
#gene %in% c(vc.top2) ~ "non-RP translation",
T ~ "other")) |>
group_by(contrast, gene) |> filter(n()==1) |> ungroup() #|>
ggplot(df.plot4c, aes(x = deg, y = topscore, fill = deg)) +
#geom_jitter(alpha=0.2) +
scale_fill_manual(values = c("royalblue","white","red")) + #geom_contour() +
geom_boxplot(color = "black", varwidth = T, notch = T, outlier.alpha = 0) +
facet_wrap(ncol = 1, vars(contrast), strip.position = "top") +
scale_y_continuous(position = "left") +
coord_cartesian(ylim = c(0,4)) +
stat_compare_means(ref.group = "ns", hide.ns = T,
method = "wilcox",
label.y = 4, size = 2, label = "p.format") +
theme(legend.position = "top", legend.direction = "horizontal",
legend.key.height = unit(0.1, 'in'),
legend.key.width = unit(0.2, 'in')) +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
text = element_text(size=6))
ggsave("./test_4h.pdf",width = 2, height = 3)
tb.sigmod <- df.plot4c |> dplyr::select(gene, mod_numb, module, size, contrast, stat, PAX7SOX10, topscore) |>
filter(size > 4) |>
group_by(mod_numb, contrast) |> dplyr::summarize(topcorR = as.vector(rcorr(stat, topscore)$r),
topcorP = as.vector(rcorr(stat, topscore)$P)) |>
filter(!is.na(topcorP)) |> unique() |>
filter(topcorP <= 0.01 & contrast %in% c("mXO1","mXO2") & mod_numb != 0) |>
ungroup() |> arrange(contrast, topcorP)
tb.sigmod.PS <- df.plot4c |> dplyr::select(gene, mod_numb, module, size, contrast, stat, PAX7SOX10, topscore) |>
filter(size > 4) |>
group_by(mod_numb, contrast) |> dplyr::summarize(topcorR = as.vector(rcorr(PAX7SOX10, topscore)$r),
topcorP = as.vector(rcorr(PAX7SOX10, topscore)$P)) |>
filter(!is.na(topcorP)) |> unique() |>
filter(topcorP <= 0.05 & contrast %in% c("mXO1","mXO2") & mod_numb != 0) |>
ungroup() |> arrange(contrast, topcorP)
ggscatter(df.plot4c |> filter(mod_numb %in% tb.sigmod.PS$mod_numb) |>
mutate(mod_numb = fct_relevel(mod_numb, as.character(sort(as.numeric(unique(tb.sigmod.PS$mod_numb))))))
, x = "PAX7SOX10", y = "topscore", color = rgb(0,0,0,1), shape="RPLS",
point = T,
add = "reg.line", conf.int = T, ggtheme = theme_minimal(), size = 0,
add.params = list(linecolor = "black"),
facet.by = "mod_numb") +
geom_density2d(inherit.aes = T, aes(color = module), contour_var = "ndensity") +
stat_cor(label.sep = " ") +
scale_color_identity() +
theme(legend.position = "none", text = element_text(size=6))
ggsave("./test_panel2.pdf",width = 4, height = 3)
dfp.ncc<-compare_means(data = df.plot4c |> filter(!(gene %in% vc.top)), formula = PAX7SOX10 ~ module, ref.group = "grey", hide.ns = T,
method = "wilcox", label.y = 0, label = "p.signif") |> mutate(module = group2) |> filter(p.adj < 0.05) |>
left_join(df.plot4c |> group_by(module) |> summarise(mean_top = mean(topscore), mean_ncc= mean(PAX7SOX10)), by = "module")
ggplot(df.plot4c
, aes(x = PAX7SOX10, color = module)) +
theme(text = element_text(size = 6)) +
stat_ecdf(geom = "smooth", pad = F) +
scale_colour_identity()
ggsave("./ncc_ecdf.pdf",width = 1.8, height = 1.8)
dfp.topscore<-compare_means(data = df.plot4c |> filter((gene %in% vc.top) | module == "grey")
, formula = topscore ~ module, ref.group = "grey", hide.ns = T,
method = "wilcox", label.y = 0, label = "p.signif") |> mutate(module = group2) |> filter(p.adj < 0.05) |>
left_join(df.plot4c |> group_by(module) |> summarise(mean_top = median(topscore), mean_ncc= median(PAX7SOX10)), by = "module")
ggplot(df.plot4c |> mutate(module2 = case_when(gene %in% vc.top ~ NA, T ~ module)) |>
mutate(RPLS = case_when(gene %in% vc.top ~ "wo", T ~ "w"))
, aes(x = topscore, colour = module)) +
stat_ecdf(geom = "smooth", pad = F, show.legend = T, inherit.aes = TRUE, mapping = aes(linetype = RPLS)) +
scale_colour_identity(aesthetics = "colour") + #guides(colour = guide_legend()) +
geom_text(data = dfp.topscore |> mutate(module = group2, corr = sign(mean_ncc)), size = 2,
aes(x = 3.5, y = abs(mean_ncc),
label = paste(" p ",signif(p.adj,2),
" T ",signif(mean_top,2),
" R ",signif(mean_ncc,2),sep =","))) +
theme(text = element_text(size = 6), legend.position = "none")
ggsave("./top_ecdf2.pdf",width = 1.8, height = 1.8)
top_wald<- ggscatter(data = df.plot4c |> filter(module != "grey") |>
group_by(module, mod_numb, contrast) |>
summarise(
genes = n(),
med_y = median(topscore),
sderr_y = sd(topscore)/sqrt(n()),
med_x= median(stat),
sderr_x = sd(stat)/sqrt(n()))
, color = "module", size = "genes",
label = "mod_numb", font.label = c(6, "bold"), repel = T,
add = "reg.line", add.params = list(color="grey"),
x="med_x", y="med_y", #cor.coef.size = 6, cor.coef = T,
conf.int = T, show.legend.text = T) + facet_grid(rows=vars(contrast)) +
scale_colour_identity() +
geom_errorbar(aes(ymin = med_y-sderr_y, ymax = med_y+sderr_y)) +
geom_errorbar(aes(xmin = med_x-sderr_x, xmax = med_x+sderr_x)) +
stat_cor(label.y = 1.45, label.x = -0.5, mapping = aes(size=6)) +
stat_regline_equation(label.y = 1.3, label.x = -0.5, mapping = aes(size=6)) +
coord_cartesian(xlim=c(-5,3), ylim = c(0.5,1.5)) +
theme(text = element_text(size = 6), legend.position = "none")
ggsave("./wald_top.pdf",width = 3, height = 2.7)
#PRESERVATION ANALYSIS
#vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]
GSE77513<-read.table("./WGCNA_NCC_sets/GSE77513_Proj_03498_D_htseq_all_samples.txt2", header=T, row.names = 1)
dat.GSE77513 <- GSE77513 %>% mutate(ENSEMBL = str_extract(rownames(GSE77513), "ENSG[0-9]*")) %>%
left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
select(-starts_with("Gene"),-starts_with("EN")) %>% column_to_rownames("SYMBOL")
dat.Zeltner<- t(varianceStabilizingTransformation(as.matrix(dat.GSE77513),blind=T))
GSE120200<-read.table("./WGCNA_NCC_sets/GSE120200_ExpressionMatrix.txt", header=T)
GSE127268<-read.table("./WGCNA_NCC_sets/GSE127268_RawCountMatrix.txt", header=T) |>
rename(Geneid = "FEATURE_ID") |> select(-starts_with("GENE_SYMBOL"))
dat.GSE120200<-GSE120200 |> left_join(GSE127268, by="Geneid") |>
rename(ENSEMBL = "Geneid") |>
left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
select(-starts_with("EN")) %>% column_to_rownames("SYMBOL") %>%
as.data.frame()
dat.Matheus<-t(varianceStabilizingTransformation(as.matrix(dat.GSE120200),blind=T))
GSE108521<-read.csv("./WGCNA_NCC_sets/GSE108521.csv",header = T)
GSE121428<-read.csv("./WGCNA_NCC_sets/GSE121428.csv",header = T)
dat.Laugsch<- GSE108521 |> left_join(GSE121428, by="EnsemblID") |>
rename(ENSEMBL = "EnsemblID") |>
left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
column_to_rownames("SYMBOL") %>% select(contains("FPKM")) %>%
as.data.frame() |> t()
GSE134532<-read.table("./WGCNA_NCC_sets/GSE134532.counts.txt", header = T, row.names = 1)
dat.GSE134532<-GSE134532 |> rownames_to_column("SYMBOL") |>
left_join(All.df,by="SYMBOL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
column_to_rownames("SYMBOL") |> select(contains("CNCCdiff")) %>%
as.data.frame()
dat.Greenberg<- t(varianceStabilizingTransformation(as.matrix(GSE134532),blind = T))
GSE138799<-read.table("./WGCNA_NCC_sets/GSE138799_embryonic_heart_counts.txt", header = T)
dat.GSE138799<-GSE138799 |> left_join(All.df,by="ENSEMBL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
column_to_rownames("SYMBOL") %>% select(starts_with("CS")) %>%
as.data.frame()
dat.VanOudenhove<- t(varianceStabilizingTransformation(as.matrix(dat.GSE138799),blind = T))
GSE197513<-read.csv("./WGCNA_NCC_sets/GSE197513_NCC_CF_FB_scaled_counts.csv", header = T)
dat.GSE197513<-GSE197513 |> left_join(All.df,by="ENSEMBL") |> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
column_to_rownames("SYMBOL") %>% select(starts_with("CS")) %>%
as.data.frame()
dat.Yankee<- t(varianceStabilizingTransformation(as.matrix(dat.GSE197513),blind = T))
GSE145327<-read.table("./WGCNA_NCC_sets/GSE145327_H9ESC_CNCC_chond_RNAseq_counts.txt",header=T)
dat.GSE145327<- GSE145327 |>
rename(SYMBOL = "Geneid") |>
left_join(All.df,by="SYMBOL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
column_to_rownames("SYMBOL") %>% select(contains("NCC"))
dat.Long<-t(varianceStabilizingTransformation(as.matrix(dat.GSE145327),blind = T))
GSE176101<-read.table("./WGCNA_NCC_sets/GSE176101_Raw_gene_counts_matrix.txt", header=T)
dat.GSE176101<-GSE176101 %>% rename(ENSEMBL = "gene_id") %>% left_join(All.df,by="ENSEMBL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
column_to_rownames("SYMBOL") |> select(-starts_with("EN"),-starts_with("ips")) |> as.data.frame()
dat.Wen<-t(varianceStabilizingTransformation(as.matrix(dat.GSE176101),blind = T))
GSE207112<-read.table("./WGCNA_NCC_sets/GSE207112_counts_TBLs.txt", header = T)
dat.GSE207112<- GSE207112 |> mutate(ENSEMBL = str_extract(Geneid, "ENSG[0-9]*")) |>
left_join(All.df,by="ENSEMBL")|> filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) |>
column_to_rownames("SYMBOL") |> select(starts_with("X")) |> as.data.frame()
dat.Ahern<-t(varianceStabilizingTransformation(as.matrix(dat.GSE207112),blind = T))
GSE164665<-read.table("./WGCNA_NCC_sets/GSE164665_Stroma.counts.txt", header = T)
dat.GSE164665<-GSE164665 %>% left_join(All.df,by="SYMBOL") %>% filter(!duplicated(SYMBOL) & !is.na(SYMBOL)) %>%
column_to_rownames("SYMBOL") |> select(-starts_with("EN")) |> as.data.frame()
dat.Birnbaum<- t(varianceStabilizingTransformation(as.matrix(dat.GSE164665),blind = T))
ls.testnets<- list(
fem = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Male") |> ungroup() |> pull(RegistryID),],
male = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Female") |> ungroup() |> pull(RegistryID),]
)
ls.testnetsA<- list(
Mix15 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Male" & row_number() %% 3 == 0) |> ungroup() |> pull(RegistryID),],
MixF20 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Female") |> ungroup() |> pull(RegistryID),],
OnlyXOs = datExpr[colData(vsd3) |> as_tibble() |> filter(simple == "XO" & Sex == "Male") |> pull(RegistryID),],
MonosomyX_TBLs = dat.Ahern
)
ls.testnetsB<-list(MixM15 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Male" & row_number() %% 2 == 0) |> ungroup() |> pull(RegistryID),],
MixF20 = datExpr[colData(vsd3) |> as_tibble() |> group_by(condition) |>
filter(Sex == "Female") |> ungroup() |> pull(RegistryID),],
OnlyXOs = datExpr[colData(vsd3) |> as_tibble() |> filter(simple == "XO" & Sex == "Male") |> pull(RegistryID),],
CS_Face = dat.Yankee,
CS_Heart = dat.VanOudenhove,
BranchioOculofacial_TFAP2A = dat.Laugsch,
BohringOpitz_ASXL1 = dat.Matheus,
FamDysAutonomia_IKBKAP = dat.Zeltner,
FloatingHarbor_SRCAP = dat.Greenberg,
PierreRobin_SOX9 = dat.Long,
Waardenburg_SOX10 = dat.Wen,
MonosomyX_TBLs = dat.Ahern,
Control_Stroma = dat.Birnbaum
)
ls.testnetsC<-list(
CS_Face = dat.Yankee,
TFAP2A_WT = t(as.matrix(t(dat.Laugsch) |> as.data.frame() |> select(starts_with("WT")))),
ASXL1_HOM = t(as.matrix(t(dat.Matheus) |> as.data.frame() |> select(starts_with("HOM")))),
FHS_HET = t(as.matrix(t(dat.Greenberg) |> as.data.frame() |> select(contains("_FHS")))),
LONG_WT = t(as.matrix(t(dat.Long) |> as.data.frame() |> select(contains("_WT_"))))
)
fx.clean_nets<-function(ls.testnets)
{
for (i in c(1:length(ls.testnets)))
{
datExpr0 <- ls.testnets[[i]]
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) {
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")))
}
if (sum(!gsg$goodSamples)>0) {
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
}
# Remove the offending genes and samples from the data:
datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
ls.testnets[[i]]<-datExpr0
}
return(ls.testnets)
}
ls.testnets<-fx.clean_nets(ls.testnets)
ls.testnetsB<-fx.clean_nets(ls.testnetsB)
ls.testnetsC<-fx.clean_nets(ls.testnetsC)
colorsFull = moduleColors
multiColor = list(Full_set = colorsFull);
multiExpr = list(
FEM = list(data = ls.testnets[[1]]),
MALE = list(data = ls.testnets[[2]])
)
multiExprA = list(Full_set = list(data = datExpr),
Mix27 = list(data = ls.testnetsA[[1]]),
OnlyXOs = list(data = ls.testnetsA[[2]]),
XO_TBLs = list(data = ls.testnetsA[[3]])
)
multiExprB = list(Full_set = list(data = datExpr),
MixF20 = list(data = ls.testnetsB$MixF20),
MixM15 = list(data = ls.testnetsB$MixM15),
Only_XO = list(data = ls.testnetsB$OnlyXOs),
CS_Face = list(data = ls.testnetsB$CS_Face),
CS_Heart = list(data = ls.testnetsB$CS_Heart),
BranchioOculofacial = list(data = ls.testnetsB$BranchioOculofacial_TFAP2A),
BohringOpitz = list(data = ls.testnetsB$BohringOpitz_ASXL1),
FamDysAutonomia = list(data = ls.testnetsB$FamDysAutonomia_IKBKAP),
FloatingHarbor = list(data = ls.testnetsB$FloatingHarbor_SRCAP),
PierreRobin = list(data = ls.testnetsB$PierreRobin_SOX9),
Waardenburg = list(data = ls.testnetsB$Waardenburg_SOX10),
TBL = list(data = ls.testnetsB$MonosomyX_TBLs),
Control_Stroma = list(data = ls.testnetsB$Control_Stroma)
)
multiExprC = list(Full_set = list(data = datExpr),
CS_Face = list(data = ls.testnetsC[[1]]),
BOFS_WT = list(data = ls.testnetsC[[2]]),
BohringOpitz_MUT = list(data = ls.testnetsC[[3]]),
FHS_MUT = list(data = ls.testnetsC[[4]]),
PRS_WT = list(data = ls.testnetsC[[5]])
)
system.time( {
mp2 = modulePreservation(multiExprA,
multiColor,
dataIsExpr = T,
networkType = "signed",
corFnc = "bicor",
corOptions = "use = 'p', maxPOutliers = 0.1",
referenceNetworks = 1,
maxModuleSize = max(table(colorsFull)),
nPermutations = 50,
randomSeed = 1,
quickCor = 0,
verbose = 3)
} )
saveRDS(mp2, file= paste0(outdir2,"/mp2a.rds"))
system.time( {
mp3 = modulePreservation(multiExprB,
multiColor,
dataIsExpr = T,
networkType = "signed",
corOptions = "use = 'p', maxPOutliers = 0.1",
corFnc = "bicor",
referenceNetworks = 1,
maxModuleSize = max(table(colorsFull)),
nPermutations = 50,
randomSeed = 1,
quickCor = 0,
verbose = 3)
} )
saveRDS(mp3, file= paste0(outdir2,"/",tf.deepsplit,vn.mergeCut,"_mp3.rds"))
system.time( {
mp4 = modulePreservation(multiExprC,
multiColor,
dataIsExpr = T,
networkType = "signed",
corFnc = "bicor",
referenceNetworks = 1,
maxModuleSize = max(table(colorsFull)),
nPermutations = 100,
randomSeed = 1,
quickCor = 0,
verbose = 3)
} )
mp3<-readRDS(file = paste0(outdir2,"/mp3_25.rds"))
#mp<-mp1
#mp<-mp2
#mp<-mp3
#mp<-mp4
pdf(file="Zsummary-medianRank.pdf", wi=10, h=5)
par(mfrow = c(1,2))
par(mar = c(4.5,4.5,2.5,1))
df.Zsummary3<-as.data.frame(matrix(nrow=1+length(table(colorsFull))))
for (i in c(1:(length(multiExprB)-1)))
{
ref = 1
test = i+1
statsObs = cbind(mp$quality$observed[[ref]][[test]][, -1], mp$preservation$observed[[ref]][[test]][, -1])
statsZ = cbind(mp$quality$Z[[ref]][[test]][, -1], mp$preservation$Z[[ref]][[test]][, -1]);
print( cbind(statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
df.Zsummary3<-as.data.frame(cbind(df.Zsummary3, statsObs[, c("medianRank.pres", "medianRank.qual")],
signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)))
# Module labels and module sizes are also contained in the results
modColors = rownames(mp$preservation$observed[[ref]][[test]])
moduleSizes = mp$preservation$Z[[ref]][[test]][, 1];
# leave grey and gold modules out
plotMods = !(modColors %in% c("grey", "gold"));
# Text labels for points
text = modColors[plotMods];
# Auxiliary convenience variable
plotData = cbind(mp$preservation$observed[[ref]][[test]][, 2], mp$preservation$Z[[ref]][[test]][, 2])
mains = c("Preservation Median rank", "Preservation Zsummary");
for (p in 1:2)
{
min = min(plotData[, p], na.rm = TRUE);
max = max(plotData[, p], na.rm = TRUE);
# Adjust ploting ranges appropriately
if (p==2)
{
if (min > -max/10) min = -max/10
ylim = c(min - 0.1 * (max-min), max + 0.1 * (max-min))
} else
ylim = c(max + 0.1 * (max-min), min - 0.1 * (max-min))
plot(moduleSizes[plotMods], plotData[plotMods, p], col = 1, bg = modColors[plotMods], pch = 21,
main = mains[p],
cex = 2.4,
ylab = mains[p], xlab = "Module size", log = "x",
ylim = ylim,
xlim = c(10, 5000), cex.lab = 1.2, cex.axis = 1.2, cex.main =1.4)
labelPoints(moduleSizes[plotMods], plotData[plotMods, p], text, cex = 1, offs = 0.08);
# For Zsummary, add threshold lines
if (p==2)
{
abline(h=0)
abline(h=2, col = "blue", lty = 2)
abline(h=10, col = "darkgreen", lty = 2)
}}
}
dev.off()
df.Zsummary3<-df.Zsummary3[,!(colnames(df.Zsummary3) %in% "V1")]
colnames(df.Zsummary3)<-c(
paste(rep(names(multiExprB)[-1], each = 4), colnames(df.Zsummary3), sep="_"))
mn.Zs<-as.matrix(cbind(df.Zsummary3[!rownames(df.Zsummary3) %in%
c("gold","grey"),grep("Zsummary.pres", colnames(df.Zsummary3))]))
colnames(mn.Zs)<-gsub("_Zsummary.pres.*", "", colnames(mn.Zs))
mn.Zs<-mn.Zs[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]
df.Zs<-mn.Zs |> as.data.frame() |> select(!starts_with("Mix27"))
#mn.Zs2<-mn.Zs[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]
sizeGrWindow(10,10)
par(mar = c(0, 0, 0, 0))
pdf(paste0(outdir2,"/modpres.bar3c.pdf"),width = 3, height = 9)
barplot(rev(mn.Zs[,grep("Only_XO", colnames(mn.Zs))]-mn.Zs[,grep("MixM15",colnames(mn.Zs))])#/rev(mn.Zs2[,grep("MixM15", colnames(mn.Zs2))])
,col=rev(rownames(mn.Zs)), #xlim=c(-100,50),
xlab = "fraction deltaZ module preservation", xpd =F,
horiz=T, names.arg=rev(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)]),
cex.names=0.2, width=rev(table(colorsFull)[head(gsub(x=colnames(MEs),pattern = "^ME",""), -1)])
)
abline(v = c(-5,5))
dev.off()
mn.Zs2<-as.matrix(df.Zs |> select(order(colnames(df.Zs))) |> select(-Only_XO)) #[,c(1:length(colnames(mn.Zs)))]
mn.Zs2[mn.Zs2<0]<-0
Breaks = c(2 ,seq(from = 3, to = 9, by = 1),
seq(from = 10, to = max(mn.Zs2), by = 5))
require(gplots)
require(pheatmap)
vc.colour<-as.vector(names(table(colorsFull)))
names(vc.colour)<-names(table(colorsFull))
ls.modpres_color<-list(colour = vc.colour)
df.modpres_row<-as.data.frame(cbind(
vc.colour))
colnames(df.modpres_row)<-c("colour") #"number")
#df.modpres_row<-cbind(vc.labcol_ME, vc.ME_labcol)
#colnames(df.modpres_row)<-c("colour", "number")
pheatmap(mn.Zs2[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),], cellnote = round(mn.Zs2,digits = 1), display_numbers = T, number_format = "%.0f",
cluster_rows = F, cluster_cols = T, border_color = NA, cellwidth = 12, cellheight = 12,
color =colorpanel(length(Breaks)-1, "white", "orange", "red"), breaks=Breaks, angle_col = 90,
annotation_row = df.modpres_row, annotation_colors = ls.modpres_color, annotation_legend = F, annotation_names_row = T,
labels_row = paste0(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],"-",
rownames(df.Zsummary3[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1),]), " (",
table(multiColor)[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)], ")" ),
filename=paste0(outdir2,"/MP_NCCs_newA2.pdf")
)
dev.off()
require(circlize)
df.Zs2<-df.Zs[,-grep("Mix|Onl", colnames(df.Zs))]
df.summary<-as.data.frame(cbind(df.Zs2,
#"maxpres" = apply(df.Zs2, 1, max),
"maxset" = colnames(df.Zs2)[apply(df.Zs2, 1, which.max)],
"moderate" = apply(df.Zs2, 1, function(X){max(X) > 5}),
"strongly" = apply(df.Zs2, 1, function(X){max(X) >= 10})
))
rownames(df.summary)<-vc.ME_labcol[rownames(df.summary)]
df.corr<-as.data.frame(moduleTraitCor[head(names(MEs),-1),])
df.corr[moduleTraitPvalue[head(names(MEs),-1),]>0.05]<-0
rownames(df.corr)<-gsub("^ME","", rownames(df.corr))
rownames(df.corr)<-vc.ME_labcol[rownames(df.corr)]
df.corrP<-signif(log(as.data.frame(moduleTraitPvalue[head(names(MEs),-1),]),10)*(-1),2)
df.corrP[moduleTraitPvalue[head(names(MEs),-1),]>0.05]<-0
rownames(df.corrP)<-gsub("^ME","", rownames(df.corrP))
rownames(df.corrP)<-vc.ME_labcol[rownames(df.corrP)]
df.msig<-as.data.frame(cbind(
"TS" = msig_wgcna@compareClusterResult |> filter(Description %in% toupper(vc.ORPHA881)) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
"development" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "HEART|CREST|CARDIO")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
"cholesterol" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "CHOLESTEROL|LIPID")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
"mitochondrial" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "MITOCHON|OXIDATIVE|ELECTRONTRANSPORT")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
"signalling" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "SIGNALLING")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)]),
"ribosomal" = msig_wgcna@compareClusterResult |> filter(str_detect(Description, "RIBOSOM|TRANSLAT")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(msig_wgcna@compareClusterResult$Cluster)])
))
rownames(df.msig)<-vc.ME_labcol[rownames(df.msig)]
df.go<-as.data.frame(cbind(
"development" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "development")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
"cholesterol" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "cholesterol|lipid")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
"mitochondrial" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "mitochon|oxidative|electrontransport")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
"signalling" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "signalling")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)]),
"ribosomal" = go_wgcna@compareClusterResult |> filter(str_detect(Description, "ribosom|translat")) |> dplyr::select(Cluster) |> table() |>
setNames(vc.labcol_ME[levels(go_wgcna@compareClusterResult$Cluster)])
))
rownames(df.go)<-vc.ME_labcol[rownames(df.go)]
require(circlize)
require(gplots)
df.mod_wald<-t(lapply(ls.ensByMod, function(X)
colMeans(df.gsea2[df.gsea2$SYMBOL %in% X, grep("XO",colnames(df.gsea2))])) |> as.data.frame())
rownames(df.mod_wald)<-gsub("X","",rownames(df.mod_wald))
mn.wald<-df.mod_wald[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],]
mn.pres<-df.summary[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
mn.msig<-df.msig[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
mn.corr<-df.corr[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
mn.corrP<-df.corrP[vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],] |> dplyr::select_if(is.numeric) |> as.matrix()
require(ComplexHeatmap)
fs = 6
chm<-Heatmap(vc.ME_labcol[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)],
col = vc.labcol_ME[rownames(mn.pres)], show_heatmap_legend = F,
column_names_gp = grid::gpar(fontsize = fs)) +
Heatmap(mn.corr, col = colorRamp2(c(-1, 0, 1), c("blue", "white","red")),
cluster_columns = T,
heatmap_legend_param = list(title = "Pearson R"),#, direction = "horizontal", position = "topcenter"),
column_names_gp = grid::gpar(fontsize = fs),
left_annotation = rowAnnotation(genes = anno_barplot(
setNames(as.vector(table(moduleColors)),names(table(moduleColors)))[head(gsub(x=colnames(MEs),pattern = "^ME",""),-1)])),
cell_fun = function(j, i, x, y, width, height, fill)
{
if(mn.corrP[i, j] > 0)
grid.text(sprintf("%.0f", mn.corrP[i, j]), x, y, gp = gpar(fontsize = fs))
}) +
Heatmap(mn.msig,
cluster_rows = F, cluster_columns = F,
col = colorRamp2(c(0, 1, 10), c("white","honeydew2","honeydew4")),
column_names_gp = grid::gpar(fontsize = fs),
heatmap_legend_param = list(title = "mSigDB hits"),# direction = "horizontal", position = "topcenter"),
cell_fun = function(j, i, x, y, width, height, fill)
{
if(mn.msig[i, j] > 0)
grid.text(sprintf("%.0f", mn.msig[i, j]), x, y, gp = gpar(fontsize = fs))
}) +
Heatmap(mn.pres,
col = colorRamp2(Breaks, colorpanel(length(Breaks), "white", "orange", "red")),
cluster_rows = F, cluster_columns = T,
heatmap_legend_param = list(title = "Preservation"),# direction = "horizontal", position = "topcenter"),
column_names_gp = grid::gpar(fontsize = fs),
cell_fun = function(j, i, x, y, width, height, fill)
{
if(mn.pres[i, j] >= 5)
grid.text(sprintf("%.0f", mn.pres[i, j]), x, y, gp = gpar(fontsize = fs))
}) +
Heatmap(mn.wald, col = colorRamp2(c(-2, 0, 2), c("cyan", "white","magenta")),
cluster_columns = T, cluster_rows = F,
heatmap_legend_param = list(title = "avg. Wald"),# direction = "horizontal", position = "topcenter"),
column_names_gp = grid::gpar(fontsize = fs),
row_names_gp = grid::gpar(fontsize = fs))
pdf(file=paste0(outdir2,"/summary.pdf"), pointsize = 6, width = 6.5, height = 4.5)
draw(chm)
dev.off()
go_wgcna |>
as.data.frame() |>
mutate(
object_file = "go_wgcna",
ID = sprintf("GO%s_%s", ONTOLOGY, ID)
) |>
dplyr::select(-ONTOLOGY) |>
rbind(
setReadable(msig_wgcna, OrgDb = org.Hs.eg.db, keyType = "ENTREZID") |>
as.data.frame() |>
mutate(object_file = "msig_all")
) |>
dplyr::select(-object_file) |>
rename(Module = "Cluster") |>
arrange(qvalue) |>
write_csv(paste0(outdir,"/Table_S3.csv"))