Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
BAP_trophoblast_45X/DESEQ2_GSEA_TBLs_pass.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
392 lines (327 sloc)
17 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(msigdbr) | |
msig_df <- msigdbr(species = "Homo sapiens") | |
require(clusterProfiler) | |
require(org.Hs.eg.db) | |
require(GOSemSim) | |
require(enrichplot) | |
msig_Hall_C1_CP_TFT_MOD_HP <- msig_df %>% | |
filter(gs_cat == "H" | | |
gs_cat == "C1" | | |
(gs_cat == "C2" & str_detect(gs_subcat, "CP:")) | | |
(gs_cat == "C3" & str_detect(gs_subcat, "TFT:")) | | |
gs_cat == "C4" | | |
(gs_cat == "C5" & str_detect(gs_subcat, "HPO")) | |
) %>% | |
dplyr::select(gs_name, entrez_gene) | |
msig_CGP_C6_C7_C8 <- msig_df %>% | |
filter((gs_cat == "C2" & str_detect(gs_subcat, "CGP:")) | | |
gs_cat == "C6" | | |
gs_cat == "C7" | | |
gs_cat == "C8") %>% | |
dplyr::select(gs_name, entrez_gene) | |
dsem.sim.bp<-godata('org.Hs.eg.db', ont="BP") | |
dsem.sim.cc<-godata('org.Hs.eg.db', ont="CC") | |
dsem.sim.mf<-godata('org.Hs.eg.db', ont="MF") | |
require(preprocessCore) | |
vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1] | |
mn.stat<-matrix(cbind(df$fXO_stat, df$mXO_stat, df$FM_stat, | |
df$fmO_stat), ncol=4) | |
rownames(mn.stat)<-vc.ENSG | |
colnames(mn.stat)<-c("fXO_stat", "mXO_stat", "FM_stat", | |
"fmO_stat") | |
df.gsea1<-na.omit(merge(All.df.ENSG, mn.stat, by="row.names")) | |
mn.stat_qn<-normalize.quantiles(matrix(unlist(df.gsea1[,grep("_stat", colnames(df.gsea1))]), ncol = length(grep("stat",colnames(df.gsea1))))) | |
colnames(mn.stat_qn)<-colnames(mn.stat) | |
df.gsea2<-cbind(df.gsea1[,-grep("stat",colnames(df.gsea1))], | |
mn.stat_qn, | |
#"aveiXO_stat" = rowMeans(mn.stat_qn[,grep("XO_stat", colnames(mn.stat_qn))]), | |
"aveXO_stat" = rowMeans(mn.stat_qn[,c(1:3)])) | |
df.gsea2<-df.gsea2[!duplicated(df.gsea2$ENTREZID),] | |
ls.gsea <- list( | |
fXO = (df.gsea2 %>% | |
arrange(desc(fXO_stat)) %>% | |
dplyr::select(ENTREZID, fXO_stat) %>% | |
deframe() | |
), | |
mXO = (df.gsea2 %>% | |
arrange(desc(mXO_stat)) %>% | |
dplyr::select(ENTREZID, mXO_stat) %>% | |
deframe() | |
), | |
fmXO = (df.gsea2 %>% | |
arrange(desc(FM_stat)) %>% | |
dplyr::select(ENTREZID, FM_stat) %>% | |
deframe() | |
), | |
OO = (df.gsea2 %>% | |
arrange(desc(fmO_stat)) %>% | |
dplyr::select(ENTREZID, fmO_stat) %>% | |
deframe() | |
), | |
ave = (df.gsea2 %>% | |
arrange(desc(aveXO_stat)) %>% | |
dplyr::select(ENTREZID, aveXO_stat) %>% | |
deframe() | |
) | |
) | |
#run once | |
pcut1<-0.1 | |
fx.GSEA<-function(genelist, terms2genes){ | |
set.seed(42) | |
gsea.result<-GSEA(genelist, TERM2GENE = terms2genes, pvalueCutoff = pcut1, | |
pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0) | |
return(gsea.result) | |
} | |
msig_fXO<-fx.GSEA(ls.gsea$fXO, msig_Hall_C1_CP_TFT_MOD_HP) | |
msig_mXO<-fx.GSEA(ls.gsea$mXO, msig_Hall_C1_CP_TFT_MOD_HP) | |
msig_fmXO<-fx.GSEA(ls.gsea$fmXO, msig_Hall_C1_CP_TFT_MOD_HP) | |
msig_OO<-fx.GSEA(ls.gsea$OO, msig_Hall_C1_CP_TFT_MOD_HP) | |
msig_ave<-fx.GSEA(ls.gsea$ave, msig_Hall_C1_CP_TFT_MOD_HP) | |
msig2_fXO<-fx.GSEA(ls.gsea$fXO, msig_CGP_C6_C7_C8) | |
msig2_mXO<-fx.GSEA(ls.gsea$mXO, msig_CGP_C6_C7_C8) | |
msig2_fmXO<-fx.GSEA(ls.gsea$fmXO, msig_CGP_C6_C7_C8) | |
msig2_OO<-fx.GSEA(ls.gsea$OO, msig_CGP_C6_C7_C8) | |
msig2_ave<-fx.GSEA(ls.gsea$ave, msig_CGP_C6_C7_C8) | |
fx.gseGO<-function(genelist, ontology){ | |
set.seed(42) | |
gsea.result<-gseGO(genelist, ont = ontology, OrgDb = org.Hs.eg.db, keyType = "ENTREZID", | |
pvalueCutoff = pcut1, pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0) | |
return(gsea.result) | |
} | |
GO_fXO<-fx.gseGO(ls.gsea$fXO, "ALL") | |
GO_mXO<-fx.gseGO(ls.gsea$mXO, "ALL") | |
GO_fmXO<-fx.gseGO(ls.gsea$fmXO, "ALL") | |
GO_OO<-fx.gseGO(ls.gsea$OO, "ALL") | |
GO_ave<-fx.gseGO(ls.gsea$ave, "ALL") | |
require(DOSE) | |
label.ave<-".ave" | |
label.fXO<-".fXO" | |
label.mXO<-".mXO" | |
label.fmXO<-"fm.XO" | |
label.OO<-"OO" | |
if("Category" %in% colnames(msig_ave@result)) | |
{msig_ave@result$Category = label.ave} else {msig_ave@result<-msig_ave@result %>% add_column("Category" = label.ave, .before = 1)} | |
if("Category" %in% colnames(msig_fXO@result)) | |
{msig_fXO@result$Category = label.fXO} else {msig_fXO@result<-msig_fXO@result %>% add_column("Category" = label.fXO, .before = 1)} | |
if("Category" %in% colnames(msig_mXO@result)) | |
{msig_mXO@result$Category = label.mXO} else {msig_mXO@result<-msig_mXO@result %>% add_column("Category" = label.mXO, .before = 1)} | |
if("Category" %in% colnames(msig_fmXO@result)) | |
{msig_fmXO@result$Category = label.fmXO} else {msig_fmXO@result<-msig_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)} | |
if("Category" %in% colnames(msig_OO@result)) | |
{msig_OO@result$Category = label.OO} else {msig_OO@result<-msig_OO@result %>% add_column("Category" = label.OO, .before = 1)} | |
if("Category" %in% colnames(msig2_ave@result)) | |
{msig2_ave@result$Category = label.ave} else {msig2_ave@result<-msig2_ave@result %>% add_column("Category" = label.ave, .before = 1)} | |
if("Category" %in% colnames(msig2_fXO@result)) | |
{msig2_fXO@result$Category = label.fXO} else {msig2_fXO@result<-msig2_fXO@result %>% add_column("Category" = label.fXO, .before = 1)} | |
if("Category" %in% colnames(msig2_mXO@result)) | |
{msig2_mXO@result$Category = label.mXO} else {msig2_mXO@result<-msig2_mXO@result %>% add_column("Category" = label.mXO, .before = 1)} | |
if("Category" %in% colnames(msig2_fmXO@result)) | |
{msig2_fmXO@result$Category = label.fmXO} else {msig2_fmXO@result<-msig2_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)} | |
if("Category" %in% colnames(msig2_OO@result)) | |
{msig2_OO@result$Category = label.OO} else {msig2_OO@result<-msig2_OO@result %>% add_column("Category" = label.OO, .before = 1)} | |
if("Category" %in% colnames(GO_ave@result)) | |
{GO_ave@result$Category = label.ave} else {GO_ave@result<-GO_ave@result %>% add_column("Category" = label.ave, .before = 1)} | |
if("Category" %in% colnames(GO_fXO@result)) | |
{GO_fXO@result$Category = label.fXO} else {GO_fXO@result<-GO_fXO@result %>% add_column("Category" = label.fXO, .before = 1)} | |
if("Category" %in% colnames(GO_mXO@result)) | |
{GO_mXO@result$Category = label.mXO} else {GO_mXO@result<-GO_mXO@result %>% add_column("Category" = label.mXO, .before = 1)} | |
if("Category" %in% colnames(GO_fmXO@result)) | |
{GO_fmXO@result$Category = label.fmXO} else {GO_fmXO@result<-GO_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)} | |
if("Category" %in% colnames(GO_OO@result)) | |
{GO_OO@result$Category = label.OO} else {GO_OO@result<-GO_OO@result %>% add_column("Category" = label.OO, .before = 1)} | |
logplim<-15 | |
msig_all<-msig_ave | |
msig_all@result<-bind_rows(msig_ave@result, | |
msig_fXO@result, | |
msig_mXO@result, | |
msig_fmXO@result, | |
msig_OO@result) %>% | |
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1) | |
msig2_all<-msig2_ave | |
msig2_all@result<-bind_rows(msig2_ave@result, | |
msig2_fXO@result, | |
msig2_mXO@result, | |
msig2_fmXO@result, | |
msig2_OO@result) %>% | |
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1) | |
GO_all<-GO_ave | |
GO_all@result<-bind_rows(GO_ave@result, | |
GO_fXO@result, | |
GO_mXO@result, | |
GO_fmXO@result, | |
GO_OO@result) %>% | |
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1) | |
GO_all_res <- GO_all@result | |
GO_BP_ave <- GO_all_res %>% | |
filter(ONTOLOGY=="BP") %>% | |
filter(Category==".ave") %>% | |
filter(p.adjust<0.05) | |
GO_BP_mXO <- GO_all_res %>% | |
filter(ONTOLOGY=="BP") %>% | |
filter(Category==".mXO") %>% | |
filter(p.adjust<0.05) | |
GO_BP_fXO <- GO_all_res %>% | |
filter(ONTOLOGY=="BP") %>% | |
filter(Category==".fXO") %>% | |
filter(p.adjust<0.05) | |
GOBP_mXO_fXO <- merge(GO_BP_mXO, GO_BP_fXO, by = "Description") | |
fx.labels1<- Vectorize(function(X) { | |
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 )) | |
abbreviate(s, minlength = 30, dot = T) | |
}) | |
fx.labels2<- Vectorize(function(X) { | |
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2)) | |
s<-str_replace_all(s, "_", " ") | |
abbreviate(s, minlength = 30, dot = T) | |
}) | |
gpfx.gsea<-function(prefix, labelfx, padj, filterX, X){ | |
ggplot(X %>% filter(ID %in% filterX@result$ID & str_detect(ID, prefix) & | |
ID %in% (filterX@result$ID[which(filterX@result$p.adjust <= padj)])), | |
showCategory = dim(X %>% filter(ID %in% filterX@result$ID))[1], | |
aes(Score, fct_reorder(ID, Score))) + facet_wrap(vars(Category), nrow=1) + | |
geom_segment(aes(xend=0, yend = ID)) + | |
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) + | |
scale_color_gradient2(low="blue",mid = "white", high="red") + | |
scale_y_discrete(labels = labelfx) + | |
theme_minimal(base_size = 6) + | |
xlab("signed log10-scaled p.adjust") + | |
ylab(prefix) + | |
labs(size="core genes") | |
} | |
pcut2<-0.005 | |
glol.chr<-gpfx.gsea("chr", fx.labels1, pcut2, msig_ave, msig_all) | |
glol.module<-gpfx.gsea("MODULE", fx.labels2,pcut2, msig_ave, msig_all) | |
glol.descartes<-gpfx.gsea("^DESCARTES_", fx.labels2, pcut2, msig2_ave, msig2_all) | |
pcut2<-0.01 | |
glol.reactome<-gpfx.gsea("^REACTOME", fx.labels2, pcut2, msig_ave, msig_all) | |
pcut2<-0.05 | |
glol.hallmark<-gpfx.gsea("^HALLMARK", fx.labels2, pcut2, msig_ave, msig_all) | |
glol.wp<-gpfx.gsea("^WP", fx.labels2, pcut2, msig_ave, msig_all) | |
require(gridExtra) | |
ls.lol.plots<-list( | |
#glol.hallmark + theme(axis.line = element_line(colour = "grey50")) + scale_size_area(max_size = 4), | |
glol.wp + theme(axis.line = element_line(colour = "grey50")) + scale_size_area(max_size = 4) | |
) | |
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))}) | |
ggsave(paste0(outdir,"/lol.gsea1.pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots), | |
heights = c(6*vn.lol.plots/sum(vn.lol.plots))), | |
width = 7, height = 2.5, | |
device = "pdf", units = "in") | |
gpfx.go<-function(prefix, labelfx, padj, filterX, X){ | |
ggplot(X %>% filter(ID %in% filterX@result$ID & str_detect(ONTOLOGY, prefix) & | |
ID %in% (filterX@result$ID[which(filterX@result$p.adjust <= padj)])), | |
showCategory = dim(X %>% filter(ID %in% filterX@result$ID))[1], | |
aes(Score, fct_reorder(Description, Score))) + facet_wrap(vars(Category), nrow=1) + | |
geom_segment(aes(xend=0, yend = Description)) + | |
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) + | |
scale_color_gradient2(low="blue",mid = "white", high="red") + | |
scale_y_discrete(labels = labelfx) + | |
theme_minimal(base_size = 6) + | |
xlab("signed log10-scaled p.adjust") + | |
ylab(prefix) + | |
labs(size="core genes") | |
} | |
pcut2<-0.01 | |
glol.bp<-gpfx.go("BP", fx.labels1, pcut2, GO_ave, GO_all) | |
glol.mf<-gpfx.go("MF", fx.labels1, pcut2, GO_ave, GO_all) | |
glol.cc<-gpfx.go("CC", fx.labels1, pcut2, GO_ave, GO_all) | |
ls.lol.plots<-list( | |
glol.chr + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4), | |
glol.module + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4), | |
glol.descartes + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size =4), | |
glol.reactome + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size =4), | |
glol.cc + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4), | |
glol.mf + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4) | |
) | |
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))}) | |
ggsave(paste0(outdir,"/lol.gsea2.pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots), | |
heights = c(10*vn.lol.plots/sum(vn.lol.plots))), | |
width = 7.5, height = 10, | |
device = "pdf", units = "in") | |
require(ggnewscale) | |
pcut2<-0.05 | |
fx.semsim<-function(ontology, label, padj){pairwise_termsim(GO_all %>% filter(ONTOLOGY == ontology & | |
p.adjust <= padj & | |
Category == label & | |
(ID %in% GO_fXO@result$ID | | |
ID %in% GO_fmXO@result$ID | | |
ID %in% GO_mXO@result$ID)) %>% | |
arrange(p.adjust), method="JC", semData = dsem.sim.bp)} | |
ss.bp.ave<-fx.semsim("BP", ".ave", 0.05) | |
ss.cc.ave<-fx.semsim("CC", ".ave", 0.05) | |
ss.mf.ave<-fx.semsim("MF", ".ave", 0.05) | |
ego.bp.ave<-emapplot_cluster(ss.bp.ave, color = "Score", showCategory = min(200,dim(ss.bp.ave@result)[1]), cex_label_group = 1, shadowtext = F, | |
label_format = 15, nWords = 4, #nCluster =12, | |
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) + | |
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red") | |
ego.cc.ave<-emapplot_cluster(ss.cc.ave, color = "Score", showCategory = min(200,dim(ss.cc.ave@result)[1]), cex_label_group = 1, shadowtext = F, | |
label_format = 15, nWords = 4, #nCluster =12, | |
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) + | |
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red") | |
ego.mf.ave<-emapplot_cluster(ss.mf.ave, color = "Score", showCategory = min(200,dim(ss.mf.ave@result)[1]), cex_label_group = 1, shadowtext = F, | |
label_format = 15, nWords = 4, #nCluster =12, | |
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) + | |
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red") | |
ls.ego.plots<-list( | |
ego.bp.ave, | |
ego.cc.ave) | |
ggsave("lol.go1.pdf", grid.arrange(grobs = ls.ego.plots, nrow=length(ls.ego.plots), heights=c(5,5)), device = "pdf", width=8, height=11, units = "in") | |
#replot these | |
ecililys_ave<-GO_ave %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag")) | |
ecililys_fXO<-GO_fXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag")) | |
ecililys_mXO<-GO_mXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag")) | |
ecililys_fmXO<-GO_fmXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag")) | |
cnet.fem<-cnetplot(ecililys_fXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1, shadowtext = "none", #color_category = 'grey', | |
foldChange = ecililys_fXO@geneList[which(abs(ecililys_fXO@geneList)>min(abs(df[vtf.F,"fXO_stat"])))]) | |
cnet.male<-cnetplot(ecililys_mXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1,shadowtext = "none", #color_category = 'grey', | |
foldChange = ecililys_mXO@geneList[which(abs(ecililys_mXO@geneList)>min(abs(df[vtf.M,"mXO_stat"])))]) | |
cnet.fmXO<-cnetplot(ecililys_fmXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1,shadowtext = "none", #color_category = 'grey', | |
foldChange = ecililys_fmXO@geneList[which(abs(ecililys_fmXO@geneList)>min(abs(df[vtf.FM,"FM_stat"])))]) | |
ls.cnet.plots<-list( | |
cnet.fem, cnet.male | |
) | |
ggsave("lol.go2.pdf", grid.arrange(grobs = ls.cnet.plots, nrow=length(ls.cnet.plots), heights=c(4,4)), device = "pdf", width=5, height=8, units = "in") | |
unique_categories <- c("aveXO", "fXO", "mXO", "fXO-XY", "XOXO") | |
GO_all |> | |
as.data.frame() |> | |
mutate( | |
object_file = "GO_all", | |
ID = sprintf("GO%s_%s", ONTOLOGY, ID) | |
) |> | |
dplyr::select(-ONTOLOGY) |> | |
rbind( | |
msig_all |> | |
as.data.frame() |> | |
mutate(object_file = "msig_all") | |
) |> | |
rbind( | |
msig2_all |> | |
as.data.frame() |> | |
mutate(object_file = "msig2_all") | |
) |> | |
mutate( | |
Category = case_when( | |
Category == "OO" ~ "XOXO", | |
Category == ".fXO" ~ "fXO", | |
Category == ".mXO" ~ "mXO", | |
Category == ".ave" ~ "aveXO", | |
Category == "fm.XO" ~ "fXO-XY" | |
) | |
) |> | |
pivot_wider( | |
id_cols = c(ID, Description, object_file), | |
names_from = Category, | |
values_from = c(NES, | |
p.adjust), | |
names_glue = "{Category}_{.value}", | |
names_sort = T | |
) |> | |
dplyr::select( | |
ID, | |
Description, | |
starts_with(sprintf("%s_",unique_categories)) | |
) |> | |
arrange( | |
!!as.symbol(sprintf("%s_p.adjust",unique_categories)) | |
) |> | |
write_csv("Table_S3.csv") |