Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
require(msigdbr)
msig_df <- msigdbr(species = "Homo sapiens")
require(clusterProfiler)
require(org.Hs.eg.db)
require(GOSemSim)
require(enrichplot)
msig_Hall_C1_CP_TFT_MOD_HP <- msig_df %>%
filter(gs_cat == "H" |
gs_cat == "C1" |
(gs_cat == "C2" & str_detect(gs_subcat, "CP:")) |
(gs_cat == "C3" & str_detect(gs_subcat, "TFT:")) |
gs_cat == "C4" |
(gs_cat == "C5" & str_detect(gs_subcat, "HPO"))
) %>%
dplyr::select(gs_name, entrez_gene)
msig_CGP_C6_C7_C8 <- msig_df %>%
filter((gs_cat == "C2" & str_detect(gs_subcat, "CGP:")) |
gs_cat == "C6" |
gs_cat == "C7" |
gs_cat == "C8") %>%
dplyr::select(gs_name, entrez_gene)
dsem.sim.bp<-godata('org.Hs.eg.db', ont="BP")
dsem.sim.cc<-godata('org.Hs.eg.db', ont="CC")
dsem.sim.mf<-godata('org.Hs.eg.db', ont="MF")
require(preprocessCore)
vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]
mn.stat<-matrix(cbind(df$fXO_stat, df$mXO_stat, df$FM_stat,
df$fmO_stat), ncol=4)
rownames(mn.stat)<-vc.ENSG
colnames(mn.stat)<-c("fXO_stat", "mXO_stat", "FM_stat",
"fmO_stat")
df.gsea1<-na.omit(merge(All.df.ENSG, mn.stat, by="row.names"))
mn.stat_qn<-normalize.quantiles(matrix(unlist(df.gsea1[,grep("_stat", colnames(df.gsea1))]), ncol = length(grep("stat",colnames(df.gsea1)))))
colnames(mn.stat_qn)<-colnames(mn.stat)
df.gsea2<-cbind(df.gsea1[,-grep("stat",colnames(df.gsea1))],
mn.stat_qn,
#"aveiXO_stat" = rowMeans(mn.stat_qn[,grep("XO_stat", colnames(mn.stat_qn))]),
"aveXO_stat" = rowMeans(mn.stat_qn[,c(1:3)]))
df.gsea2<-df.gsea2[!duplicated(df.gsea2$ENTREZID),]
ls.gsea <- list(
fXO = (df.gsea2 %>%
arrange(desc(fXO_stat)) %>%
dplyr::select(ENTREZID, fXO_stat) %>%
deframe()
),
mXO = (df.gsea2 %>%
arrange(desc(mXO_stat)) %>%
dplyr::select(ENTREZID, mXO_stat) %>%
deframe()
),
fmXO = (df.gsea2 %>%
arrange(desc(FM_stat)) %>%
dplyr::select(ENTREZID, FM_stat) %>%
deframe()
),
OO = (df.gsea2 %>%
arrange(desc(fmO_stat)) %>%
dplyr::select(ENTREZID, fmO_stat) %>%
deframe()
),
ave = (df.gsea2 %>%
arrange(desc(aveXO_stat)) %>%
dplyr::select(ENTREZID, aveXO_stat) %>%
deframe()
)
)
#run once
pcut1<-0.1
fx.GSEA<-function(genelist, terms2genes){
set.seed(42)
gsea.result<-GSEA(genelist, TERM2GENE = terms2genes, pvalueCutoff = pcut1,
pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0)
return(gsea.result)
}
msig_fXO<-fx.GSEA(ls.gsea$fXO, msig_Hall_C1_CP_TFT_MOD_HP)
msig_mXO<-fx.GSEA(ls.gsea$mXO, msig_Hall_C1_CP_TFT_MOD_HP)
msig_fmXO<-fx.GSEA(ls.gsea$fmXO, msig_Hall_C1_CP_TFT_MOD_HP)
msig_OO<-fx.GSEA(ls.gsea$OO, msig_Hall_C1_CP_TFT_MOD_HP)
msig_ave<-fx.GSEA(ls.gsea$ave, msig_Hall_C1_CP_TFT_MOD_HP)
msig2_fXO<-fx.GSEA(ls.gsea$fXO, msig_CGP_C6_C7_C8)
msig2_mXO<-fx.GSEA(ls.gsea$mXO, msig_CGP_C6_C7_C8)
msig2_fmXO<-fx.GSEA(ls.gsea$fmXO, msig_CGP_C6_C7_C8)
msig2_OO<-fx.GSEA(ls.gsea$OO, msig_CGP_C6_C7_C8)
msig2_ave<-fx.GSEA(ls.gsea$ave, msig_CGP_C6_C7_C8)
fx.gseGO<-function(genelist, ontology){
set.seed(42)
gsea.result<-gseGO(genelist, ont = ontology, OrgDb = org.Hs.eg.db, keyType = "ENTREZID",
pvalueCutoff = pcut1, pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0)
return(gsea.result)
}
GO_fXO<-fx.gseGO(ls.gsea$fXO, "ALL")
GO_mXO<-fx.gseGO(ls.gsea$mXO, "ALL")
GO_fmXO<-fx.gseGO(ls.gsea$fmXO, "ALL")
GO_OO<-fx.gseGO(ls.gsea$OO, "ALL")
GO_ave<-fx.gseGO(ls.gsea$ave, "ALL")
require(DOSE)
label.ave<-".ave"
label.fXO<-".fXO"
label.mXO<-".mXO"
label.fmXO<-"fm.XO"
label.OO<-"OO"
if("Category" %in% colnames(msig_ave@result))
{msig_ave@result$Category = label.ave} else {msig_ave@result<-msig_ave@result %>% add_column("Category" = label.ave, .before = 1)}
if("Category" %in% colnames(msig_fXO@result))
{msig_fXO@result$Category = label.fXO} else {msig_fXO@result<-msig_fXO@result %>% add_column("Category" = label.fXO, .before = 1)}
if("Category" %in% colnames(msig_mXO@result))
{msig_mXO@result$Category = label.mXO} else {msig_mXO@result<-msig_mXO@result %>% add_column("Category" = label.mXO, .before = 1)}
if("Category" %in% colnames(msig_fmXO@result))
{msig_fmXO@result$Category = label.fmXO} else {msig_fmXO@result<-msig_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)}
if("Category" %in% colnames(msig_OO@result))
{msig_OO@result$Category = label.OO} else {msig_OO@result<-msig_OO@result %>% add_column("Category" = label.OO, .before = 1)}
if("Category" %in% colnames(msig2_ave@result))
{msig2_ave@result$Category = label.ave} else {msig2_ave@result<-msig2_ave@result %>% add_column("Category" = label.ave, .before = 1)}
if("Category" %in% colnames(msig2_fXO@result))
{msig2_fXO@result$Category = label.fXO} else {msig2_fXO@result<-msig2_fXO@result %>% add_column("Category" = label.fXO, .before = 1)}
if("Category" %in% colnames(msig2_mXO@result))
{msig2_mXO@result$Category = label.mXO} else {msig2_mXO@result<-msig2_mXO@result %>% add_column("Category" = label.mXO, .before = 1)}
if("Category" %in% colnames(msig2_fmXO@result))
{msig2_fmXO@result$Category = label.fmXO} else {msig2_fmXO@result<-msig2_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)}
if("Category" %in% colnames(msig2_OO@result))
{msig2_OO@result$Category = label.OO} else {msig2_OO@result<-msig2_OO@result %>% add_column("Category" = label.OO, .before = 1)}
if("Category" %in% colnames(GO_ave@result))
{GO_ave@result$Category = label.ave} else {GO_ave@result<-GO_ave@result %>% add_column("Category" = label.ave, .before = 1)}
if("Category" %in% colnames(GO_fXO@result))
{GO_fXO@result$Category = label.fXO} else {GO_fXO@result<-GO_fXO@result %>% add_column("Category" = label.fXO, .before = 1)}
if("Category" %in% colnames(GO_mXO@result))
{GO_mXO@result$Category = label.mXO} else {GO_mXO@result<-GO_mXO@result %>% add_column("Category" = label.mXO, .before = 1)}
if("Category" %in% colnames(GO_fmXO@result))
{GO_fmXO@result$Category = label.fmXO} else {GO_fmXO@result<-GO_fmXO@result %>% add_column("Category" = label.fmXO, .before = 1)}
if("Category" %in% colnames(GO_OO@result))
{GO_OO@result$Category = label.OO} else {GO_OO@result<-GO_OO@result %>% add_column("Category" = label.OO, .before = 1)}
logplim<-15
msig_all<-msig_ave
msig_all@result<-bind_rows(msig_ave@result,
msig_fXO@result,
msig_mXO@result,
msig_fmXO@result,
msig_OO@result) %>%
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1)
msig2_all<-msig2_ave
msig2_all@result<-bind_rows(msig2_ave@result,
msig2_fXO@result,
msig2_mXO@result,
msig2_fmXO@result,
msig2_OO@result) %>%
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1)
GO_all<-GO_ave
GO_all@result<-bind_rows(GO_ave@result,
GO_fXO@result,
GO_mXO@result,
GO_fmXO@result,
GO_OO@result) %>%
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1)
GO_all_res <- GO_all@result
GO_BP_ave <- GO_all_res %>%
filter(ONTOLOGY=="BP") %>%
filter(Category==".ave") %>%
filter(p.adjust<0.05)
GO_BP_mXO <- GO_all_res %>%
filter(ONTOLOGY=="BP") %>%
filter(Category==".mXO") %>%
filter(p.adjust<0.05)
GO_BP_fXO <- GO_all_res %>%
filter(ONTOLOGY=="BP") %>%
filter(Category==".fXO") %>%
filter(p.adjust<0.05)
GOBP_mXO_fXO <- merge(GO_BP_mXO, GO_BP_fXO, by = "Description")
fx.labels1<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
abbreviate(s, minlength = 30, dot = T)
})
fx.labels2<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
s<-str_replace_all(s, "_", " ")
abbreviate(s, minlength = 30, dot = T)
})
gpfx.gsea<-function(prefix, labelfx, padj, filterX, X){
ggplot(X %>% filter(ID %in% filterX@result$ID & str_detect(ID, prefix) &
ID %in% (filterX@result$ID[which(filterX@result$p.adjust <= padj)])),
showCategory = dim(X %>% filter(ID %in% filterX@result$ID))[1],
aes(Score, fct_reorder(ID, Score))) + facet_wrap(vars(Category), nrow=1) +
geom_segment(aes(xend=0, yend = ID)) +
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) +
scale_color_gradient2(low="blue",mid = "white", high="red") +
scale_y_discrete(labels = labelfx) +
theme_minimal(base_size = 6) +
xlab("signed log10-scaled p.adjust") +
ylab(prefix) +
labs(size="core genes")
}
pcut2<-0.005
glol.chr<-gpfx.gsea("chr", fx.labels1, pcut2, msig_ave, msig_all)
glol.module<-gpfx.gsea("MODULE", fx.labels2,pcut2, msig_ave, msig_all)
glol.descartes<-gpfx.gsea("^DESCARTES_", fx.labels2, pcut2, msig2_ave, msig2_all)
pcut2<-0.01
glol.reactome<-gpfx.gsea("^REACTOME", fx.labels2, pcut2, msig_ave, msig_all)
pcut2<-0.05
glol.hallmark<-gpfx.gsea("^HALLMARK", fx.labels2, pcut2, msig_ave, msig_all)
glol.wp<-gpfx.gsea("^WP", fx.labels2, pcut2, msig_ave, msig_all)
require(gridExtra)
ls.lol.plots<-list(
#glol.hallmark + theme(axis.line = element_line(colour = "grey50")) + scale_size_area(max_size = 4),
glol.wp + theme(axis.line = element_line(colour = "grey50")) + scale_size_area(max_size = 4)
)
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})
ggsave(paste0(outdir,"/lol.gsea1.pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots),
heights = c(6*vn.lol.plots/sum(vn.lol.plots))),
width = 7, height = 2.5,
device = "pdf", units = "in")
gpfx.go<-function(prefix, labelfx, padj, filterX, X){
ggplot(X %>% filter(ID %in% filterX@result$ID & str_detect(ONTOLOGY, prefix) &
ID %in% (filterX@result$ID[which(filterX@result$p.adjust <= padj)])),
showCategory = dim(X %>% filter(ID %in% filterX@result$ID))[1],
aes(Score, fct_reorder(Description, Score))) + facet_wrap(vars(Category), nrow=1) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) +
scale_color_gradient2(low="blue",mid = "white", high="red") +
scale_y_discrete(labels = labelfx) +
theme_minimal(base_size = 6) +
xlab("signed log10-scaled p.adjust") +
ylab(prefix) +
labs(size="core genes")
}
pcut2<-0.01
glol.bp<-gpfx.go("BP", fx.labels1, pcut2, GO_ave, GO_all)
glol.mf<-gpfx.go("MF", fx.labels1, pcut2, GO_ave, GO_all)
glol.cc<-gpfx.go("CC", fx.labels1, pcut2, GO_ave, GO_all)
ls.lol.plots<-list(
glol.chr + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4),
glol.module + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4),
glol.descartes + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size =4),
glol.reactome + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size =4),
glol.cc + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4),
glol.mf + theme(axis.line = element_line(colour = "grey50")) + guides(size='none') + scale_size_area(max_size = 4)
)
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})
ggsave(paste0(outdir,"/lol.gsea2.pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots),
heights = c(10*vn.lol.plots/sum(vn.lol.plots))),
width = 7.5, height = 10,
device = "pdf", units = "in")
require(ggnewscale)
pcut2<-0.05
fx.semsim<-function(ontology, label, padj){pairwise_termsim(GO_all %>% filter(ONTOLOGY == ontology &
p.adjust <= padj &
Category == label &
(ID %in% GO_fXO@result$ID |
ID %in% GO_fmXO@result$ID |
ID %in% GO_mXO@result$ID)) %>%
arrange(p.adjust), method="JC", semData = dsem.sim.bp)}
ss.bp.ave<-fx.semsim("BP", ".ave", 0.05)
ss.cc.ave<-fx.semsim("CC", ".ave", 0.05)
ss.mf.ave<-fx.semsim("MF", ".ave", 0.05)
ego.bp.ave<-emapplot_cluster(ss.bp.ave, color = "Score", showCategory = min(200,dim(ss.bp.ave@result)[1]), cex_label_group = 1, shadowtext = F,
label_format = 15, nWords = 4, #nCluster =12,
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) +
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red")
ego.cc.ave<-emapplot_cluster(ss.cc.ave, color = "Score", showCategory = min(200,dim(ss.cc.ave@result)[1]), cex_label_group = 1, shadowtext = F,
label_format = 15, nWords = 4, #nCluster =12,
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) +
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red")
ego.mf.ave<-emapplot_cluster(ss.mf.ave, color = "Score", showCategory = min(200,dim(ss.mf.ave@result)[1]), cex_label_group = 1, shadowtext = F,
label_format = 15, nWords = 4, #nCluster =12,
min_edge = 0.05, repel = T, cex_category = 0.7, cex_line=0.7) +
scale_fill_gradient2(name = "signed log10(p.adj)", low="blue", high="red")
ls.ego.plots<-list(
ego.bp.ave,
ego.cc.ave)
ggsave("lol.go1.pdf", grid.arrange(grobs = ls.ego.plots, nrow=length(ls.ego.plots), heights=c(5,5)), device = "pdf", width=8, height=11, units = "in")
#replot these
ecililys_ave<-GO_ave %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag"))
ecililys_fXO<-GO_fXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag"))
ecililys_mXO<-GO_mXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag"))
ecililys_fmXO<-GO_fmXO %>% filter(str_detect(Description, "cilium") | str_detect(Description, "lysosom") | str_detect(Description, "autophag"))
cnet.fem<-cnetplot(ecililys_fXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1, shadowtext = "none", #color_category = 'grey',
foldChange = ecililys_fXO@geneList[which(abs(ecililys_fXO@geneList)>min(abs(df[vtf.F,"fXO_stat"])))])
cnet.male<-cnetplot(ecililys_mXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1,shadowtext = "none", #color_category = 'grey',
foldChange = ecililys_mXO@geneList[which(abs(ecililys_mXO@geneList)>min(abs(df[vtf.M,"mXO_stat"])))])
cnet.fmXO<-cnetplot(ecililys_fmXO, node_label = 'category', showCategory =40, layout = 'nicely',cex_label_category = 0.5,cex_gene = 0.1,shadowtext = "none", #color_category = 'grey',
foldChange = ecililys_fmXO@geneList[which(abs(ecililys_fmXO@geneList)>min(abs(df[vtf.FM,"FM_stat"])))])
ls.cnet.plots<-list(
cnet.fem, cnet.male
)
ggsave("lol.go2.pdf", grid.arrange(grobs = ls.cnet.plots, nrow=length(ls.cnet.plots), heights=c(4,4)), device = "pdf", width=5, height=8, units = "in")
unique_categories <- c("aveXO", "fXO", "mXO", "fXO-XY", "XOXO")
GO_all |>
as.data.frame() |>
mutate(
object_file = "GO_all",
ID = sprintf("GO%s_%s", ONTOLOGY, ID)
) |>
dplyr::select(-ONTOLOGY) |>
rbind(
msig_all |>
as.data.frame() |>
mutate(object_file = "msig_all")
) |>
rbind(
msig2_all |>
as.data.frame() |>
mutate(object_file = "msig2_all")
) |>
mutate(
Category = case_when(
Category == "OO" ~ "XOXO",
Category == ".fXO" ~ "fXO",
Category == ".mXO" ~ "mXO",
Category == ".ave" ~ "aveXO",
Category == "fm.XO" ~ "fXO-XY"
)
) |>
pivot_wider(
id_cols = c(ID, Description, object_file),
names_from = Category,
values_from = c(NES,
p.adjust),
names_glue = "{Category}_{.value}",
names_sort = T
) |>
dplyr::select(
ID,
Description,
starts_with(sprintf("%s_",unique_categories))
) |>
arrange(
!!as.symbol(sprintf("%s_p.adjust",unique_categories))
) |>
write_csv("Table_S3.csv")