Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
T21-XIST/make_figures.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2573 lines (2417 sloc)
121 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## --------------------------- | |
## | |
## Purpose of script: Generate figures for thesis | |
## | |
## Author: Prakhar Bansal | |
## | |
## Date Created: 2022-03-27 | |
## | |
## Copyright (c) Prakhar Bansal, 2022 | |
## Email: pbansal@uchc.edu | |
## | |
## --------------------------- | |
## | |
## Notes: | |
## | |
## | |
## --------------------------- | |
## set working directory | |
setwd(dirname(unlist(rstudioapi::getSourceEditorContext()['path']))) | |
## --------------------------- | |
## load up the packages we will need: (uncomment as required) | |
require(tidyverse) | |
theme_set( | |
theme_classic() + | |
theme( | |
text=element_text(size=9, family="sans"), | |
plot.title=element_text(hjust=0.5, size=10, family = "sans", face="bold"), | |
axis.title = element_text(size=9, face="bold"), | |
axis.text = element_text(size=9), | |
legend.spacing.x = unit(0, "cm"), | |
legend.spacing.y = unit(1, "mm"), | |
legend.key.height = unit(4, "mm"), | |
legend.margin = margin(0,0,0,0), | |
axis.line = element_line(size=0), | |
strip.background = element_blank(), | |
strip.text = element_text(margin = margin(1,1,1,1, unit="mm")), | |
panel.border = element_rect(color="black", fill=NA, size=.5) | |
) | |
) | |
require(ggsci) | |
library(ggdendro) | |
library(ComplexHeatmap) | |
library(rstatix) | |
library(ggpubr) | |
## --------------------------- | |
output_dir <- "outputs/2024-02-17 21-39-53.641348 timepoint+dox_status+effective_genotype" | |
diff_expr_compare_to <- "T21_vs_D21" | |
diff_expr_comparisons <- c("dox_effect", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21") | |
diff_expr_combined_padj_cutoff <- .1 | |
gsea_compare_to <- "T21_vs_D21" | |
gsea_comparisons <- c("dox_effect", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21") | |
chr21_changes_comparisons <- c("dT21XIST_vs_T21", "wdT21XIST_vs_T21") | |
gsea_combined_padj_cutoff <- .2 | |
lol_cutoff_primary <- .1 | |
lol_cutoff_overlap <- .1 | |
fig_output_dir <- sprintf("revision_figs/%s", str_replace(output_dir, "outputs/", "")) | |
table_output_dir <- sprintf("revision_tables/%s", str_replace(output_dir, "outputs/", "")) | |
dir.create(fig_output_dir) | |
dir.create(table_output_dir) | |
gene_info_df <- read_csv("input_data/gene_info_df.csv.gz") | |
sample_sheet_df <- read_csv(paste0(output_dir, "/tables/sample_sheet_df.csv.gz")) | |
batch_corrected_vsd_df <- read_csv(paste0(output_dir, "/tables/batch_corrected_vsd_salmon_df.csv.gz")) | |
combined_results_df <- read_csv(paste0(output_dir, "/tables/combined_results_df.csv.gz")) | |
neural_output_dir <- (read_csv(paste0(output_dir, "/params.csv.gz")) |> | |
filter(parameter == "neural_output_dir") |> | |
pull(value))[1] | |
neural_results_nonrtta_T21_vs_D21 <- read_csv(paste0(neural_output_dir, "/tables/results_neural_t21_d21.csv.gz")) | |
neural_combined_results_df <- read_csv(paste0(neural_output_dir, "/tables/combined_results_df.csv.gz")) | |
combined_results_df <- read_csv(paste0(output_dir, "/tables/combined_results_df.csv.gz")) |> | |
full_join(neural_combined_results_df) |> | |
mutate_at(vars(starts_with("padj")), list(~ if_else(is.na(.), 1, .))) |> | |
mutate_at(vars(starts_with("pvalue")), list(~ if_else(is.na(.), 1, .))) |> | |
mutate_at(vars(starts_with("log2FoldChange")), list(~ if_else(is.na(.), 0, .))) |> | |
mutate_at(vars(starts_with("stat")), list(~ if_else(is.na(.), 0, .))) |> | |
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) | |
write_csv(combined_results_df, paste0(table_output_dir, "/deseq2_results.csv")) | |
combined_results_df |> | |
dplyr::select(ensembl_id, hgnc_symbol, chromosome_name, start_position, gene_biotype, | |
ends_with(c("T21_vs_D21", "D21_vs_T21", "dT21XIST_vs_T21", | |
"wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"neural_t21_d21", "neural_t21xistdox_t21", "neural_t21xistwd_t21", "neural_t21xistdoxcombined_t21"))) |> | |
rename_all( | |
.funs = list( | |
~str_replace_all(.,"dwdT21SIT_vs_T21", "combinedT21XIST_vs_T21") |> | |
str_replace_all("neural_t21_d21", "neural_T21_vs_D21") |> | |
str_replace_all("neural_t21xistdox_t21", "neural_dT21XIST_vs_T21") |> | |
str_replace_all("neural_t21xistwd_t21", "neural_wdT21XIST_vs_T21") |> | |
str_replace_all("neural_t21xistdoxcombined_t21", "neural_combinedT21XIST_vs_T21") | |
) | |
) |> | |
write_csv(paste0(table_output_dir, "/Data_S1_bulk_deseq2.csv.gz")) | |
######################## Diff Expr 198-1 and 198-2 ############################# | |
dds <- readRDS(sprintf("%s/tables/dds.rds", output_dir)) | |
vsd <- varianceStabilizingTransformation(dds, blind = F) | |
sprintf("Outputting DESeq2 Results...\n") | |
diff_exprs <- list( | |
"1981_vs_1982" = c( | |
'cell_line == "198_1"', | |
'cell_line == "198_2"', | |
"timepoint|dox_status") | |
) | |
for (de_name in names(diff_exprs)) { | |
write_results(de_name, | |
diff_exprs[[de_name]][1], | |
diff_exprs[[de_name]][2], | |
dds, | |
outdir, | |
diff_exprs[[de_name]][3]) | |
} | |
# MA plot between D21 and T21XIST | |
vsd_salmon_df <- read_csv(paste0(output_dir, "/tables/vsd_salmon_df.csv.gz")) | |
mean_d21_t21xist_vst_df <- vsd_salmon_df |> | |
pivot_longer(cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
filter(genotype == "D21" | (genotype == "T21XIST" & dox_status != "no")) |> | |
group_by(ensembl_id) |> | |
summarise(mean_vst_count = mean(vst_count)) | |
aov_d21_t21xist_df <- aov(vst_count ~ group, | |
vsd_salmon_df |> | |
pivot_longer(cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |> | |
# filter(chromosome_name != "21") |> | |
filter(genotype == "D21" | (genotype == "T21XIST" & dox_status != "no")) |> | |
mutate(group = case_when( | |
genotype == "D21" ~ "D21", | |
genotype == "T21XIST" & dox_status != "no" ~ "T21XIST" | |
))) |> | |
broom::tidy() | |
aov_t21_d21_df <- aov(vst_count ~ group, | |
vsd_salmon_df |> | |
pivot_longer(cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |> | |
# filter(chromosome_name != "21") |> | |
filter(genotype == "D21" | genotype == "T21") |> | |
mutate(group = case_when( | |
genotype == "D21" ~ "D21", | |
genotype == "T21" ~ "T21" | |
))) |> | |
broom::tidy() | |
aov_t21_t21xist_df <- aov(vst_count ~ group, | |
vsd_salmon_df |> | |
pivot_longer(cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |> | |
# filter(chromosome_name != "21") |> | |
filter(genotype == "T21" | (genotype == "T21XIST" & dox_status != "no")) |> | |
mutate(group = case_when( | |
genotype == "T21" ~ "T21", | |
genotype == "T21XIST" & dox_status != "no" ~ "T21XIST" | |
))) |> | |
broom::tidy() | |
aov_d21_t21xist_df |> | |
mutate(comparison = "T21XIST vs D21") |> | |
rbind( | |
aov_t21_t21xist_df |> | |
mutate(comparison = "T21XIST vs T21")) |> | |
rbind( | |
aov_t21_d21_df |> | |
mutate(comparison = "T21 vs D21")) |> | |
filter(term == "group") |> | |
ggplot(aes(x=comparison, y=statistic)) + | |
geom_col() + | |
labs( | |
title="ANOVA Test Statistic w/ chr21", | |
# title="ANOVA Test Statistic", | |
x="Groups Compared") + | |
geom_text(aes(label=sprintf("p = %.1g", p.value)), vjust=-0.2, size=3) + | |
# scale_y_continuous(limits=c(0,1)) + | |
scale_y_continuous(limits=c(0,2.4)) + | |
theme( | |
axis.text.x = element_text(angle=30, hjust=1), | |
axis.title.x = element_blank() | |
) | |
ggsave(sprintf("%s/anova_statistic.pdf", fig_output_dir), width = 2, height=2) | |
ggsave(sprintf("%s/anova_statistic_withchr21.pdf", fig_output_dir), width = 2, height=2) | |
ggplotly(combined_results_df |> | |
# filter(!(gene_biotype %in% c("rRNA", "TEC")) & !is.na(gene_biotype))|> | |
filter((gene_biotype %in% c("protein_coding")) & !is.na(gene_biotype))|> | |
select(ensembl_id, chromosome_name, hgnc_symbol, gene_biotype, ends_with("_dwdT21SIT_vs_D21")) |> | |
left_join(mean_d21_t21xist_vst_df) |> | |
mutate(colorby = case_when( | |
padj_dwdT21SIT_vs_D21 > .1 ~ "nonDE", | |
padj_dwdT21SIT_vs_D21 <= .1 & log2FoldChange_dwdT21SIT_vs_D21 > 0 ~ "upReg", | |
padj_dwdT21SIT_vs_D21 <= .1 & log2FoldChange_dwdT21SIT_vs_D21 < 0 ~ "downReg" | |
)) |> | |
ggplot(aes(x=mean_vst_count, y=log2FoldChange_dwdT21SIT_vs_D21, color=colorby, name=ensembl_id, biotype=gene_biotype)) + | |
geom_point() + | |
scale_color_manual(values=c("blue", "grey", "red"), limits=c("downReg", "nonDE", "upReg"))) | |
# MA plot between D21 and T21 | |
vsd_salmon_df <- read_csv(paste0(output_dir, "/tables/vsd_salmon_df.csv.gz")) | |
mean_d21_t21xist_vst_df <- vsd_salmon_df |> | |
pivot_longer(cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
filter(genotype == "D21" | (genotype == "T21")) |> | |
group_by(ensembl_id) |> | |
summarise(mean_vst_count = mean(vst_count)) | |
ggplotly(combined_results_df |> | |
# filter(!(gene_biotype %in% c("rRNA", "TEC")) & !is.na(gene_biotype))|> | |
filter((gene_biotype %in% c("protein_coding")) & !is.na(gene_biotype))|> | |
select(ensembl_id, chromosome_name, hgnc_symbol, gene_biotype, ends_with("_T21_vs_D21")) |> | |
left_join(mean_d21_t21xist_vst_df) |> | |
mutate(colorby = case_when( | |
padj_T21_vs_D21 > .1 ~ "nonDE", | |
padj_T21_vs_D21 <= .1 & log2FoldChange_T21_vs_D21 > 0 ~ "upReg", | |
padj_T21_vs_D21 <= .1 & log2FoldChange_T21_vs_D21 < 0 ~ "downReg" | |
)) |> | |
ggplot(aes(x=mean_vst_count, y=log2FoldChange_T21_vs_D21, color=colorby, name=ensembl_id, biotype=gene_biotype)) + | |
geom_point() + | |
scale_color_manual(values=c("blue", "grey", "red"), limits=c("downReg", "nonDE", "upReg"))) | |
################################################################################ | |
combined_results_df |> | |
pivot_longer( | |
cols = c(tidyselect::ends_with(c( | |
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(id_cols = c(ensembl_id, comparison), | |
names_from = value_type, | |
values_from = value) |> | |
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name), | |
by=c("ensembl_id"="ensembl_gene_id_version")) |> | |
mutate(padj = case_when(is.na(padj) ~ 1, | |
T ~ padj), | |
chromosome_name = case_when(is.na(chromosome_name) ~ "NA", | |
T ~ chromosome_name)) |> | |
dplyr::count(comparison, chromosome_name == "21", log2FoldChange > 0, padj <= .01) |> | |
pivot_wider(names_from = comparison, | |
values_from = n) |> | |
arrange(`padj <= 0.01`, desc(`chromosome_name == "21"`), desc(`log2FoldChange > 0`)) |> | |
dplyr::select(`padj <= 0.01`, `chromosome_name == "21"`, `log2FoldChange > 0`, | |
T21_vs_D21, dT21XIST_vs_D21, dwdT21SIT_vs_D21, | |
dT21XIST_vs_T21, dwdT21SIT_vs_T21) |> | |
write_csv("revision_figs/diff_expr_counts.csv") | |
gene_updown_df <- combined_results_df |> | |
pivot_longer( | |
cols = c(tidyselect::ends_with(c( | |
"dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"T21_vs_D21"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(id_cols = c(ensembl_id, comparison), | |
names_from = value_type, | |
values_from = value) |> | |
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name), | |
by=c("ensembl_id"="ensembl_gene_id_version")) |> | |
mutate(padj = case_when(is.na(padj) ~ 1, | |
T ~ padj), | |
chromosome_name = case_when(is.na(chromosome_name) ~ "NA", | |
T ~ chromosome_name)) |> | |
dplyr::count(comparison, chromosome_name == "21", log2FoldChange > 0, padj <= .1) |> | |
pivot_wider(names_from = comparison, | |
values_from = n) |> | |
arrange(`padj <= 0.1`, desc(`chromosome_name == "21"`), desc(`log2FoldChange > 0`)) |> | |
dplyr::select(`padj <= 0.1`, | |
`chromosome_name == "21"`, | |
`log2FoldChange > 0`, | |
T21_vs_D21, dT21XIST_vs_T21, wdT21XIST_vs_T21, dwdT21SIT_vs_T21) |> | |
pivot_longer(cols = -c(`padj <= 0.1`, | |
`chromosome_name == "21"`, | |
`log2FoldChange > 0`), | |
names_to = "comparison", | |
values_to= "num_genes" ) |> | |
filter(`padj <= 0.1` == T) |> | |
mutate(`chromosome_name == "21"` = case_when( | |
`chromosome_name == "21"` == T ~ "chr21", | |
T ~ "non chr21" | |
)) | |
binom_test_data_df <- gene_updown_df |> | |
pivot_wider(names_from = `log2FoldChange > 0`, values_from = "num_genes") |> | |
mutate(total = `TRUE` + `FALSE`) |> | |
mutate(binom_pval = purrr::pmap(.l = list(x=`TRUE`, tot=total), | |
.f = \(x, tot) (binom.test(x = x, | |
n = tot, | |
alternative = "two.sided")$p.value)[1])) |> | |
tidyr::unnest(binom_pval) | |
ggplot(gene_updown_df) + | |
geom_col(aes(x=comparison, y=num_genes, fill=`log2FoldChange > 0`), position = position_dodge()) + | |
facet_grid(rows = vars(`chromosome_name == "21"`), scales = "free_y") + | |
geom_text(data = binom_test_data_df |> | |
mutate(max_count = pmax(`TRUE`, `FALSE`)), | |
mapping = aes(x=comparison, y=max_count*1.2, label=sprintf("%.2g", binom_pval), vjust=1), | |
size=3) + | |
scale_fill_aaas() + | |
scale_x_discrete(limits = c("T21_vs_D21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21"), | |
labels = c("T21 vs D21", "+dox vs T21", "w/d vs T21", "combined vs T21")) + | |
theme(axis.title.x = element_blank()) + | |
labs(y = "# Genes", | |
title = "Differential Expression by Chromosome") | |
ggsave(sprintf("%s/diff_expr_by_chr.pdf", fig_output_dir), width = 4, height = 3) | |
combined_results_df |> | |
pivot_longer( | |
cols = c(tidyselect::ends_with(c( | |
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(id_cols = c(ensembl_id, comparison), | |
names_from = value_type, | |
values_from = value) |> | |
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name), | |
by=c("ensembl_id"="ensembl_gene_id_version")) |> | |
mutate(padj = case_when(is.na(padj) ~ 1, | |
T ~ padj), | |
chromosome_name = case_when(is.na(chromosome_name) ~ "NA", | |
T ~ chromosome_name), | |
is21 = case_when(chromosome_name == "21" ~ "chr21", | |
T ~ "non chr21")) |> | |
filter(comparison %in% c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21")) |> | |
ggplot() + | |
geom_density(aes(x=padj, color = comparison)) + | |
facet_grid(rows = vars(is21)) + | |
scale_color_aaas(breaks = c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21"), labels = c("T21XIST vs T21", "T21XIST vs D21")) + | |
scale_x_log10(limits=c(1e-4, 1e0), oob = scales::squish) + | |
labs(title="Distribution of padj") | |
ggsave(sprintf("%s/2_dist_padj.png", fig_output_dir)) | |
ggsave(sprintf("%s/2_dist_padj_linear.png", fig_output_dir)) | |
combined_results_df |> | |
pivot_longer( | |
cols = c(tidyselect::ends_with(c( | |
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(id_cols = c(ensembl_id, comparison), | |
names_from = value_type, | |
values_from = value) |> | |
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name), | |
by=c("ensembl_id"="ensembl_gene_id_version")) |> | |
mutate(padj = case_when(is.na(padj) ~ 1, | |
T ~ padj), | |
chromosome_name = case_when(is.na(chromosome_name) ~ "NA", | |
T ~ chromosome_name), | |
is21 = case_when(chromosome_name == "21" ~ "chr21", | |
T ~ "non chr21")) |> | |
filter(comparison %in% c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21") & | |
padj <= .01) |> | |
ggplot() + | |
geom_density(aes(x=log2FoldChange, color = comparison)) + | |
facet_grid(rows = vars(is21)) + | |
geom_vline(xintercept = 0) + | |
scale_color_aaas(breaks = c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21"), labels = c("T21XIST vs T21", "T21XIST vs D21")) + | |
scale_x_continuous(limits = c(-1.5, 1.5), breaks = c(seq(0,15, by=2)/10, seq(0,15, by=2)/-10), oob = scales::squish) + | |
labs(title="Distribution of significant log2FC (padj <= .01)") | |
ggsave(sprintf("%s/2_dist_log2fc_01.png", fig_output_dir)) | |
file.copy(fs::dir_ls(path = sprintf("%s/tables", neural_output_dir), | |
glob = sprintf( | |
"%s/tables/gsea_*csv.gz", | |
neural_output_dir |> | |
str_replace_all("\\+", "\\\\+") |> | |
str_replace_all("\\,", "\\\\,") | |
)), | |
sprintf("%s/tables", output_dir), | |
overwrite=TRUE) | |
gsea_files <- fs::dir_ls( | |
path = sprintf("%s/tables", output_dir), | |
glob = sprintf( | |
"%s/tables/gsea_*csv.gz", | |
output_dir |> | |
str_replace_all("\\+", "\\\\+") |> | |
str_replace_all("\\,", "\\\\,") | |
) | |
) | |
print(gsea_files) | |
combined_enrichment_df <- read_csv(gsea_files, | |
id = "origin_file") |> | |
mutate(comparison = str_replace_all(origin_file, sprintf("(%s/tables/gsea_)|(.csv.gz)", output_dir |> | |
str_replace_all("\\+", "\\\\+") |> | |
str_replace_all("\\(", "\\\\(") |> | |
str_replace_all("\\)", "\\\\)") |> | |
str_replace_all("\\,", "\\\\,")), "")) |> | |
mutate( | |
leadingEdgeTags = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 1], "tags=|%", "")) / 100, | |
leadingEdgeList = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 2], "list=|%", "")) / 100, | |
leadingEdgeSignal = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 3], "signal=|%", "")) / 100, | |
numCoreEnrichment = str_count(str_split(core_enrichment, "/")) + 1 | |
) |> | |
pivot_wider(id_cols = ID, names_from = comparison, values_from = c(NES, p.adjust, qvalue, enrichmentScore, setSize, leadingEdgeTags, leadingEdgeList, leadingEdgeSignal, numCoreEnrichment)) |> | |
mutate_at(vars(starts_with("p.adjust")), list(~ if_else(is.na(.), 1, .))) |> | |
mutate_at(vars(starts_with("qvalues")), list(~ if_else(is.na(.), 1, .))) |> | |
mutate_at(vars(starts_with("NES")), list(~ if_else(is.na(.), 0, .))) |> | |
mutate_at(vars(starts_with("enrichmentScore")), list(~ if_else(is.na(.), 0, .))) |> | |
# left_join(term_clusters_df, by = c("ID" = "term_name")) |> | |
# left_join(advanced_terms_clusters_df |> dplyr::select(clustered, top_words)) |> | |
dplyr::mutate(category = dplyr::case_when( | |
str_detect(ID, "STRESS") ~ "stress", | |
str_detect(ID, "ENDOPLASMIC_RETICULUM") ~ "ER", | |
str_detect(ID, "chr21") ~ "chr21", | |
str_detect(ID, "TRANSLATION") ~ "translation", | |
str_detect(ID, "RIBOSOM") ~ "ribosome", | |
str_detect(ID, "SERINE") & (!str_detect(ID, "SERINE_THREONINE")) ~ "serine", | |
str_detect(ID, "MITOCH") ~ "mitochondria", | |
str_detect(ID, "NRF2") ~ "stress", | |
# str_detect(ID, "FOLATE") ~ "Folate", | |
# str_detect(ID, "MTORC1") ~ "MTORC1", | |
str_detect(ID, "APOPTO") | str_detect(ID, "PROGRAMMED_CELL_DEATH") ~ "apoptosis", | |
# str_detect(top_words, "EXTRACELLULAR|MATRIX") ~ "ECM", | |
T ~ "other" | |
)) | |
write_csv(combined_enrichment_df, paste0(table_output_dir, "/bulk_gsea_results.csv")) | |
combined_enrichment_df |> | |
dplyr::select(ID, category, | |
ends_with(c("T21_vs_D21", "D21_vs_T21", "dT21XIST_vs_T21", | |
"wdT21XIST_vs_T21", "dwdT21SIT_vs_T21", | |
"neural_t21_d21", "neural_t21xistdox_t21", "neural_t21xistwd_t21", "neural_t21xistdoxcombined_t21"))) |> | |
rename_all( | |
.funs = list( | |
~str_replace_all(.,"dwdT21SIT_vs_T21", "combinedT21XIST_vs_T21") |> | |
str_replace_all("neural_t21_d21", "neural_T21_vs_D21") |> | |
str_replace_all("neural_t21xistdox_t21", "neural_dT21XIST_vs_T21") |> | |
str_replace_all("neural_t21xistwd_t21", "neural_wdT21XIST_vs_T21") |> | |
str_replace_all("neural_t21xistdoxcombined_t21", "neural_combinedT21XIST_vs_T21") | |
) | |
) |> | |
write_csv(paste0(table_output_dir, "/Data_S2_bulk_gsea.csv.gz")) | |
## PCA & dendro | |
batch_corrected_vsd <- batch_corrected_vsd_df |> | |
column_to_rownames(var="ensembl_id") | |
pca <- batch_corrected_vsd |> | |
t() |> | |
prcomp() | |
pca["x"] |> | |
as.data.frame() |> | |
rownames_to_column(var = "sample_id") |> | |
rename_with(~ str_remove(.x, "x\\.")) |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
mutate( | |
designation = case_when( | |
cell_line %in% c("198_1", "198_2") ~ "Euploid", | |
cell_line == "198_5" ~ "T21", | |
dox_status == "no" ~ "T21", | |
dox_status == "yes" ~ "Euploid", | |
dox_status == "withdrawal" ~ "w/d" | |
), | |
cell_line = factor(case_when( | |
cell_line == "rtTA_XIST_c1" ~ "c1", | |
cell_line == "rtTA_XIST_c3" ~ "c3", | |
cell_line == "rtTA_XIST_c4" ~ "c4", | |
cell_line == "rtTA_XIST_c7" ~ "c7", | |
cell_line == "198_1" ~ "198-1", | |
cell_line == "198_2" ~ "198-2", | |
cell_line == "198_5" ~ "198-5" | |
), | |
levels = c("c1", "c3", "c4", "c7", "198-1", "198-2", "198-5") | |
) | |
) |> | |
ggplot(aes(x = PC1, y = PC2, color = designation, shape = cell_line)) + | |
geom_point(size = 1.5) + | |
scale_color_manual(values = pal_d3()(5)[3:5], name = "Designation") + | |
scale_shape_manual(limits=c("c1", "c4", "c7", "198-1", "198-2", "198-5"), values = c(15,16,17,0,1,2), name="Cell Line") + | |
# geom_label(aes(label = sample_id)) + | |
labs( | |
x = sprintf("PC%d (%.2f%%)", 1, (pca$sdev^2 / sum(pca$sdev^2) * 100)[1]), | |
y = sprintf("PC%d (%.2f%%)", 2, (pca$sdev^2 / sum(pca$sdev^2) * 100)[2]), | |
title = "PCA" | |
) + | |
theme(legend.box.margin = margin(l=-3, r=-2, unit = "mm")) | |
ggsave(sprintf("%s/pca.pdf", fig_output_dir), width = 3.12, height = 2.3) | |
sampleTree <- hclust(dist(t(batch_corrected_vsd)), method = "average") | |
# ggdendrogram(sampleTree) + labs(title = "WGCNA Sample Dendrogram") | |
sample_dend_data <- dendro_data(sampleTree, type = "rectangle") | |
sample_dend_data$labels <- sample_dend_data$labels %>% | |
left_join(sample_sheet_df |> | |
dplyr::select(sample_id, dox_status, genotype, effective_ploidy), by = c("label" = "sample_id")) | |
ggplot() + | |
geom_segment( | |
data = sample_dend_data$segments, | |
mapping = aes(x = x, y = y, xend = xend, yend = yend) | |
) + | |
geom_text( | |
data = sample_dend_data$labels, | |
aes(x = x, y = y, label = label), | |
angle = 90, size = 3 | |
) + | |
geom_tile( | |
data = sample_dend_data$labels, | |
aes(x = x, y = -6, fill = effective_ploidy), height = 2 | |
) + | |
annotate(geom = "text", x = 50, y = -6, label = "Effective Ploidy", vjust = .5) + | |
scale_x_continuous(limits = c(0, 60)) + | |
scale_fill_npg() + | |
geom_tile( | |
data = sample_dend_data$labels, | |
aes(x = x, y = -9, fill = genotype), height = 2 | |
) + | |
annotate(geom = "text", x = 50, y = -9, label = "Genotype", hjust = 0) + | |
geom_tile( | |
data = sample_dend_data$labels, | |
aes(x = x, y = -12, fill = dox_status), height = 2 | |
) + | |
annotate(geom = "text", x = 50, y = -12, label = "Dox Status", hjust = 0) | |
## DESeq2 Results Summary | |
setups_df <- read_csv(paste0(output_dir, "/diff_exprs.csv.gz")) | |
## Chr21 Changes | |
gene_counts_df <- batch_corrected_vsd_df |> | |
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vsd_count") |> | |
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
left_join(sample_sheet_df) | |
gene_counts_df |> | |
filter(hgnc_symbol == "XIST" & | |
genotype == "T21XIST") |> | |
mutate( | |
dox_status = case_when( | |
dox_status == "yes" ~ "+dox", | |
dox_status == "no" ~ "-dox", | |
dox_status == "withdrawal" ~ "w/d" | |
), | |
dox_status = factor(dox_status, levels = c("-dox", "+dox", "w/d")), | |
clone = case_when( | |
cell_line == "rtTA_XIST_c1" ~ "c1", | |
cell_line == "rtTA_XIST_c3" ~ "c3", | |
cell_line == "rtTA_XIST_c4" ~ "c4", | |
cell_line == "rtTA_XIST_c7" ~ "c7" | |
), | |
timepoint = case_when( | |
timepoint == "14wk" ~ "14wk (6)", | |
timepoint == "17wk" ~ "17wk (6)", | |
timepoint == "20wk" ~ "20wk (9)", | |
timepoint == "3wk" ~ "3wk (6)", | |
timepoint == "6wk" ~ "6wk (9)", | |
timepoint == "9wk" ~ "9wk (8)", | |
T ~ as.character(timepoint) | |
), | |
timepoint = factor(timepoint, levels=rev(c("3wk (6)", "6wk (9)", "9wk (8)", | |
"14wk (6)", "17wk (6)", "20wk (9)"))) | |
) |> | |
arrange(timepoint) |> | |
ggplot(aes(y = vsd_count, x = clone, color = timepoint)) + | |
facet_grid(cols = vars(dox_status)) + | |
geom_violin(color = "lightgrey") + | |
ggbeeswarm::geom_quasirandom(size = 1) + | |
scale_color_manual(values = c(pal_aaas()(4), pal_d3()(2)[2], pal_cosmic()(4)[4]), name="Timepoint", | |
limits=c("3wk (6)", "6wk (9)", "9wk (8)", | |
"14wk (6)", "17wk (6)", "20wk (9)")) + | |
# scale_color_d3(name = "Timepoint") + | |
theme(axis.title.x = element_blank()) + | |
labs( | |
title = "XIST Expression", | |
y = "VST Count" | |
) | |
ggsave(sprintf("%s/xist_expression.pdf", fig_output_dir), width = 3, height = 1.7) | |
# using complexheatmap to add more sample and gene annotations | |
chr21_rownorm_vsd <- batch_corrected_vsd_df |> | |
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vst_count") |> | |
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
left_join(sample_sheet_df, by = c("sample_id")) |> | |
filter(chromosome_name == "21") |> | |
group_by(ensembl_id) |> | |
mutate( | |
row_norm_count = vst_count - mean(cur_data() %>% filter(`Cell Line` == "198-5") %>% pull(vst_count)), | |
mean_1985_count = mean(cur_data() %>% filter(`Cell Line` == "198-5") %>% pull(vst_count)) | |
) |> | |
ungroup() |> | |
mutate(dox_status = factor(dox_status, levels = c("no", "yes", "withdrawal"))) | |
chr21_rownorm_hm_df <- chr21_rownorm_vsd |> | |
filter( # mean_1985_count>8 & | |
!(gene_biotype %in% c("rRNA", "TEC"))) |> | |
mutate( | |
gene_biotype = factor(case_when( | |
gene_biotype == "protein_coding" ~ "Protein\nCoding", | |
gene_biotype == "processed_pseudogene" ~ "Pseudo-\ngene", | |
gene_biotype == "transcribed_processed_pseudogene" ~ "Pseudo-\ngene", | |
T ~ gene_biotype), | |
levels = c("Protein\nCoding", "lncRNA", "Pseudo-\ngene"))) | |
ipsc_rownorm_mat <- chr21_rownorm_hm_df |> | |
pivot_wider(id_cols = ensembl_id, | |
names_from = sample_id, | |
values_from = row_norm_count) |> | |
column_to_rownames(var="ensembl_id") | |
t21count_col_fun <- circlize::colorRamp2(c(8,16), c("#ffffff", "#7105E0")) | |
gene_annotation <- rowAnnotation( | |
`Mean T21 Count` = | |
anno_simple( | |
x=chr21_rownorm_hm_df |> | |
dplyr::select(ensembl_id, mean_1985_count) |> | |
dplyr::distinct() |> | |
deframe(), | |
col = t21count_col_fun, | |
simple_anno_size = unit(3, "mm")), | |
annotation_name_gp= gpar(fontsize = 7)) | |
lgd_t21count = Legend(title = "Mean T21\nCount", col_fun = t21count_col_fun, at = c(8,10,12,14,16), | |
labels = c(8,10,12,14,16), | |
direction = "horizontal", | |
labels_gp=gpar(fontsize=7), | |
title_gp=gpar(fontsize=7), | |
grid_height = unit(2, "mm")) | |
sample_annotation <- columnAnnotation( | |
`Cell Line` = chr21_rownorm_hm_df |> | |
dplyr::select(sample_id, `Cell Line`) |> | |
dplyr::distinct() |> | |
mutate(`Cell Line` = str_replace(`Cell Line`, "rtTA-XIST", "T21XIST")) |> | |
column_to_rownames(var="sample_id") |> | |
deframe(), | |
`Batch` = chr21_rownorm_hm_df |> | |
dplyr::select(sample_id, `timepoint`) |> | |
dplyr::distinct() |> | |
column_to_rownames(var="sample_id") |> | |
deframe() |> | |
factor(levels=c("3wk", "6wk", "9wk", "14wk", "17wk", "20wk")), | |
simple_anno_size = unit(3, "mm"), | |
annotation_legend_param = list(labels_gp=gpar(fontsize=7), title_gp=gpar(fontsize=7), grid_height = unit(2, "mm"), grid_width = unit(2, "mm")), | |
col = list(`Cell Line` = c("198-1"="#1697a6", "198-2"="#0e606b", "198-5"="#ffc24b", | |
"T21XIST c1"="#fff4f1", "T21XIST c4"="#ffb3ae", "T21XIST c7"="#f47068"), | |
`Batch` = c("3wk"="#faedcd", "6wk"="#e7c8a0", "9wk"="#d4a373", | |
"14wk"="#a1c349", "17wk"="#87a330", "20wk"="#6a8532")), | |
annotation_name_gp= gpar(fontsize = 7) | |
) | |
ipsc_hm <- Heatmap(ipsc_rownorm_mat, | |
col=circlize::colorRamp2(c(-1, 0, 1), c("#3784AF", "#f7f7f7", "#D34B16")), | |
# col=circlize::colorRamp2(c(-1, 0, 1), c("#0571B0", "#f7f7f7", "#CA0020")), | |
name="Row Norm\nVST Count", | |
border=T, | |
cluster_rows = F, | |
row_order = chr21_rownorm_hm_df |> dplyr::select(ensembl_id, mean_1985_count) |> deframe() |> sort() |> names() |> unique() |> rev(), | |
row_split = chr21_rownorm_hm_df |> | |
dplyr::select(ensembl_id, gene_biotype) |> | |
distinct() |> | |
deframe(), | |
row_title_side = "right", | |
row_gap = unit(1, "mm"), | |
show_row_names = F, | |
row_title_gp = gpar(fontsize = 7), | |
row_title_rot = 45, | |
column_title_gp = gpar(fontsize = 7), | |
column_names_gp = gpar(fontsize = 7), | |
left_annotation = gene_annotation, | |
show_column_dend=F, | |
column_split = chr21_rownorm_hm_df |> | |
mutate(split_label = case_when( | |
genotype == "T21XIST" & dox_status == "yes" ~ "+dox", | |
genotype == "T21XIST" & dox_status == "no" ~ "-dox", | |
genotype == "T21XIST" & dox_status == "withdrawal" ~ "w/d", | |
T ~ genotype)) |> | |
dplyr::select(sample_id, split_label) |> | |
distinct() |> | |
deframe() |> | |
factor(levels=c("T21", "-dox", "+dox", "w/d", "D21")), | |
cluster_column_slices=F, | |
column_gap = unit(1, "mm"), | |
show_column_names = F, | |
bottom_annotation = sample_annotation, | |
heatmap_legend_param = list(direction = "horizontal", | |
labels_gp=gpar(fontsize=7), | |
title_gp=gpar(fontsize=7), | |
grid_height = unit(2, "mm"))) | |
pdf(sprintf("%s/expression_relative_to_1985.pdf", fig_output_dir), width = 3.5, height = 4.7) | |
draw(ipsc_hm, annotation_legend_list = list(lgd_t21count), | |
merge_legend=T, heatmap_legend_side = "bottom", annotation_legend_side = "bottom") | |
dev.off() | |
chr21_changes_comparisons <- c("D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21") | |
chr21_changes_comparison_cols <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
chr21_changes_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
chr21_changes_df <- combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(chr21_changes_comparison_cols)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985, | |
# padj = padj_dox_t21xist_vs_1985, | |
# baseMean = baseMean_dox_t21xist_vs_1985, | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) | |
chr21_arms <- read_tsv("http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz", | |
col_names = c("chrom","chromStart","chromEnd","name","gieStain")) |> | |
filter(chrom == "chr21") |> | |
mutate(arm = substring(name, 1, 1)) |> | |
group_by(chrom, arm) |> | |
summarise(start_pos = min(chromStart), | |
end_pos = max(chromEnd), | |
len = end_pos - start_pos) | |
chr21_changes_df |> | |
dplyr::select(ensembl_id, start_position, end_position) |> | |
unique() |> | |
filter(start_position < 12000000) |> | |
arrange(start_position) |> | |
top_n(-1) | |
ggplot( | |
chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d"))), | |
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean) | |
) + | |
facet_grid(rows = vars(comparison)) + | |
geom_point(alpha = .5) + | |
scale_color_manual( | |
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]), | |
limits = c("padj≤.1 & FC=(2/3)±.1", "padj≤.1 & FC=(2/3)±.15", "padj≤.1", "padj>.1") | |
) + | |
scale_size_area(limits = c(NA, 10000), oob = scales::squish, max_size = 4, breaks=c(1,10,100,1000,10000), guide = "none") + | |
labs( | |
title = "chr21 Dosage Correction", | |
color = "Gene Category", | |
size = "Base Mean Expression", | |
x = "chr21 Genes (ordered by position)", | |
y = "log2(FC)" | |
) + | |
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) + | |
geom_hline(yintercept = 0, linetype = "solid") + | |
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) + | |
# geom_vline(xintercept = "ENSG00000277117.5", linetype = "dotted", size=.8) + | |
guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) + | |
theme( | |
axis.text.x = element_blank(), | |
axis.ticks.x = element_blank(), | |
legend.position = "top", | |
# legend.position = "right", | |
legend.box.margin = margin(t=-5,b=-9), | |
legend.spacing.y = unit(1, "mm") | |
# legend.spacing.x = unit(3, "mm"), | |
# legend.margin = margin(3,3,3,3, "mm"), | |
# legend.box.background = element_rect(color = "black", size=.5) | |
) + | |
scale_y_continuous(breaks = c(-1,0, 1)) + | |
coord_cartesian(ylim = c(-1.5, 1.5)) | |
ggsave(sprintf("%s/hsa21_dosage_correction.pdf", fig_output_dir), width = 3.5, height = 2.75, device = cairo_pdf) | |
ggsave(sprintf("%s/hsa21_dosage_correction_basemeanscale.pdf", fig_output_dir), width = 3.5, height = 2.75, device = cairo_pdf) | |
chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d")), | |
baseMean = case_when(is.na(baseMean) ~ 0, | |
T ~ baseMean)) |> | |
dplyr::count(comparison, color_label, baseMean >= 1000) |> | |
pivot_wider(names_from = `baseMean >= 1000`, values_from = n) |> | |
dplyr::rename("baseMean>=1000"=`TRUE`, "baseMean<1000"=`FALSE`) |> | |
write_csv(sprintf("%s/ipsc_hsa21_dosage_correction_counts.csv", table_output_dir), quote = "all") | |
chr21_changes_comparisons <- c("dT21XIST_vs_T21", "wdT21XIST_vs_T21") | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985, | |
# padj = padj_dox_t21xist_vs_1985, | |
# baseMean = baseMean_dox_t21xist_vs_1985, | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
comparison == "dwdT21SIT_vs_T21" ~ "combined", | |
T ~ comparison)) |> | |
# filter(padj <= .1) |> | |
filter(baseMean >= 1000) |> | |
ggplot() + | |
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) + | |
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
geom_density(aes(x = log2FoldChange, color = comparison)) + | |
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) + | |
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) + | |
coord_cartesian(xlim = c(-1.5, 1.5)) + | |
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) (baseMean >= 1000)") + | |
theme(legend.position = c(.8, .5)) | |
ggsave(sprintf("%s/hsa21_log2fc_1000.pdf", fig_output_dir), width = 3.5, height = 1.5) | |
# ggsave(sprintf("%s/hsa21_log2fc.pdf", fig_output_dir), width = 3.25, height = 1.5) | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985, | |
# padj = padj_dox_t21xist_vs_1985, | |
# baseMean = baseMean_dox_t21xist_vs_1985, | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
comparison == "dwdT21SIT_vs_T21" ~ "combined", | |
T ~ comparison)) |> | |
filter(padj <= .1) |> | |
group_by(comparison) |> | |
summarise(num_genes = length(unique(ensembl_id))) | |
combined_results_df |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c("dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_D21" ~ "+dox", | |
comparison == "wdT21XIST_vs_D21" ~ "w/d", | |
comparison == "dwdT21SIT_vs_D21" ~ "combined", | |
T ~ comparison)) |> | |
# filter(padj <= .1) |> | |
filter(baseMean >= 1000) |> | |
ggplot() + | |
# geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
# geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
# geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
# geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
# annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) + | |
# annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
# annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
geom_density(aes(x = log2FoldChange, color = comparison)) + | |
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) + | |
# geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
# annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) + | |
coord_cartesian(xlim = c(-1.5, 1.5)) + | |
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) from D21\niPSCs (baseMean >= 1000)") + | |
theme(legend.position = c(.8, .7)) | |
ggsave(sprintf("%s/hsa21_log2fc_d21_1000.pdf", fig_output_dir), width = 3.5, height = 2) | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985, | |
# padj = padj_dox_t21xist_vs_1985, | |
# baseMean = baseMean_dox_t21xist_vs_1985, | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
comparison == "dwdT21SIT_vs_T21" ~ "combined", | |
T ~ comparison)) |> | |
filter(padj <= .1) |> | |
ggplot() + | |
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) + | |
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
geom_density(aes(x = log2FoldChange, color = comparison)) + | |
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) + | |
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) + | |
coord_cartesian(xlim = c(-1.5, 1.5)) + | |
labs(y = "Density", x = "log2(FC)", title = "HSA21 Significant Log2(FC)") + | |
theme(legend.position = c(.8, .7)) | |
batch_corrected_vsd_df |> | |
pivot_longer( | |
cols = -ensembl_id, | |
names_to = "sample_id", | |
values_to = "vst_count" | |
) |> | |
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
left_join(sample_sheet_df, by="sample_id") |> | |
mutate(sample_group = case_when(genotype %in% c("D21", "T21") ~ genotype, | |
genotype == "T21XIST" ~ paste(dox_status, timepoint, sep="_"))) |> | |
group_by(ensembl_id, sample_group) |> | |
summarise(mean_vst = mean(vst_count)) |> | |
ungroup() |> | |
pivot_wider(id_cols = "ensembl_id", names_from="sample_group", values_from = "mean_vst") |> | |
pivot_longer(cols = -c(ensembl_id, T21), names_to = "sample_group", values_to = "mean_vst") |> | |
mutate(diff_from_t21 = mean_vst - T21, | |
dox_status = str_extract(sample_group, "D21|no|yes|withdrawal"), | |
timepoint = str_extract(sample_group, "3wk|6wk|9wk|14wk|17wk|20wk")) |> | |
filter(dox_status == "withdrawal") |> | |
mutate(dox_group = case_when( | |
str_detect(timepoint, "14wk|17wk|20wk") ~ "2m", | |
T ~ "3w"), | |
wd_group = case_when( | |
timepoint %in% c("6wk", "17wk") ~ "3wk", | |
timepoint %in% c("9wk", "20wk") ~ "6wk" | |
)) |> | |
ggplot() + | |
geom_density(aes(x = diff_from_t21, color = wd_group, linetype = dox_group), alpha=.7) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
coord_cartesian(xlim = c(-1, 1)) + | |
scale_color_manual(values=pal_simpsons()(5)[c(2,5)]) + | |
scale_linetype_manual(limits=c("3w", "2m"), values=c("solid", "dashed")) + | |
labs(y = "Density", x = "VST Diff", title = "HSA21 Mean VST diff from T21") + | |
theme(legend.position = c(.8, .57)) | |
ggsave(sprintf("%s/hsa21_withdrawal_vst_diff.pdf", fig_output_dir), width = 3.25, height = 2) | |
# only looking at allelic escapees | |
potential_escapees <- c("ENSG00000142166.13", "ENSG00000142192.21", "ENSG00000154734.16", "ENSG00000157557.13", "ENSG00000159110.21", "ENSG00000182362.14", "ENSG00000243927.6", "ENSG00000249624.10") | |
ggplot( | |
chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d"))), | |
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean) | |
) + | |
facet_grid(rows = vars(comparison)) + | |
geom_point(data = chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d"))), color="white") + | |
geom_point(data = chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d"))) |> | |
filter(ensembl_id %in% potential_escapees), alpha=.5) + | |
scale_color_manual( | |
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]), limits = c("padj≤.1 & FC=(2/3)±.1", "padj≤.1 & FC=(2/3)±.15", "padj≤.1", "padj>.1"), | |
guide="none" | |
) + | |
scale_size_area(limits = c(NA, 10000), oob = scales::squish, guide = "none", max_size = 4) + | |
labs( | |
title = "chr21 Dosage Correction - Allelic Escapees", | |
color = "Gene Category", | |
size = "Base Mean Expression", | |
x = "chr21 Genes (ordered by position)", | |
y = "log2(FC)" | |
) + | |
ggrepel::geom_text_repel(data = chr21_changes_df |> | |
mutate(comparison = factor(case_when( | |
comparison == "D21_vs_T21" ~ "D21", | |
comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison), levels = c("D21", "+dox", "w/d")))|> | |
filter(ensembl_id %in% potential_escapees), | |
mapping=aes(label=hgnc_symbol), size=2, | |
min.segment.length=0) + | |
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) + | |
geom_hline(yintercept = 0, linetype = "solid") + | |
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) + | |
# guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) + | |
theme( | |
axis.text.x = element_blank(), | |
axis.ticks.x = element_blank(), | |
legend.position = "top", | |
# legend.spacing.x = unit(3, "mm"), | |
# legend.margin = margin(3,3,3,3, "mm"), | |
legend.box.background = element_rect(color = "black", size=1) | |
) + | |
coord_cartesian(ylim = c(-1.5, 1.5)) | |
ggsave(sprintf("%s/hsa21_dosage_correction_allelic_escapees.pdf", fig_output_dir), width = 3.5, height = 2.28) | |
# ggsave(sprintf("%s/hsa21_dosage_correction_allelic_escapees.png", fig_output_dir), width = 3.25, height = 1.97, dpi=300) | |
chr21_changes_comparison_cols <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
chr21_changes_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
filter(chromosome_name %in% c( | |
"1", "2", "3", "4", "5", "6", "7", | |
"8", "9", "10", "11", "12", "13", | |
"14", "15", "16", "17", "18", "19", | |
"20", "21", "22", "X", "Y" | |
)) |> | |
pivot_longer( | |
# cols = c(ends_with("nonrtta_d21_vs_t21"), ends_with("rtta_wd_vs_nd"), ends_with("rtta_wd_vs_nd_powered"), ends_with("_rtta_d_vs_nd"), ends_with("dox_t21xist_vs_1985")), | |
cols = c(starts_with(c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(category = factor(case_when( | |
padj < .1 & log2FoldChange > 0 ~ "gain", | |
padj < .1 & log2FoldChange < 0 ~ "loss", | |
T ~ "nochange" | |
), levels = c("gain", "nochange", "loss"))) |> | |
dplyr::count(chromosome_name, category, comparison) |> | |
filter(comparison %in% c("dwdT21SIT_vs_T21", "T21_vs_D21")) |> | |
mutate(comparison = case_when(comparison == "dwdT21SIT_vs_T21" ~ "T21XIST/T21", | |
comparison == "T21_vs_D21" ~ "T21/D21"), | |
comparison= factor(comparison, levels = c("T21/D21", "T21XIST/T21"))) |> | |
ggplot() + | |
geom_bar(aes( | |
x = factor( | |
chromosome_name, | |
levels = c( | |
"1", "2", "3", "4", "5", "6", "7", | |
"8", "9", "10", "11", "12", "13", | |
"14", "15", "16", "17", "18", "19", | |
"20", "21", "22", "X", "Y" | |
) | |
), | |
y = n, | |
fill = category | |
), | |
stat = "identity", | |
position = position_fill() | |
) + | |
# facet_grid(rows = vars(factor(comparison, levels = c("nonrtta_d21_vs_t21", "rtta_wd_vs_nd", "rtta_wd_vs_nd_powered", "rtta_d_vs_nd", "dox_t21xist_vs_1985")))) + | |
facet_grid(rows = vars(comparison)) + | |
geom_hline(yintercept = .5, linetype = "dashed", size = .3) + | |
scale_fill_uchicago( | |
limits = c("gain", "nochange", "loss"), | |
labels = c("Inc", "~0", "Dec") | |
) + | |
labs( | |
title = "Global Changes in Gene Expression - iPSCs", | |
fill = "Direction", | |
x = "Chromosome", | |
y = "% Genes" | |
) + | |
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1, size=8), | |
plot.margin = margin(t=.5,0,0,0, "mm"), | |
legend.margin = margin(l=-3, unit = "mm"), | |
axis.title.x = element_blank()) | |
ggsave(sprintf("%s/global_changes_in_gene_expression.pdf", fig_output_dir), width = 4, height = 2.3) | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
filter(chromosome_name %in% c( | |
"1", "2", "3", "4", "5", "6", "7", | |
"8", "9", "10", "11", "12", "13", | |
"14", "15", "16", "17", "18", "19", | |
"20", "21", "22", "X", "Y" | |
)) |> | |
pivot_longer( | |
# cols = c(ends_with("nonrtta_d21_vs_t21"), ends_with("rtta_wd_vs_nd"), ends_with("rtta_wd_vs_nd_powered"), ends_with("_rtta_d_vs_nd"), ends_with("dox_t21xist_vs_1985")), | |
cols = c(starts_with(c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"))), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(category = factor(case_when( | |
padj < .1 & log2FoldChange > 0 ~ "gain", | |
padj < .1 & log2FoldChange < 0 ~ "loss", | |
T ~ "nochange" | |
), levels = c("gain", "nochange", "loss"))) |> | |
dplyr::count(chromosome_name, category, comparison) |> | |
filter(comparison %in% c("dT21XIST_vs_T21")) |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d")) |> | |
ggplot() + | |
geom_bar(aes( | |
x = factor( | |
chromosome_name, | |
levels = c( | |
"1", "2", "3", "4", "5", "6", "7", | |
"8", "9", "10", "11", "12", "13", | |
"14", "15", "16", "17", "18", "19", | |
"20", "21", "22", "X", "Y" | |
) | |
), | |
y = n, | |
fill = category | |
), | |
stat = "identity", | |
position = position_fill() | |
) + | |
# facet_grid(rows = vars(factor(comparison, levels = c("nonrtta_d21_vs_t21", "rtta_wd_vs_nd", "rtta_wd_vs_nd_powered", "rtta_d_vs_nd", "dox_t21xist_vs_1985")))) + | |
# facet_grid(rows = vars(comparison)) + | |
geom_hline(yintercept = .5, linetype = "dashed", size = .3) + | |
scale_fill_uchicago( | |
limits = c("gain", "nochange", "loss"), | |
labels = c("Inc", "~0", "Dec") | |
) + | |
labs( | |
title = "Global Changes in Gene Expression", | |
fill = "Direction", | |
x = "Chromosome", | |
y = "% Genes" | |
) + | |
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1, size=8), | |
plot.margin = margin(t=.5,0,0,0, "mm"), | |
legend.margin = margin(l=-3, unit = "mm"), | |
axis.title.x = element_blank()) | |
ggsave(sprintf("%s/global_changes_in_gene_expression.pdf", fig_output_dir), width = 4, height = 1.3) | |
## Diff. Expression Overlaps | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c("neural_t21xistdoxcombined_t21", | |
"neural_t21xistdox_t21", | |
"neural_t21xistwd_t21") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(comparison = case_when(comparison == "neural_t21xistdox_t21" ~ "+dox", | |
comparison == "neural_t21xistwd_t21" ~ "w/d", | |
comparison == "neural_t21xistdoxcombined_t21" ~ "combined", | |
T ~ comparison)) |> | |
filter(baseMean >= 1000) |> | |
ggplot() + | |
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) + | |
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) + | |
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) + | |
geom_density(aes(x = log2FoldChange, color = comparison)) + | |
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) + | |
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
annotate("text", x = -1, y = .75, label = "log2(2/3)", color = "black", size = 2) + | |
coord_cartesian(xlim = c(-1.5, 1.5)) + | |
labs(y = "Density", x = "log2(FC)", title = "Neural chr21 Log2(FC) (baseMean >= 1000)") + | |
theme(legend.position = c(.8, .7)) | |
ggsave(sprintf("%s/chr21_dist_neural_1000.pdf", fig_output_dir), width = 3.5, height = 1.5) | |
combined_results_df |> | |
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c("neural_d21_t21xistdoxcombined", | |
"neural_d21_t21xistdox", | |
"neural_d21_t21xistwd") | |
)) |> map(lift(paste0)) |> unlist())), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(comparison = case_when(comparison == "neural_d21_t21xistdox" ~ "+dox", | |
comparison == "neural_d21_t21xistwd" ~ "w/d", | |
comparison == "neural_d21_t21xistdoxcombined" ~ "combined", | |
T ~ comparison)) |> | |
filter(baseMean >= 1000) |> | |
ggplot() + | |
geom_density(aes(x = -1 * log2FoldChange, color = comparison)) + | |
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) + | |
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) + | |
coord_cartesian(xlim = c(-1.5, 1.5)) + | |
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) from D21\nNeural (baseMean >= 1000)") + | |
theme(legend.position = c(.8, .7)) | |
ggsave(sprintf("%s/chr21_dist_neural_d21_1000.pdf", fig_output_dir), width = 3.5, height = 2) | |
# batch_corrected_vsd_df_neural <- read_csv(paste0(neural_output_dir, "/tables/batch_corrected_vsd_salmon_df.csv.gz")) | |
batch_corrected_vsd_df_neural <- read_csv(paste0(neural_output_dir, "/tables/vsd_salmon_df.csv.gz")) | |
sample_sheet_df_neural <- read_csv(paste0(neural_output_dir, "/tables/sample_sheet_df.csv.gz")) | |
write_csv(sample_sheet_df |> | |
dplyr::select(sample_id, | |
timepoint, | |
dox_status, | |
genotype, | |
cell_line | |
), paste0(table_output_dir, "/bulk_ipsc_rnaseq_samples.csv")) | |
write_csv(sample_sheet_df_neural |> | |
dplyr::select(sample_id, | |
cell_line, | |
neural_diff, | |
genotype | |
), paste0(table_output_dir, "/bulk_neural_rnaseq_samples.csv")) | |
chr21_rownorm_vsd <- batch_corrected_vsd_df_neural |> | |
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vst_count") |> | |
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |> | |
left_join(sample_sheet_df_neural, by = c("sample_id")) |> | |
filter(chromosome_name == "21") |> | |
group_by(ensembl_id) |> | |
mutate( | |
row_norm_count = vst_count - mean(cur_data() %>% filter(cell_line == "198_5") %>% pull(vst_count)), | |
mean_1985_count = mean(cur_data() %>% filter(cell_line == "198_5") %>% pull(vst_count)) | |
) |> | |
ungroup() | |
# complexheatmap for neurons | |
chr21_rownorm_hm_df <- chr21_rownorm_vsd |> | |
filter( # mean_1985_count>8 & | |
!(gene_biotype %in% c("rRNA", "TEC"))) |> | |
mutate( | |
gene_biotype = factor(case_when( | |
gene_biotype == "protein_coding" ~ "Protein\nCoding", | |
gene_biotype == "processed_pseudogene" ~ "Pseudo-\ngene", | |
gene_biotype == "transcribed_processed_pseudogene" ~ "Pseudo-\ngene", | |
T ~ gene_biotype), | |
levels = c("Protein\nCoding", "lncRNA", "Pseudo-\ngene"))) | |
neural_rownorm_mat <- chr21_rownorm_hm_df |> | |
pivot_wider(id_cols = ensembl_id, | |
names_from = sample_id, | |
values_from = row_norm_count) |> | |
column_to_rownames(var="ensembl_id") | |
t21count_col_fun <- circlize::colorRamp2(c(8,16), c("#ffffff", "#7105E0")) | |
gene_annotation <- rowAnnotation( | |
`Mean T21 Count` = | |
anno_simple( | |
x=chr21_rownorm_hm_df |> | |
dplyr::select(ensembl_id, mean_1985_count) |> | |
dplyr::distinct() |> | |
deframe(), | |
col = t21count_col_fun, | |
simple_anno_size = unit(3, "mm")), | |
annotation_name_gp= gpar(fontsize = 7)) | |
lgd_t21count = Legend(title = "Mean T21 Count", col_fun = t21count_col_fun, at = c(8,10,12,14,16), | |
labels = c(8,10,12,14,16), | |
direction = "horizontal", | |
labels_gp=gpar(fontsize=7), | |
title_gp=gpar(fontsize=7), | |
grid_height = unit(2, "mm")) | |
sample_annotation <- columnAnnotation( | |
`Cell Line` = chr21_rownorm_hm_df |> | |
dplyr::select(sample_id, `cell_line`) |> | |
dplyr::distinct() |> | |
mutate(`cell_line` = case_when( | |
cell_line == "198_1" ~ "198-1", | |
cell_line == "198_2" ~ "198-2", | |
cell_line == "198_5" ~ "198-5", | |
cell_line == "C1_8" ~ "T21XIST c1", | |
cell_line == "C4_8" ~ "T21XIST c4", | |
cell_line == "C7_9" ~ "T21XIST c7")) |> | |
column_to_rownames(var="sample_id") |> | |
deframe(), | |
`Diff Round` = chr21_rownorm_hm_df |> | |
dplyr::select(sample_id, `neural_diff`) |> | |
dplyr::distinct() |> | |
column_to_rownames(var="sample_id") |> | |
deframe() |> | |
factor(levels=c("ND6", "ND7", "ND9", "ND11", "ND12", "ND18", "ND20")), | |
simple_anno_size = unit(3, "mm"), | |
annotation_legend_param = list(labels_gp=gpar(fontsize=7), title_gp=gpar(fontsize=7), grid_height = unit(2, "mm"), grid_width = unit(2, "mm")), | |
col = list(`Cell Line` = c("198-1"="#1697a6", "198-2"="#0e606b", "198-5"="#ffc24b", | |
"T21XIST c1"="#fff4f1", "T21XIST c4"="#ffb3ae", "T21XIST c7"="#f47068"), | |
`Diff Round` = c("ND6"="#ffadad", "ND7"="#ffd6a5", "ND9"="#fdffb6", | |
"ND11"="#caffbf", "ND12"="#9bf6ff", "ND18"="#a0c4ff", | |
"ND20"="#bdb2ff")), | |
annotation_name_gp= gpar(fontsize = 7) | |
) | |
neural_hm <- Heatmap(neural_rownorm_mat, | |
col=circlize::colorRamp2(c(-1, 0, 1), c("#3784AF", "#f7f7f7", "#D34B16")), | |
# col=circlize::colorRamp2(c(-1, 0, 1), c("#0571B0", "#f7f7f7", "#CA0020")), | |
name="Row Norm VST Count", | |
border=T, | |
cluster_rows = F, | |
row_order = chr21_rownorm_hm_df |> dplyr::select(ensembl_id, mean_1985_count) |> deframe() |> sort() |> names() |> unique() |> rev(), | |
row_split = chr21_rownorm_hm_df |> | |
dplyr::select(ensembl_id, gene_biotype) |> | |
distinct() |> | |
deframe(), | |
row_title_side = "right", | |
row_gap = unit(1, "mm"), | |
show_row_names = F, | |
row_title_gp = gpar(fontsize = 7), | |
row_title_rot = 45, | |
column_title_gp = gpar(fontsize = 7), | |
column_names_gp = gpar(fontsize = 7), | |
left_annotation = gene_annotation, | |
show_column_dend=F, | |
column_split = chr21_rownorm_hm_df |> | |
mutate(split_label = case_when( | |
genotype == "T21XIST" & !is.na(description.y) & description.y != "wd" ~ "+dox", | |
genotype == "T21XIST" & dox_status == "no" ~ "-dox", | |
genotype == "T21XIST" & description.y == "wd" ~ "w/d", | |
T ~ genotype)) |> | |
dplyr::select(sample_id, split_label) |> | |
distinct() |> | |
deframe() |> | |
factor(levels=c("T21", "-dox", "+dox", "w/d", "D21")), | |
cluster_column_slices=F, | |
column_gap = unit(1, "mm"), | |
show_column_names = F, | |
bottom_annotation = sample_annotation, | |
heatmap_legend_param = list(direction = "horizontal", | |
labels_gp=gpar(fontsize=7), | |
title_gp=gpar(fontsize=7), | |
grid_height = unit(2, "mm"))) | |
pdf(sprintf("%s/expression_relative_to_1985_neural.pdf", fig_output_dir), width = 3.5, height = 4.7) | |
draw(neural_hm, annotation_legend_list = list(lgd_t21count), | |
merge_legend=T, heatmap_legend_side = "bottom", annotation_legend_side = "bottom") | |
dev.off() | |
chr21_changes_comparison_cols <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c("neural_t21_d21", "neural_t21xistwd_t21", "neural_t21xistdox_t21") | |
)) |> map(lift(paste0)) |> unlist() | |
chr21_changes_df <- combined_results_df |> | |
dplyr::filter(chromosome_name == "21") |> | |
pivot_longer( | |
cols = c(ends_with(chr21_changes_comparison_cols)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985, | |
# padj = padj_dox_t21xist_vs_1985, | |
# baseMean = baseMean_dox_t21xist_vs_1985, | |
color_label = case_when( | |
padj <= .1 & dplyr::between(log2FoldChange, log2((3 / 2) - .1), log2((3 / 2) + .1)) ~ "padj≤.1 & FC=(3/2)±.1", | |
padj <= .1 & dplyr::between(log2FoldChange, log2((3 / 2) - .15), log2((3 / 2)+.15)) ~ "padj≤.1 & FC=(3/2)±.15", | |
padj <= .1 ~ "padj≤.1", | |
T ~ "padj>.1" | |
) | |
) | |
ggplot( | |
chr21_changes_df |> | |
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
T ~ comparison)), | |
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean) | |
) + | |
facet_grid(rows = vars(comparison)) + | |
scale_color_manual( | |
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]), limits = c("padj≤.1 & FC=(3/2)±.1", "padj≤.1 & FC=(3/2)±.15", "padj≤.1", "padj>.1") | |
) + | |
scale_size_area(limits = c(NA, 10000), oob = scales::squish, guide = "none", max_size = 4) + | |
labs( | |
title = "Neural HSA21 Excess Dosage", | |
color = "Gene Category", | |
size = "Base Mean Expression", | |
x = "HSA21 Genes", | |
y = "log2(FC)" | |
) + | |
geom_hline(yintercept = log2(3 / 2), linetype = "dashed", size = .3) + | |
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) + | |
geom_hline(yintercept = 0, linetype = "solid") + | |
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) + | |
geom_point(alpha = .5) + | |
guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) + | |
theme( | |
axis.text.x = element_blank(), | |
axis.ticks.x = element_blank(), | |
legend.position = "top", | |
# legend.spacing.x = unit(3, "mm"), | |
# legend.margin = margin(3,3,3,3, "mm"), | |
legend.box.background = element_rect(color = "black", size=.5) | |
) + | |
coord_cartesian(ylim = c(-1.5, 1.5)) | |
ggsave(sprintf("%s/hsa21_dosage_correction_neural.pdf", fig_output_dir), width = 3.25, height = 2) | |
chr21_changes_df |> | |
# mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox", | |
# comparison == "wdT21XIST_vs_T21" ~ "w/d", | |
# T ~ comparison)) |> | |
ggplot(aes(x=log2FoldChange, color=comparison)) + | |
# facet_grid(rows=vars(comparison)) + | |
geom_density() + | |
coord_cartesian(xlim=c(-2, 2)) + | |
geom_vline(xintercept = 0) + | |
geom_vline(xintercept = log2(3/2), linetype="dashed") + | |
geom_vline(xintercept = log2(2/3), linetype="dashed") + | |
labs(title="Log2(FC) Neural T21/D21", | |
x="log2(FC)") | |
ggsave(sprintf("%s/log2fc_dist_neural.pdf", fig_output_dir), width = 3, height = 1.7) | |
vectorized_sumlog <- function(x, y) { | |
if (length(x) != length(y)) { | |
stop("lengths of lists must be the same") | |
} | |
combined_pvals <- c() | |
for (i in 1:length(x)) { | |
combined_pvals <- c(combined_pvals, (metap::sumlog(c(x[i], y[i])))$p) | |
} | |
return(combined_pvals) | |
} | |
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to) | |
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to) | |
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons) | |
diff_expr_comparisons <- c("dox_effect", "dwdT21SIT_vs_T21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
diff_expr_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect", | |
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction")) |> | |
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>% | |
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |> | |
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) + | |
facet_grid(cols = vars(comparison)) + | |
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) + | |
geom_hline(yintercept = 0, size = .3) + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) + | |
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") + | |
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") + | |
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) + | |
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
labs( | |
y = "log2FC(T21 vs. D21)", | |
x = "log2FC", | |
title = "Correlation with T21-driven Gene Expression", | |
color = "-log10\n(c.padj)" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_text(margin = margin(r = 2, unit = "mm")), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression.pdf", fig_output_dir), width=3, height = 4.125) | |
ggsave(sprintf("%s/rescue_of_t21_ipsc.pdf", fig_output_dir), width=4.1, height = 3.3) | |
ggsave(sprintf("%s/rescue_of_t21_ipsc_sigcoor.pdf", fig_output_dir), width=4.1, height = 3.3) | |
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_horizontal.pdf", fig_output_dir), width=4, height = 4.125-.5) | |
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_horizontal_sigcoor.pdf", fig_output_dir), width=4, height = 4.125-.5) | |
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_vertical.pdf", fig_output_dir), width=3, height = 4.125) | |
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_vertical_sigcoor.pdf", fig_output_dir), width=3, height = 4.125) | |
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to) | |
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to) | |
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons) | |
diff_expr_comparisons <- c("dox_effect", "dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
diff_expr_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST" & chromosome_name != "21") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = factor(case_when(comparison == "dox_effect" ~ "Dox Effect", | |
comparison == "dwdT21SIT_vs_T21" ~ "T21XIST/T21", | |
comparison == "dwdT21SIT_vs_D21" ~ "T21XIST/D21"), | |
levels = c("Dox Effect", "T21XIST/T21","T21XIST/D21"))) |> | |
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>% | |
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |> | |
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) + | |
facet_grid(cols = vars(comparison)) + | |
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) + | |
geom_hline(yintercept = 0, size = .3) + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) + | |
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
geom_smooth(data = . %>% filter(combined_pval <= .1 & hgnc_symbol != "XIST" & chromosome_name != "21"), method = "lm", color = "black", size = .3, linetype = "dashed") + | |
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") + | |
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) + | |
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1 & hgnc_symbol != "XIST" & chromosome_name != "21"), method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
labs( | |
y = "log2FC(T21 vs. D21)", | |
x = "log2FC", | |
title = "Correlation with T21-driven Gene Expression\nexcluding Chr21", | |
color = "-log10\n(c.padj)" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_text(margin = margin(r = 2, unit = "mm")), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21.pdf", fig_output_dir), width=3.55, height = 3.3) | |
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21_witht21xistd21.pdf", fig_output_dir), width=3.55, height = 3.3) | |
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21_witht21xistd21_sigcoor.pdf", fig_output_dir), width=4.5, height = 2.7) | |
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to) | |
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to) | |
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons) | |
diff_expr_comparisons <- c("neural_T21_D21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
diff_expr_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = case_when(comparison == "neural_T21_D21" ~ "Neural T21/D21")) |> | |
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>% | |
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |> | |
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) + | |
# facet_grid(cols = vars(comparison)) + | |
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) + | |
geom_hline(yintercept = 0, size = .3) + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) + | |
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_smooth( method = "lm", color = "black", size = .3, linetype = "dashed") + | |
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") + | |
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) + | |
# ggpubr::stat_cor( method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
labs( | |
y = "log2FC(iPSC T21 vs. D21)", | |
x = "log2FC(Neural T21 vs. D21)", | |
title = "iPSC and Neural T21 Overlap", | |
color = "-log10\n(c.padj)" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_text(margin = margin(r = 2, unit = "mm")), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
ggsave(sprintf("%s/neural_ipsc_gene_overlap.pdf", fig_output_dir), width=3, height = 3.1) | |
ggsave(sprintf("%s/neural_ipsc_gene_overlap_sigcoor.pdf", fig_output_dir), width=3, height = 3.1) | |
compared_to_log2FC <- sprintf("log2FoldChange_%s", "neural_t21_d21") | |
compared_to_padj <- sprintf("padj_%s", "neural_t21_d21") | |
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons) | |
diff_expr_comparisons <- c("neural_t21xistdoxcombined_t21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
diff_expr_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = case_when(comparison == "neural_t21xistdox_t21" ~ "+Dox", | |
comparison == "neural_t21xistwd_t21" ~ "w/d")) |> | |
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>% | |
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |> | |
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) + | |
# facet_grid(cols = vars(comparison)) + | |
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) + | |
geom_hline(yintercept = 0, size = .3) + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) + | |
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") + | |
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") + | |
coord_cartesian(xlim = c(-10, 10), ylim = c(-10, 10)) + | |
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -9, label.y = 6, size = 3) + | |
labs( | |
y = "log2FC(Neural T21 vs. D21)", | |
x = "log2FC (Neural T21XIST vs T21)", | |
title = "Correlation with T21-driven Expression - Neural", | |
color = "-log10\n(c.padj)" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_text(margin = margin(r = 2, unit = "mm")), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
# ggsave(sprintf("%s/rescue_of_t21_neural.pdf", fig_output_dir), width=3, height = 3.3) | |
ggsave(sprintf("%s/rescue_of_t21_neural_sigcoor.pdf", fig_output_dir), width=3.5, height = 2.4) | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect", | |
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction")) |> | |
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>% | |
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |> | |
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) + | |
facet_grid(cols = vars(comparison)) + | |
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) + | |
geom_hline(yintercept = 0, size = .3) + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) + | |
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) + | |
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") + | |
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") + | |
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) + | |
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) + | |
labs( | |
y = "log2FC(T21 vs. D21)", | |
x = "log2FC", | |
title = "Rescue of T21 Dysregulation", | |
color = "-log10\n(c.padj)" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_text(margin = margin(r = 2, unit = "mm")), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
compared_from_suffixes <- purrr::cross(list( | |
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"), | |
c("dox_effect", "dwdT21SIT_vs_T21", "T21_vs_D21") | |
)) |> map(lift(paste0)) |> unlist() | |
combined_results_df |> | |
filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect", | |
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction", | |
comparison == "T21_vs_D21" ~ "T21/D21" | |
)) |> | |
dplyr::mutate(comparison = factor(comparison, levels = c("T21/D21", "XIST Correction", "Dox Effect"))) %>% | |
filter(padj <= .1 ) |> | |
ggplot(aes(x = abs(log2FoldChange), color=comparison)) + | |
geom_density(aes(y = ..count.., fill=comparison)) + | |
scale_fill_manual(limits=c("XIST Correction", "Dox Effect"), values = c("lightgrey", "white"), guide="none") + | |
scale_color_aaas() + | |
geom_density(aes(y = ..count..)) + | |
coord_cartesian(xlim=c(0, 1)) + | |
scale_x_continuous(limits = c(0, 1), oob = scales::squish, expand = c(0,0), | |
breaks = c(0,.25,.5,.75,1), | |
labels = c("0.00","0.25", "0.50", "0.75", "???1.00")) + | |
scale_y_continuous(expand=c(0,0))+ | |
labs( | |
y = "scaled densities", | |
x = "log2FC", | |
title = "Significant Fold-Changes", | |
color = "Diff. Expr" | |
) + | |
theme( | |
legend.position = "top", | |
legend.margin = margin(l = -5, b=-3, unit = "mm"), | |
legend.title = element_blank(), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3.5, "mm") | |
) | |
ggsave(sprintf("%s/sig_fold_changes.pdf", fig_output_dir), width = 2.5, height = 2.8) | |
## GSEA Overlaps | |
term_order <- c( | |
# "chr21", | |
# "ECM", | |
# "Folate", | |
"apoptosis", | |
"serine", | |
"stress", | |
"ER", | |
"mitochondria", | |
"ribosome", | |
"translation" | |
) | |
term_colors <- c( | |
# "#ff6c92", | |
"#8e000c", | |
"#da5c29", | |
"#f5cf87", | |
"#a90aff", | |
"#017c2f", | |
"#00208a", | |
"#fe85f3" | |
# "#000000" | |
) | |
compared_to_NES <- sprintf("NES_%s", gsea_compare_to) | |
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to) | |
# compared_from_suffixes <- sprintf("_%s", gsea_comparisons) | |
gsea_comparisons <- c("dwdT21SIT_vs_T21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"), | |
gsea_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
gsea_combined_pval_cutoff <- gsea_combined_padj_cutoff | |
combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
# mutate(comparison = case_when( | |
# comparison == "wd_nd" ~ "w/d", | |
# comparison == "wd_nd_powered" ~ "w/d-p", | |
# comparison == "d_t21xist_t21" ~ "+dox", | |
# comparison == "dox" ~ "Dox Effect", | |
# comparison == "neural_t21_d21" ~ "Neural T21/D21" | |
# )) %>% | |
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) + | |
# facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) + | |
geom_point(data = . %>% filter(category == "other"), alpha = .5) + | |
geom_point(data = . %>% filter(category != "other"), alpha = .7) + | |
labs( | |
title = "Rescue of Acute Enrichment Terms", | |
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to), | |
x = "NES (iPSC T21XIST/T21)", | |
color = "Category" | |
) + | |
scale_color_manual( | |
limits = c(term_order, "other"), | |
values = c(term_colors, "lightgrey") | |
) + | |
scale_size_area(max_size = 3.5, breaks = c(1, 2, 5, 10), name="-log10(c.padj)") + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_hline(yintercept = 0, size = .3) + | |
theme(legend.box = "horizontal") | |
ggsave(sprintf("%s/acute_enrrichment_rescue.pdf", fig_output_dir), width = 3, height = 2.3) | |
ggsave(sprintf("%s/acute_enrrichment_rescue_cpval guide.pdf", fig_output_dir), width = 4, height = 2.3) | |
compared_to_NES <- sprintf("NES_%s", "neural_t21_d21") | |
compared_to_padj <- sprintf("p.adjust_%s", "neural_t21_d21") | |
# compared_from_suffixes <- sprintf("_%s", gsea_comparisons) | |
gsea_comparisons <- c("neural_t21xistdoxcombined_t21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"), | |
gsea_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
gsea_combined_pval_cutoff <- gsea_combined_padj_cutoff | |
combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
# mutate(comparison = case_when( | |
# comparison == "wd_nd" ~ "w/d", | |
# comparison == "wd_nd_powered" ~ "w/d-p", | |
# comparison == "d_t21xist_t21" ~ "+dox", | |
# comparison == "dox" ~ "Dox Effect", | |
# comparison == "neural_t21_d21" ~ "Neural T21/D21" | |
# )) %>% | |
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) + | |
# facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) + | |
geom_point(data = . %>% filter(category == "other"), alpha = .5) + | |
geom_point(data = . %>% filter(category != "other"), alpha = .7) + | |
labs( | |
title = "Neural Rescue of Acute Enrichment Terms", | |
y = "NES (Neural T21/D21)", #sprintf("NES (%s)", gsea_compare_to), | |
x = "NES (Neural XIST/T21)", | |
color = "Category" | |
) + | |
scale_color_manual( | |
limits = c(term_order, "other"), | |
values = c(term_colors, "lightgrey") | |
) + | |
scale_size_area(max_size = 3.5, breaks = c(1, 2, 5, 10), name="-log10(c.padj)") + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_hline(yintercept = 0, size = .3) + | |
theme(legend.box = "vertical", | |
legend.box.margin = margin(l=-5), | |
legend.spacing.y = unit(.5, "mm")) | |
ggsave(sprintf("%s/acute_enrrichment_rescue_neural_2.pdf", fig_output_dir), width = 3.6, height = 2.5) | |
compared_to_NES <- sprintf("NES_%s", gsea_compare_to) | |
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to) | |
compared_from_suffixes <- sprintf("_%s", gsea_comparisons) | |
gsea_comparisons <- c("neural_t21_d21", | |
"neural_t21xistwd_t21", | |
"neural_t21xistdox_t21", | |
"neural_t21xistdoxcombined_t21") | |
compared_from_suffixes <- purrr::cross(list( | |
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"), | |
gsea_comparisons | |
)) |> map(lift(paste0)) |> unlist() | |
gsea_combined_pval_cutoff <- .2 | |
compared_to_NES <- sprintf("NES_%s", "T21_vs_D21") | |
compared_to_padj <- sprintf("p.adjust_%s", "T21_vs_D21") | |
combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(gsea_comparisons | |
# purrr::cross(list( | |
# c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"), | |
# gsea_comparisons | |
# ) | |
# ) |> map(lift(paste0)) |> unlist() | |
)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
# mutate(comparison = case_when( | |
# comparison == "wd_nd" ~ "w/d", | |
# comparison == "wd_nd_powered" ~ "w/d-p", | |
# comparison == "d_t21xist_t21" ~ "+dox", | |
# comparison == "dox" ~ "Dox Effect", | |
# comparison == "neural_t21_d21" ~ "Neural T21/D21" | |
# )) %>% | |
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) + | |
facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) + | |
geom_point(data = . %>% filter(category == "other"), alpha = .5) + | |
geom_point(data = . %>% filter(category != "other"), alpha = .7) + | |
labs( | |
title = "iPSC and Neural Term Overlap", | |
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to), | |
# x = "NES (Neural T21/D21)", | |
color = "Category" | |
) + | |
scale_color_manual( | |
limits = c(term_order, "other"), | |
values = c(term_colors, "lightgrey") | |
) + | |
scale_size_area(max_size = 3.5, guide = "none") + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_hline(yintercept = 0, size = .3) | |
# ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 3.25, height = 2.5) | |
ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 8, height = 2.5) | |
compared_to_NES <- sprintf("NES_%s", "neural_t21_d21") | |
compared_to_padj <- sprintf("p.adjust_%s", "neural_t21_d21") | |
combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with(c("neural_t21xistwd_t21", | |
"neural_t21xistdox_t21", | |
"neural_t21xistdoxcombined_t21") | |
# purrr::cross(list( | |
# c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"), | |
# gsea_comparisons | |
# ) | |
# ) |> map(lift(paste0)) |> unlist() | |
)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
# mutate(comparison = case_when( | |
# comparison == "wd_nd" ~ "w/d", | |
# comparison == "wd_nd_powered" ~ "w/d-p", | |
# comparison == "d_t21xist_t21" ~ "+dox", | |
# comparison == "dox" ~ "Dox Effect", | |
# comparison == "neural_t21_d21" ~ "Neural T21/D21" | |
# )) %>% | |
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) + | |
facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) + | |
geom_point(data = . %>% filter(category == "other"), alpha = .5) + | |
geom_point(data = . %>% filter(category != "other"), alpha = .7) + | |
labs( | |
title = "iPSC and Neural Term Overlap", | |
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to), | |
# x = "NES (Neural T21/D21)", | |
color = "Category" | |
) + | |
scale_color_manual( | |
limits = c(term_order, "other"), | |
values = c(term_colors, "lightgrey") | |
) + | |
scale_size_area(max_size = 3.5, guide = "none") + | |
geom_vline(xintercept = 0, size = .3) + | |
geom_hline(yintercept = 0, size = .3) | |
# ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 3.25, height = 2.5) | |
ggsave(sprintf("%s/neural_rescue.pdf", fig_output_dir), width = 8, height = 2.5) | |
gsea_combined_pval_cutoff <- .1 | |
selected_terms <- combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with("_dwdT21SIT_vs_T21")), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(p.adjust_T21_vs_D21, p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
mutate(category = case_when(category == "ribosome" & str_detect(ID, "PRERIBOSOME") ~ "preribosome", | |
T ~ category), | |
quadrant = case_when( | |
NES > 0 & NES_T21_vs_D21 > 0 ~ 1, | |
NES < 0 & NES_T21_vs_D21 > 0 ~ 2, | |
NES < 0 & NES_T21_vs_D21 < 0 ~ 3, | |
NES > 0 & NES_T21_vs_D21 < 0 ~ 4 | |
)) |> | |
filter(category != "other" & quadrant %in% c(2,4)) |> | |
mutate(dist_from_center = sqrt(NES^2 + NES_T21_vs_D21^2)) |> | |
group_by(category) |> | |
arrange(desc(dist_from_center)) |> | |
filter(row_number() <= 3) |> | |
ungroup() |> | |
dplyr::select(ID, dist_from_center, category2=category) | |
selected_terms2 <- combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
# pivot_longer( | |
# cols = c(ends_with("_dwdT21SIT_vs_T21", "_neural_t21_d21")), | |
# names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
# ) |> | |
# pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(vectorized_sumlog(`p.adjust_neural_t21_d21`, `p.adjust_T21_vs_D21`), `p.adjust_dwdT21SIT_vs_T21`)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
mutate(quadrant = case_when( | |
NES_T21_vs_D21 > 0 & NES_neural_t21_d21 > 0 & NES_dwdT21SIT_vs_T21 < 0 ~ 2, | |
NES_T21_vs_D21 < 0 & NES_neural_t21_d21 < 0 & NES_dwdT21SIT_vs_T21 > 0 ~ 4, | |
T ~ 0 | |
)) |> | |
filter(category != "other" & quadrant %in% c(2,4)) |> | |
mutate(dist_from_center = sqrt(NES_T21_vs_D21^2 + NES_dwdT21SIT_vs_T21^2 + NES_neural_t21_d21^2)) |> | |
group_by(category) |> | |
arrange(desc(dist_from_center)) |> | |
filter(row_number() <= 3) |> | |
ungroup() |> | |
dplyr::select(ID, dist_from_center, category2=category) | |
selected_terms_ipsc <- combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with("_dwdT21SIT_vs_T21")), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(p.adjust_T21_vs_D21, p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
mutate(quadrant = case_when( | |
NES > 0 & NES_T21_vs_D21 > 0 ~ 1, | |
NES < 0 & NES_T21_vs_D21 > 0 ~ 2, | |
NES < 0 & NES_T21_vs_D21 < 0 ~ 3, | |
NES > 0 & NES_T21_vs_D21 < 0 ~ 4 | |
)) |> | |
filter(category != "other" & quadrant %in% c(2,4)) |> | |
mutate(dist_from_center = sqrt(NES^2 + NES_T21_vs_D21^2)) |> | |
group_by(category) |> | |
arrange(desc(dist_from_center)) |> | |
filter(row_number() <= 3) |> | |
ungroup() |> | |
dplyr::select(ID, dist_from_center, category2=category) | |
selected_terms_neural <- combined_enrichment_df |> | |
# filter(hgnc_symbol != "XIST") |> | |
pivot_longer( | |
cols = c(ends_with("_neural_t21xistdoxcombined_t21")), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
mutate(combined_pval = vectorized_sumlog(p.adjust_neural_t21_d21, p.adjust)) |> | |
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |> | |
mutate(quadrant = case_when( | |
NES > 0 & NES_neural_t21_d21 > 0 ~ 1, | |
NES < 0 & NES_neural_t21_d21 > 0 ~ 2, | |
NES < 0 & NES_neural_t21_d21 < 0 ~ 3, | |
NES > 0 & NES_neural_t21_d21 < 0 ~ 4 | |
)) |> | |
filter(category != "other" & quadrant %in% c(2,4)) |> | |
mutate(dist_from_center = sqrt(NES^2 + NES_neural_t21_d21^2)) |> | |
group_by(category) |> | |
arrange(desc(dist_from_center)) |> | |
filter(row_number() <= 3) |> | |
ungroup() |> | |
dplyr::select(ID, dist_from_center, category2=category) | |
library(ggh4x) | |
combined_enrichment_df |> | |
inner_join(selected_terms2) |> | |
dplyr::select(-starts_with("combined_pval_")) |> | |
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")), | |
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |> | |
pivot_wider(names_from = "value_type", values_from="value") |> | |
mutate(ID=str_replace_all(ID, "_", " ")) |> | |
filter(comparison %in% c( | |
"T21_vs_D21", | |
"dwdT21SIT_vs_T21", | |
"neural_t21_d21", | |
"neural_t21xistdoxcombined_t21" | |
# # "dox", | |
# "d_t21xist_t21", | |
# "wd_nd" | |
)) |> | |
mutate( | |
ID = str_replace_all(ID, "PROCESS", "PROC"), | |
ID = str_replace_all(ID, "POSITIVE", "(+)"), | |
ID = str_replace_all(ID, "REGULATION", "REG"), | |
ID = str_replace_all(ID, "FAMILY", "FAM"), | |
ID = str_replace_all(ID, "OXIDATIVE", "OXI"), | |
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"), | |
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"), | |
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"), | |
ID = str_replace_all(ID, "DEPENDENT", "DEP"), | |
ID = str_replace_all(ID, "TRANSLATION", "TRANS"), | |
ID = str_replace_all(ID, "CHEMICAL", "CHEM"), | |
ID = str_replace_all(ID, "RIBOSOME", "RIBO"), | |
ID = str_replace_all(ID, "PROTEIN", "PROT"), | |
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"), | |
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"), | |
ID = str_replace_all(ID, "TARGETING", "TARGET"), | |
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME |GOCC |GOMF", ""), | |
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"), | |
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"), | |
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"), | |
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"), | |
ID = str_replace_all(ID, "EXTRINSIC", "EXT"), | |
ID = str_replace_all(ID, "TARGET", "TRGT"), | |
ID = str_replace_all(ID, "MEMBRANE", "MEM"), | |
ID = str_replace_all(ID, "COMPONENT", "COMP"), | |
ID = str_replace_all(ID, "PHYTO", "PHYT"), | |
ID = str_replace_all(ID, "AMINO ACID", "AA"), | |
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"), | |
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"), | |
ID = str_replace_all(ID, "APOPTOTIC", "APOP"), | |
ID = str_replace_all(ID, "DISEASES", "DIS"), | |
ID = str_replace_all(ID, "PROGRAMMED", "PROG"), | |
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"), | |
ID = str_replace_all(ID, "ACTIVITY", "ACT"), | |
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"), | |
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"), | |
ID = str_replace_all(ID, "SMALL", "SML"), | |
ID = str_replace_all(ID, "SUBUNIT", "SUB"), | |
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"), | |
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"), | |
ID = str_replace_all(ID, "APOPTOSIS", "APOP"), | |
ID = str_replace_all(ID, "PATHWAY", "PATH"), | |
ID = str_replace_all(ID, "SIGNALING", "SIG"), | |
ID = str_replace_all(ID, "RESPONSE", "RESP"), | |
ID = str_replace_all(ID, "MULTICELLULAR", "MULTICELL"), | |
ID = str_replace_all(ID, "RESPIRATORY", "RESP"), | |
ID = str_replace_all(ID, "RELATED", "REL"), | |
ID = str_replace_all(ID, "INDUCER", "IND"), | |
ID = str_replace_all(ID, "ENDOPEPTIDASE", "ENDOPEP"), | |
ID = str_replace_all(ID, "ORGANISMAL", "ORGNSML"), | |
ID = str_replace_all(ID, "NETWORK", "NTWRK"), | |
ID = str_replace_all(ID, "ALTERED", "ALT"), | |
ID = str_replace_all(ID, "OVARIAN", "OVRIN"), | |
ID = str_replace_all(ID, "CANCER", "CNCR"), | |
) |> | |
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) + | |
geom_point() + | |
ggh4x::facet_grid2(rows=vars(factor(category2, | |
levels=c("apoptosis", "serine", "stress","ER", "ribosome", "mitochondria", "translation")) | |
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) + | |
# scale_color_viridis_c() + | |
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) + | |
# scale_y_discrete(label=scales::label_wrap(30)) + | |
scale_x_discrete(limits=c( | |
"neural_t21_d21", | |
"T21_vs_D21", | |
"dwdT21SIT_vs_T21", | |
"neural_t21xistdoxcombined_t21" | |
), | |
labels = c("iPSC T21 vs. D21", "iPSC T21XIST vs. T21", "Neural T21 vs. D21", "Neural T21XIST vs. T21")) + | |
theme(axis.text.x = element_text(angle=65, hjust=1), | |
axis.title.x = element_blank(), | |
axis.text.y = element_text(size=6), | |
axis.title.y=element_blank(), | |
legend.position = c(-1.5, -.1), | |
legend.box = "horizontal", | |
legend.margin = margin(r=2, unit = "mm"), | |
legend.key.width = unit(2, units = "mm"), | |
plot.margin = margin(r=-2, unit="mm"), | |
plot.title = element_text(hjust=1.8), | |
# axis.text.y = element_blank(), | |
# axis.ticks.y=element_blank(), | |
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5)) + | |
scale_size_area(max_size=4) + | |
labs(title="Enriched Pathways") | |
ggsave(sprintf("%s/enriched_pathways_with_neural.pdf", fig_output_dir), width = 3.9, height = 6) | |
combined_enrichment_df |> | |
inner_join(selected_terms_ipsc) |> | |
dplyr::select(-starts_with("combined_pval_")) |> | |
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")), | |
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |> | |
pivot_wider(names_from = "value_type", values_from="value") |> | |
mutate(ID=str_replace_all(ID, "_", " ")) |> | |
filter(comparison %in% c( | |
"T21_vs_D21", | |
"dwdT21SIT_vs_T21", | |
"neural_t21_d21", | |
"neural_t21xistdoxcombined_t21" | |
# # "dox", | |
# "d_t21xist_t21", | |
# "wd_nd" | |
)) |> | |
mutate( | |
ID = str_replace_all(ID, "PROCESS", "PROC"), | |
ID = str_replace_all(ID, "POSITIVE", "(+)"), | |
ID = str_replace_all(ID, "REGULATION", "REG"), | |
ID = str_replace_all(ID, "FAMILY", "FAM"), | |
ID = str_replace_all(ID, "OXIDATIVE", "OXI"), | |
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"), | |
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"), | |
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"), | |
ID = str_replace_all(ID, "DEPENDENT", "DEP"), | |
ID = str_replace_all(ID, "TRANSLATION", "TRANS"), | |
ID = str_replace_all(ID, "CHEMICAL", "CHEM"), | |
ID = str_replace_all(ID, "RIBOSOME", "RIBO"), | |
ID = str_replace_all(ID, "PROTEIN", "PROT"), | |
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"), | |
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"), | |
ID = str_replace_all(ID, "TARGETING", "TARGET"), | |
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME ", ""), | |
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"), | |
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"), | |
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"), | |
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"), | |
ID = str_replace_all(ID, "EXTRINSIC", "EXT"), | |
ID = str_replace_all(ID, "TARGET", "TRGT"), | |
ID = str_replace_all(ID, "MEMBRANE", "MEM"), | |
ID = str_replace_all(ID, "COMPONENT", "COMP"), | |
ID = str_replace_all(ID, "PHYTO", "PHYT"), | |
ID = str_replace_all(ID, "AMINO ACID", "AA"), | |
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"), | |
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"), | |
ID = str_replace_all(ID, "APOPTOTIC", "APOP"), | |
ID = str_replace_all(ID, "DISEASES", "DIS"), | |
ID = str_replace_all(ID, "PROGRAMMED", "PROG"), | |
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"), | |
ID = str_replace_all(ID, "ACTIVITY", "ACT"), | |
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"), | |
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"), | |
ID = str_replace_all(ID, "SMALL", "SML"), | |
ID = str_replace_all(ID, "SUBUNIT", "SUB"), | |
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"), | |
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"), | |
ID = str_replace_all(ID, "RESPIRATORY", "RESP"), | |
ID = str_replace_all(ID, "RESPONSE", "RESP"), | |
) |> | |
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) + | |
geom_point() + | |
ggh4x::facet_grid2(rows=vars(factor(category2, | |
levels=c("apoptosis", "serine", "stress", "ER", "ribosome", "mitochondria", "translation")) | |
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) + | |
# scale_color_viridis_c() + | |
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) + | |
# scale_y_discrete(label=scales::label_wrap(30)) + | |
scale_x_discrete(limits=c( | |
"T21_vs_D21", | |
"dwdT21SIT_vs_T21" | |
), | |
labels = c("T21 vs. D21", "T21XIST vs. T21")) + | |
scale_size_area(max_size=4) + | |
labs(title="Top GSEA Terms - iPSCs") + | |
theme(axis.text.x = element_text(angle=45, hjust=1), | |
axis.title.x = element_blank(), | |
axis.text.y = element_text(size=6), | |
axis.title.y=element_blank(), | |
legend.position = c(-3, -.1), | |
legend.box = "horizontal", | |
legend.margin = margin(r=2, unit = "mm"), | |
legend.key.width = unit(2, units = "mm"), | |
plot.margin = margin(r=-2, unit="mm"), | |
plot.title = element_text(hjust=1.8), | |
# axis.text.y = element_blank(), | |
# axis.ticks.y=element_blank(), | |
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5)) | |
ggsave(sprintf("%s/enriched_pathways_ipsc.pdf", fig_output_dir), width = 3.15, height = 5.5) | |
combined_enrichment_df |> | |
inner_join(selected_terms_neural, by="ID") |> | |
dplyr::select(-starts_with("combined_pval_")) |> | |
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")), | |
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |> | |
pivot_wider(names_from = "value_type", values_from="value") |> | |
mutate(ID=str_replace_all(ID, "_", " ")) |> | |
filter(comparison %in% c( | |
"neural_t21_d21", | |
"neural_t21xistdoxcombined_t21" | |
# # "dox", | |
# "d_t21xist_t21", | |
# "wd_nd" | |
)) |> | |
mutate( | |
ID = str_replace_all(ID, "REACTOME MITOCHONDRIAL TRANSLATION", "RCTME MTO TRANS"), | |
ID = str_replace_all(ID, "GOBP MITOCHONDRIAL TRANSLATION", "GO MTO TRANS"), | |
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME ", ""), | |
ID = str_replace_all(ID, "PROCESS", "PROC"), | |
ID = str_replace_all(ID, "POSITIVE", "(+)"), | |
ID = str_replace_all(ID, "REGULATION", "REG"), | |
ID = str_replace_all(ID, "FAMILY", "FAM"), | |
ID = str_replace_all(ID, "OXIDATIVE", "OXI"), | |
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"), | |
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"), | |
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"), | |
ID = str_replace_all(ID, "DEPENDENT", "DEP"), | |
ID = str_replace_all(ID, "TRANSLATION", "TRANS"), | |
ID = str_replace_all(ID, "CHEMICAL", "CHEM"), | |
ID = str_replace_all(ID, "RIBOSOME", "RIBO"), | |
ID = str_replace_all(ID, "PROTEIN", "PROT"), | |
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"), | |
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"), | |
ID = str_replace_all(ID, "TARGETING", "TARGET"), | |
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"), | |
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"), | |
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"), | |
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"), | |
ID = str_replace_all(ID, "EXTRINSIC", "EXT"), | |
ID = str_replace_all(ID, "TARGET", "TRGT"), | |
ID = str_replace_all(ID, "MEMBRANE", "MEM"), | |
ID = str_replace_all(ID, "COMPONENT", "COMP"), | |
ID = str_replace_all(ID, "PHYTO", "PHYT"), | |
ID = str_replace_all(ID, "AMINO ACID", "AA"), | |
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"), | |
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"), | |
ID = str_replace_all(ID, "APOPTOTIC", "APOP"), | |
ID = str_replace_all(ID, "DISEASES", "DIS"), | |
ID = str_replace_all(ID, "PROGRAMMED", "PROG"), | |
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"), | |
ID = str_replace_all(ID, "ACTIVITY", "ACT"), | |
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"), | |
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"), | |
ID = str_replace_all(ID, "SMALL", "SML"), | |
ID = str_replace_all(ID, "SUBUNIT", "SUB"), | |
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"), | |
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"), | |
ID = str_replace_all(ID, "RESPIRATORY", "RESP"), | |
ID = str_replace_all(ID, "RESPONSE", "RESP"), | |
ID = str_replace_all(ID, "TRANSCRIPTION", "TRNCRIP"), | |
ID = str_replace_all(ID, "TEMPLATED", "TMPLT"), | |
ID = str_replace_all(ID, "INITIATION", "INIT"), | |
ID = str_replace_all(ID, "PHOSPHORYLATION", "PHOS"), | |
ID = str_replace_all(ID, "ALPHA", "α"), | |
ID = str_replace_all(ID, "INHIBITOR", "INHIB"), | |
ID = str_replace_all(ID, "ELECTRON TRANSPORT CHAIN", "E.T.C."), | |
ID = str_replace_all(ID, "SYSTEM", "SYS"), | |
ID = str_replace_all(ID, "MITOCHONDRIA", "MITO"), | |
ID = str_replace_all(ID, "INFLAMMATORY", "INFLAM"), | |
ID = str_replace_all(ID, "EFFECTS", "EFF."), | |
ID = str_replace_all(ID, "PATHWAY", "PATH"), | |
) |> | |
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) + | |
geom_point() + | |
ggh4x::facet_grid2(rows=vars(factor(category2, | |
levels=c("apoptosis", "serine", "stress", "ER", "ribosome", "mitochondria", "translation")) | |
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) + | |
# scale_color_viridis_c() + | |
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) + | |
# scale_y_discrete(label=scales::label_wrap(30)) + | |
scale_x_discrete(limits=c( | |
"neural_t21_d21", | |
"neural_t21xistdoxcombined_t21" | |
), | |
labels = c("T21 vs. D21", "T21XIST vs. T21")) + | |
scale_size_area(max_size=4) + | |
labs(title="Top GSEA Terms - Neural") + | |
theme(axis.text.x = element_text(angle=45, hjust=1), | |
axis.title.x = element_blank(), | |
axis.text.y = element_text(size=6), | |
axis.title.y=element_blank(), | |
legend.position = c(-2.6, 0), | |
legend.box = "horizontal", | |
legend.margin = margin(r=2, unit = "mm"), | |
legend.key.width = unit(2, units = "mm"), | |
plot.margin = margin(r=-2, unit="mm"), | |
plot.title = element_text(hjust=1.8), | |
# axis.text.y = element_blank(), | |
# axis.ticks.y=element_blank(), | |
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5)) | |
ggsave(sprintf("%s/enriched_pathways_neural.pdf", fig_output_dir), width = 3.6, height = 4.8,device = cairo_pdf) | |
## Lollipop Plots | |
gsea_compare_to <- "T21_vs_D21" | |
gsea_comparisons <- c("neural_t21_d21", "neural_t21xistdoxcombined_t21", "dwdT21SIT_vs_T21", "dox_effect") | |
compared_to_NES <- sprintf("NES_%s", gsea_compare_to) | |
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to) | |
compared_from_suffixes <- sprintf("_%s", c(gsea_compare_to, gsea_comparisons)) | |
gsea_lollipop_cutoff_1 <- lol_cutoff_primary | |
gsea_lollipop_cutoff_2 <- .2 | |
combined_enrichment_df |> | |
mutate( | |
ontology = str_extract(ID, "[A-Z]+(?=_)"), | |
ontology = case_when( | |
is.na(ontology) | !(ontology %in% c("GOBP", "GOCC", "GOMF", "HALLMARK", "KEGG", "REACTOME", "WP")) ~ "other", | |
T ~ ontology | |
) | |
) |> | |
filter(ontology != "other", | |
!!as.name(compared_to_padj) <= gsea_lollipop_cutoff_1) |> | |
# dplyr::select(-starts_with("combined_pval_")) |> | |
mutate( | |
ID = str_replace_all(ID, "_", " "), | |
ID = str_replace_all(ID, "PROCESS", "PROC"), | |
ID = str_replace_all(ID, "POSITIVE", "(+)"), | |
ID = str_replace_all(ID, "REGULATION", "REG"), | |
ID = str_replace_all(ID, "FAMILY", "FAM"), | |
ID = str_replace_all(ID, "OXIDATIVE", "OXI"), | |
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"), | |
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"), | |
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"), | |
ID = str_replace_all(ID, "DEPENDENT", "DEP"), | |
ID = str_replace_all(ID, "TRANSLATION", "TRANS"), | |
ID = str_replace_all(ID, "CHEMICAL", "CHEM"), | |
ID = str_replace_all(ID, "RIBOSOME", "RIBO"), | |
ID = str_replace_all(ID, "PROTEIN", "PROT"), | |
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"), | |
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"), | |
ID = str_replace_all(ID, "TARGETING", "TARGET"), | |
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME |HALLMARK |KEGG ", ""), | |
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"), | |
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"), | |
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"), | |
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"), | |
ID = str_replace_all(ID, "EXTRINSIC", "EXT"), | |
ID = str_replace_all(ID, "TARGET", "TRGT"), | |
ID = str_replace_all(ID, "MEMBRANE", "MEM"), | |
ID = str_replace_all(ID, "COMPONENT", "COMP"), | |
ID = str_replace_all(ID, "PHYTO", "PHYT"), | |
ID = str_replace_all(ID, "AMINO ACID", "AA"), | |
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"), | |
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"), | |
ID = str_replace_all(ID, "APOPTOTIC", "APOP"), | |
ID = str_replace_all(ID, "DISEASES", "DIS"), | |
ID = str_replace_all(ID, "PROGRAMMED", "PROG"), | |
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"), | |
ID = str_replace_all(ID, "ACTIVITY", "ACT"), | |
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"), | |
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"), | |
ID = str_replace_all(ID, "SMALL", "SML"), | |
ID = str_replace_all(ID, "SUBUNIT", "SUB"), | |
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"), | |
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"), | |
ID = str_replace_all(ID, "HEPATOCELLULAR", "HEPATOCELL"), | |
ID = str_replace_all(ID, "TRANSITION", "TRANS"), | |
ID = str_replace_all(ID, "EXTRACELLULAR", "EXTRACELL"), | |
) |> | |
dplyr::mutate(ID = fct_reorder(ID, -log10(!!as.name(compared_to_padj)) * sign(!!as.name(compared_to_NES)), .desc = T)) |> | |
pivot_longer( | |
cols = c(ends_with(compared_from_suffixes)), | |
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison") | |
) |> | |
pivot_wider(names_from = "value_type", values_from = "value") |> | |
dplyr::mutate( | |
direction = case_when( | |
sign(NES) == -1 ~ "supressed", | |
T ~ "activated" | |
), | |
GeneRatio = numCoreEnrichment / setSize, | |
comparison = factor(comparison, levels = c(gsea_compare_to, gsea_comparisons)), | |
signed_log10padj = -log10(p.adjust) * sign(NES) | |
) |> | |
filter(p.adjust <= gsea_lollipop_cutoff_2 & | |
comparison %in% c(gsea_compare_to, gsea_comparisons) & | |
ontology %in% c("WP", "KEGG", "HALLMARK")) |> | |
mutate(comparison = case_when(comparison == "T21_vs_D21" ~ "iPSC T21", | |
comparison == "dwdT21SIT_vs_T21" ~ "iPSC Rescue", | |
comparison == "neural_t21_d21" ~ "Neural T21", | |
comparison == "neural_t21xistdoxcombined_t21" ~ "Neural Rescue", | |
comparison == "dox_effect" ~ "iPSC Dox"), | |
comparison = factor(comparison, levels = c("iPSC T21", "iPSC Rescue", "iPSC Dox", "Neural T21", "Neural Rescue"))) |> | |
ggplot() + | |
geom_segment(aes(x = 0, y = ID, xend = signed_log10padj, yend = ID), size = .3) + | |
geom_point(aes(x = signed_log10padj, y = ID, size = GeneRatio, color = NES)) + | |
scale_color_gradient2(midpoint = 0, low = "blue", high = "red") + | |
ggh4x::facet_grid2(cols = vars(comparison), rows = vars(ontology), scales = "free", space = "free_y", strip = ggh4x::strip_nested(clip = "off"), ) + | |
scale_y_discrete(label = function(x) abbreviate(x, minlength = 70, dot = T)) + | |
# scale_x_continuous(breaks = c(-4, 0, 4), expand = c(.2, 0.2)) + | |
geom_vline(xintercept = 0, size = .5) + | |
theme( | |
axis.text.x = element_text(angle = 65, hjust = 1), | |
axis.title.y = element_blank(), | |
axis.text.y = element_text(size=8), | |
axis.ticks.y = element_blank(), | |
strip.text.y = element_text(angle = 45, hjust = 0, vjust = .5), | |
legend.position = "top", | |
legend.title = element_text(margin = margin(r = 2, unit = "mm"), hjust = .5), | |
legend.key.height = unit(3, "mm"), | |
legend.key.width = unit(3, "mm"), | |
legend.margin = margin(r = 2, l = 2, unit = "mm") | |
) + | |
scale_size_area(max_size = 3) + | |
labs( | |
title = "Top T21 vs D21 GSEA Terms", | |
x = "signed\nlog10(p-adj)", | |
color = "NES" | |
) | |
ggsave(sprintf("%s/lollipop.pdf", fig_output_dir), width=7.4, height = 7.8) | |
## Make lineage score card for iPSCs ----------------------- | |
vsd_df <- batch_corrected_vsd_df |> | |
left_join(gene_info_df |> | |
dplyr::select(ensembl_id = ensembl_gene_id_version, gene_name = hgnc_symbol)) | |
thermo_lineage_genes <- read_csv("../Thermo_Lineage_Scorecard_genes.csv") | |
lineage_gene_counts_df <- vsd_df %>% | |
inner_join(thermo_lineage_genes, by=c("gene_name"="symbol")) %>% | |
reshape2::melt(id.vars=c("gene_name", "Lineage"), measure.vars=sample_sheet_df$sample_id, variable.name="sample_id") %>% | |
left_join(sample_sheet_df) | |
ordered_genes <- lineage_gene_counts_df %>% | |
group_by(gene_name) %>% | |
summarise(mean_expr=mean(value)) %>% | |
arrange(mean_expr) %>% | |
pull(gene_name) | |
lineage_gene_counts_df |> | |
mutate(`Cell Line`=str_replace(`Cell Line`, "rtTA-XIST", "T21XIST")) |> | |
ggplot(aes(x=factor(gene_name, levels=ordered_genes), y=value, fill=Lineage, color=Lineage)) + | |
geom_boxplot() + | |
labs(title="Lineage Scorecard for iPSCs", x="Genes", y="VST Counts") + | |
scale_fill_npg(guide="none") + | |
scale_color_npg(guide="none") + | |
facet_grid(rows=vars(`Cell Line`), cols=vars(Lineage), scales = "free", space="free") + | |
theme(axis.text.x = element_text(angle=90, vjust=.5, hjust=1, size=7)) | |
ggsave(sprintf("%s/lineage_scorecard.pdf", fig_output_dir), width=7.25, height = 4.8) |