Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
## ---------------------------
##
## Purpose of script: Generate figures for thesis
##
## Author: Prakhar Bansal
##
## Date Created: 2022-03-27
##
## Copyright (c) Prakhar Bansal, 2022
## Email: pbansal@uchc.edu
##
## ---------------------------
##
## Notes:
##
##
## ---------------------------
## set working directory
setwd(dirname(unlist(rstudioapi::getSourceEditorContext()['path'])))
## ---------------------------
## load up the packages we will need: (uncomment as required)
require(tidyverse)
theme_set(
theme_classic() +
theme(
text=element_text(size=9, family="sans"),
plot.title=element_text(hjust=0.5, size=10, family = "sans", face="bold"),
axis.title = element_text(size=9, face="bold"),
axis.text = element_text(size=9),
legend.spacing.x = unit(0, "cm"),
legend.spacing.y = unit(1, "mm"),
legend.key.height = unit(4, "mm"),
legend.margin = margin(0,0,0,0),
axis.line = element_line(size=0),
strip.background = element_blank(),
strip.text = element_text(margin = margin(1,1,1,1, unit="mm")),
panel.border = element_rect(color="black", fill=NA, size=.5)
)
)
require(ggsci)
library(ggdendro)
library(ComplexHeatmap)
library(rstatix)
library(ggpubr)
## ---------------------------
output_dir <- "outputs/2024-02-17 21-39-53.641348 timepoint+dox_status+effective_genotype"
diff_expr_compare_to <- "T21_vs_D21"
diff_expr_comparisons <- c("dox_effect", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21")
diff_expr_combined_padj_cutoff <- .1
gsea_compare_to <- "T21_vs_D21"
gsea_comparisons <- c("dox_effect", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21")
chr21_changes_comparisons <- c("dT21XIST_vs_T21", "wdT21XIST_vs_T21")
gsea_combined_padj_cutoff <- .2
lol_cutoff_primary <- .1
lol_cutoff_overlap <- .1
fig_output_dir <- sprintf("revision_figs/%s", str_replace(output_dir, "outputs/", ""))
table_output_dir <- sprintf("revision_tables/%s", str_replace(output_dir, "outputs/", ""))
dir.create(fig_output_dir)
dir.create(table_output_dir)
gene_info_df <- read_csv("input_data/gene_info_df.csv.gz")
sample_sheet_df <- read_csv(paste0(output_dir, "/tables/sample_sheet_df.csv.gz"))
batch_corrected_vsd_df <- read_csv(paste0(output_dir, "/tables/batch_corrected_vsd_salmon_df.csv.gz"))
combined_results_df <- read_csv(paste0(output_dir, "/tables/combined_results_df.csv.gz"))
neural_output_dir <- (read_csv(paste0(output_dir, "/params.csv.gz")) |>
filter(parameter == "neural_output_dir") |>
pull(value))[1]
neural_results_nonrtta_T21_vs_D21 <- read_csv(paste0(neural_output_dir, "/tables/results_neural_t21_d21.csv.gz"))
neural_combined_results_df <- read_csv(paste0(neural_output_dir, "/tables/combined_results_df.csv.gz"))
combined_results_df <- read_csv(paste0(output_dir, "/tables/combined_results_df.csv.gz")) |>
full_join(neural_combined_results_df) |>
mutate_at(vars(starts_with("padj")), list(~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("pvalue")), list(~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("log2FoldChange")), list(~ if_else(is.na(.), 0, .))) |>
mutate_at(vars(starts_with("stat")), list(~ if_else(is.na(.), 0, .))) |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version"))
write_csv(combined_results_df, paste0(table_output_dir, "/deseq2_results.csv"))
combined_results_df |>
dplyr::select(ensembl_id, hgnc_symbol, chromosome_name, start_position, gene_biotype,
ends_with(c("T21_vs_D21", "D21_vs_T21", "dT21XIST_vs_T21",
"wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"neural_t21_d21", "neural_t21xistdox_t21", "neural_t21xistwd_t21", "neural_t21xistdoxcombined_t21"))) |>
rename_all(
.funs = list(
~str_replace_all(.,"dwdT21SIT_vs_T21", "combinedT21XIST_vs_T21") |>
str_replace_all("neural_t21_d21", "neural_T21_vs_D21") |>
str_replace_all("neural_t21xistdox_t21", "neural_dT21XIST_vs_T21") |>
str_replace_all("neural_t21xistwd_t21", "neural_wdT21XIST_vs_T21") |>
str_replace_all("neural_t21xistdoxcombined_t21", "neural_combinedT21XIST_vs_T21")
)
) |>
write_csv(paste0(table_output_dir, "/Data_S1_bulk_deseq2.csv.gz"))
######################## Diff Expr 198-1 and 198-2 #############################
dds <- readRDS(sprintf("%s/tables/dds.rds", output_dir))
vsd <- varianceStabilizingTransformation(dds, blind = F)
sprintf("Outputting DESeq2 Results...\n")
diff_exprs <- list(
"1981_vs_1982" = c(
'cell_line == "198_1"',
'cell_line == "198_2"',
"timepoint|dox_status")
)
for (de_name in names(diff_exprs)) {
write_results(de_name,
diff_exprs[[de_name]][1],
diff_exprs[[de_name]][2],
dds,
outdir,
diff_exprs[[de_name]][3])
}
# MA plot between D21 and T21XIST
vsd_salmon_df <- read_csv(paste0(output_dir, "/tables/vsd_salmon_df.csv.gz"))
mean_d21_t21xist_vst_df <- vsd_salmon_df |>
pivot_longer(cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count") |>
left_join(sample_sheet_df, by="sample_id") |>
filter(genotype == "D21" | (genotype == "T21XIST" & dox_status != "no")) |>
group_by(ensembl_id) |>
summarise(mean_vst_count = mean(vst_count))
aov_d21_t21xist_df <- aov(vst_count ~ group,
vsd_salmon_df |>
pivot_longer(cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count") |>
left_join(sample_sheet_df, by="sample_id") |>
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |>
# filter(chromosome_name != "21") |>
filter(genotype == "D21" | (genotype == "T21XIST" & dox_status != "no")) |>
mutate(group = case_when(
genotype == "D21" ~ "D21",
genotype == "T21XIST" & dox_status != "no" ~ "T21XIST"
))) |>
broom::tidy()
aov_t21_d21_df <- aov(vst_count ~ group,
vsd_salmon_df |>
pivot_longer(cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count") |>
left_join(sample_sheet_df, by="sample_id") |>
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |>
# filter(chromosome_name != "21") |>
filter(genotype == "D21" | genotype == "T21") |>
mutate(group = case_when(
genotype == "D21" ~ "D21",
genotype == "T21" ~ "T21"
))) |>
broom::tidy()
aov_t21_t21xist_df <- aov(vst_count ~ group,
vsd_salmon_df |>
pivot_longer(cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count") |>
left_join(sample_sheet_df, by="sample_id") |>
left_join(gene_info_df |> dplyr::select(ensembl_id=ensembl_gene_id_version, chromosome_name)) |>
# filter(chromosome_name != "21") |>
filter(genotype == "T21" | (genotype == "T21XIST" & dox_status != "no")) |>
mutate(group = case_when(
genotype == "T21" ~ "T21",
genotype == "T21XIST" & dox_status != "no" ~ "T21XIST"
))) |>
broom::tidy()
aov_d21_t21xist_df |>
mutate(comparison = "T21XIST vs D21") |>
rbind(
aov_t21_t21xist_df |>
mutate(comparison = "T21XIST vs T21")) |>
rbind(
aov_t21_d21_df |>
mutate(comparison = "T21 vs D21")) |>
filter(term == "group") |>
ggplot(aes(x=comparison, y=statistic)) +
geom_col() +
labs(
title="ANOVA Test Statistic w/ chr21",
# title="ANOVA Test Statistic",
x="Groups Compared") +
geom_text(aes(label=sprintf("p = %.1g", p.value)), vjust=-0.2, size=3) +
# scale_y_continuous(limits=c(0,1)) +
scale_y_continuous(limits=c(0,2.4)) +
theme(
axis.text.x = element_text(angle=30, hjust=1),
axis.title.x = element_blank()
)
ggsave(sprintf("%s/anova_statistic.pdf", fig_output_dir), width = 2, height=2)
ggsave(sprintf("%s/anova_statistic_withchr21.pdf", fig_output_dir), width = 2, height=2)
ggplotly(combined_results_df |>
# filter(!(gene_biotype %in% c("rRNA", "TEC")) & !is.na(gene_biotype))|>
filter((gene_biotype %in% c("protein_coding")) & !is.na(gene_biotype))|>
select(ensembl_id, chromosome_name, hgnc_symbol, gene_biotype, ends_with("_dwdT21SIT_vs_D21")) |>
left_join(mean_d21_t21xist_vst_df) |>
mutate(colorby = case_when(
padj_dwdT21SIT_vs_D21 > .1 ~ "nonDE",
padj_dwdT21SIT_vs_D21 <= .1 & log2FoldChange_dwdT21SIT_vs_D21 > 0 ~ "upReg",
padj_dwdT21SIT_vs_D21 <= .1 & log2FoldChange_dwdT21SIT_vs_D21 < 0 ~ "downReg"
)) |>
ggplot(aes(x=mean_vst_count, y=log2FoldChange_dwdT21SIT_vs_D21, color=colorby, name=ensembl_id, biotype=gene_biotype)) +
geom_point() +
scale_color_manual(values=c("blue", "grey", "red"), limits=c("downReg", "nonDE", "upReg")))
# MA plot between D21 and T21
vsd_salmon_df <- read_csv(paste0(output_dir, "/tables/vsd_salmon_df.csv.gz"))
mean_d21_t21xist_vst_df <- vsd_salmon_df |>
pivot_longer(cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count") |>
left_join(sample_sheet_df, by="sample_id") |>
filter(genotype == "D21" | (genotype == "T21")) |>
group_by(ensembl_id) |>
summarise(mean_vst_count = mean(vst_count))
ggplotly(combined_results_df |>
# filter(!(gene_biotype %in% c("rRNA", "TEC")) & !is.na(gene_biotype))|>
filter((gene_biotype %in% c("protein_coding")) & !is.na(gene_biotype))|>
select(ensembl_id, chromosome_name, hgnc_symbol, gene_biotype, ends_with("_T21_vs_D21")) |>
left_join(mean_d21_t21xist_vst_df) |>
mutate(colorby = case_when(
padj_T21_vs_D21 > .1 ~ "nonDE",
padj_T21_vs_D21 <= .1 & log2FoldChange_T21_vs_D21 > 0 ~ "upReg",
padj_T21_vs_D21 <= .1 & log2FoldChange_T21_vs_D21 < 0 ~ "downReg"
)) |>
ggplot(aes(x=mean_vst_count, y=log2FoldChange_T21_vs_D21, color=colorby, name=ensembl_id, biotype=gene_biotype)) +
geom_point() +
scale_color_manual(values=c("blue", "grey", "red"), limits=c("downReg", "nonDE", "upReg")))
################################################################################
combined_results_df |>
pivot_longer(
cols = c(tidyselect::ends_with(c(
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(id_cols = c(ensembl_id, comparison),
names_from = value_type,
values_from = value) |>
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name),
by=c("ensembl_id"="ensembl_gene_id_version")) |>
mutate(padj = case_when(is.na(padj) ~ 1,
T ~ padj),
chromosome_name = case_when(is.na(chromosome_name) ~ "NA",
T ~ chromosome_name)) |>
dplyr::count(comparison, chromosome_name == "21", log2FoldChange > 0, padj <= .01) |>
pivot_wider(names_from = comparison,
values_from = n) |>
arrange(`padj <= 0.01`, desc(`chromosome_name == "21"`), desc(`log2FoldChange > 0`)) |>
dplyr::select(`padj <= 0.01`, `chromosome_name == "21"`, `log2FoldChange > 0`,
T21_vs_D21, dT21XIST_vs_D21, dwdT21SIT_vs_D21,
dT21XIST_vs_T21, dwdT21SIT_vs_T21) |>
write_csv("revision_figs/diff_expr_counts.csv")
gene_updown_df <- combined_results_df |>
pivot_longer(
cols = c(tidyselect::ends_with(c(
"dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"T21_vs_D21"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(id_cols = c(ensembl_id, comparison),
names_from = value_type,
values_from = value) |>
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name),
by=c("ensembl_id"="ensembl_gene_id_version")) |>
mutate(padj = case_when(is.na(padj) ~ 1,
T ~ padj),
chromosome_name = case_when(is.na(chromosome_name) ~ "NA",
T ~ chromosome_name)) |>
dplyr::count(comparison, chromosome_name == "21", log2FoldChange > 0, padj <= .1) |>
pivot_wider(names_from = comparison,
values_from = n) |>
arrange(`padj <= 0.1`, desc(`chromosome_name == "21"`), desc(`log2FoldChange > 0`)) |>
dplyr::select(`padj <= 0.1`,
`chromosome_name == "21"`,
`log2FoldChange > 0`,
T21_vs_D21, dT21XIST_vs_T21, wdT21XIST_vs_T21, dwdT21SIT_vs_T21) |>
pivot_longer(cols = -c(`padj <= 0.1`,
`chromosome_name == "21"`,
`log2FoldChange > 0`),
names_to = "comparison",
values_to= "num_genes" ) |>
filter(`padj <= 0.1` == T) |>
mutate(`chromosome_name == "21"` = case_when(
`chromosome_name == "21"` == T ~ "chr21",
T ~ "non chr21"
))
binom_test_data_df <- gene_updown_df |>
pivot_wider(names_from = `log2FoldChange > 0`, values_from = "num_genes") |>
mutate(total = `TRUE` + `FALSE`) |>
mutate(binom_pval = purrr::pmap(.l = list(x=`TRUE`, tot=total),
.f = \(x, tot) (binom.test(x = x,
n = tot,
alternative = "two.sided")$p.value)[1])) |>
tidyr::unnest(binom_pval)
ggplot(gene_updown_df) +
geom_col(aes(x=comparison, y=num_genes, fill=`log2FoldChange > 0`), position = position_dodge()) +
facet_grid(rows = vars(`chromosome_name == "21"`), scales = "free_y") +
geom_text(data = binom_test_data_df |>
mutate(max_count = pmax(`TRUE`, `FALSE`)),
mapping = aes(x=comparison, y=max_count*1.2, label=sprintf("%.2g", binom_pval), vjust=1),
size=3) +
scale_fill_aaas() +
scale_x_discrete(limits = c("T21_vs_D21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21"),
labels = c("T21 vs D21", "+dox vs T21", "w/d vs T21", "combined vs T21")) +
theme(axis.title.x = element_blank()) +
labs(y = "# Genes",
title = "Differential Expression by Chromosome")
ggsave(sprintf("%s/diff_expr_by_chr.pdf", fig_output_dir), width = 4, height = 3)
combined_results_df |>
pivot_longer(
cols = c(tidyselect::ends_with(c(
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(id_cols = c(ensembl_id, comparison),
names_from = value_type,
values_from = value) |>
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name),
by=c("ensembl_id"="ensembl_gene_id_version")) |>
mutate(padj = case_when(is.na(padj) ~ 1,
T ~ padj),
chromosome_name = case_when(is.na(chromosome_name) ~ "NA",
T ~ chromosome_name),
is21 = case_when(chromosome_name == "21" ~ "chr21",
T ~ "non chr21")) |>
filter(comparison %in% c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21")) |>
ggplot() +
geom_density(aes(x=padj, color = comparison)) +
facet_grid(rows = vars(is21)) +
scale_color_aaas(breaks = c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21"), labels = c("T21XIST vs T21", "T21XIST vs D21")) +
scale_x_log10(limits=c(1e-4, 1e0), oob = scales::squish) +
labs(title="Distribution of padj")
ggsave(sprintf("%s/2_dist_padj.png", fig_output_dir))
ggsave(sprintf("%s/2_dist_padj_linear.png", fig_output_dir))
combined_results_df |>
pivot_longer(
cols = c(tidyselect::ends_with(c(
"D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"T21_vs_D21", "dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(id_cols = c(ensembl_id, comparison),
names_from = value_type,
values_from = value) |>
left_join(gene_info_df |> dplyr::select(ensembl_gene_id_version, chromosome_name),
by=c("ensembl_id"="ensembl_gene_id_version")) |>
mutate(padj = case_when(is.na(padj) ~ 1,
T ~ padj),
chromosome_name = case_when(is.na(chromosome_name) ~ "NA",
T ~ chromosome_name),
is21 = case_when(chromosome_name == "21" ~ "chr21",
T ~ "non chr21")) |>
filter(comparison %in% c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21") &
padj <= .01) |>
ggplot() +
geom_density(aes(x=log2FoldChange, color = comparison)) +
facet_grid(rows = vars(is21)) +
geom_vline(xintercept = 0) +
scale_color_aaas(breaks = c("dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21"), labels = c("T21XIST vs T21", "T21XIST vs D21")) +
scale_x_continuous(limits = c(-1.5, 1.5), breaks = c(seq(0,15, by=2)/10, seq(0,15, by=2)/-10), oob = scales::squish) +
labs(title="Distribution of significant log2FC (padj <= .01)")
ggsave(sprintf("%s/2_dist_log2fc_01.png", fig_output_dir))
file.copy(fs::dir_ls(path = sprintf("%s/tables", neural_output_dir),
glob = sprintf(
"%s/tables/gsea_*csv.gz",
neural_output_dir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)),
sprintf("%s/tables", output_dir),
overwrite=TRUE)
gsea_files <- fs::dir_ls(
path = sprintf("%s/tables", output_dir),
glob = sprintf(
"%s/tables/gsea_*csv.gz",
output_dir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)
)
print(gsea_files)
combined_enrichment_df <- read_csv(gsea_files,
id = "origin_file") |>
mutate(comparison = str_replace_all(origin_file, sprintf("(%s/tables/gsea_)|(.csv.gz)", output_dir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\(", "\\\\(") |>
str_replace_all("\\)", "\\\\)") |>
str_replace_all("\\,", "\\\\,")), "")) |>
mutate(
leadingEdgeTags = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 1], "tags=|%", "")) / 100,
leadingEdgeList = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 2], "list=|%", "")) / 100,
leadingEdgeSignal = as.numeric(str_replace_all(str_split(leading_edge, ", ", simplify = T)[, 3], "signal=|%", "")) / 100,
numCoreEnrichment = str_count(str_split(core_enrichment, "/")) + 1
) |>
pivot_wider(id_cols = ID, names_from = comparison, values_from = c(NES, p.adjust, qvalue, enrichmentScore, setSize, leadingEdgeTags, leadingEdgeList, leadingEdgeSignal, numCoreEnrichment)) |>
mutate_at(vars(starts_with("p.adjust")), list(~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("qvalues")), list(~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("NES")), list(~ if_else(is.na(.), 0, .))) |>
mutate_at(vars(starts_with("enrichmentScore")), list(~ if_else(is.na(.), 0, .))) |>
# left_join(term_clusters_df, by = c("ID" = "term_name")) |>
# left_join(advanced_terms_clusters_df |> dplyr::select(clustered, top_words)) |>
dplyr::mutate(category = dplyr::case_when(
str_detect(ID, "STRESS") ~ "stress",
str_detect(ID, "ENDOPLASMIC_RETICULUM") ~ "ER",
str_detect(ID, "chr21") ~ "chr21",
str_detect(ID, "TRANSLATION") ~ "translation",
str_detect(ID, "RIBOSOM") ~ "ribosome",
str_detect(ID, "SERINE") & (!str_detect(ID, "SERINE_THREONINE")) ~ "serine",
str_detect(ID, "MITOCH") ~ "mitochondria",
str_detect(ID, "NRF2") ~ "stress",
# str_detect(ID, "FOLATE") ~ "Folate",
# str_detect(ID, "MTORC1") ~ "MTORC1",
str_detect(ID, "APOPTO") | str_detect(ID, "PROGRAMMED_CELL_DEATH") ~ "apoptosis",
# str_detect(top_words, "EXTRACELLULAR|MATRIX") ~ "ECM",
T ~ "other"
))
write_csv(combined_enrichment_df, paste0(table_output_dir, "/bulk_gsea_results.csv"))
combined_enrichment_df |>
dplyr::select(ID, category,
ends_with(c("T21_vs_D21", "D21_vs_T21", "dT21XIST_vs_T21",
"wdT21XIST_vs_T21", "dwdT21SIT_vs_T21",
"neural_t21_d21", "neural_t21xistdox_t21", "neural_t21xistwd_t21", "neural_t21xistdoxcombined_t21"))) |>
rename_all(
.funs = list(
~str_replace_all(.,"dwdT21SIT_vs_T21", "combinedT21XIST_vs_T21") |>
str_replace_all("neural_t21_d21", "neural_T21_vs_D21") |>
str_replace_all("neural_t21xistdox_t21", "neural_dT21XIST_vs_T21") |>
str_replace_all("neural_t21xistwd_t21", "neural_wdT21XIST_vs_T21") |>
str_replace_all("neural_t21xistdoxcombined_t21", "neural_combinedT21XIST_vs_T21")
)
) |>
write_csv(paste0(table_output_dir, "/Data_S2_bulk_gsea.csv.gz"))
## PCA & dendro
batch_corrected_vsd <- batch_corrected_vsd_df |>
column_to_rownames(var="ensembl_id")
pca <- batch_corrected_vsd |>
t() |>
prcomp()
pca["x"] |>
as.data.frame() |>
rownames_to_column(var = "sample_id") |>
rename_with(~ str_remove(.x, "x\\.")) |>
left_join(sample_sheet_df, by="sample_id") |>
mutate(
designation = case_when(
cell_line %in% c("198_1", "198_2") ~ "Euploid",
cell_line == "198_5" ~ "T21",
dox_status == "no" ~ "T21",
dox_status == "yes" ~ "Euploid",
dox_status == "withdrawal" ~ "w/d"
),
cell_line = factor(case_when(
cell_line == "rtTA_XIST_c1" ~ "c1",
cell_line == "rtTA_XIST_c3" ~ "c3",
cell_line == "rtTA_XIST_c4" ~ "c4",
cell_line == "rtTA_XIST_c7" ~ "c7",
cell_line == "198_1" ~ "198-1",
cell_line == "198_2" ~ "198-2",
cell_line == "198_5" ~ "198-5"
),
levels = c("c1", "c3", "c4", "c7", "198-1", "198-2", "198-5")
)
) |>
ggplot(aes(x = PC1, y = PC2, color = designation, shape = cell_line)) +
geom_point(size = 1.5) +
scale_color_manual(values = pal_d3()(5)[3:5], name = "Designation") +
scale_shape_manual(limits=c("c1", "c4", "c7", "198-1", "198-2", "198-5"), values = c(15,16,17,0,1,2), name="Cell Line") +
# geom_label(aes(label = sample_id)) +
labs(
x = sprintf("PC%d (%.2f%%)", 1, (pca$sdev^2 / sum(pca$sdev^2) * 100)[1]),
y = sprintf("PC%d (%.2f%%)", 2, (pca$sdev^2 / sum(pca$sdev^2) * 100)[2]),
title = "PCA"
) +
theme(legend.box.margin = margin(l=-3, r=-2, unit = "mm"))
ggsave(sprintf("%s/pca.pdf", fig_output_dir), width = 3.12, height = 2.3)
sampleTree <- hclust(dist(t(batch_corrected_vsd)), method = "average")
# ggdendrogram(sampleTree) + labs(title = "WGCNA Sample Dendrogram")
sample_dend_data <- dendro_data(sampleTree, type = "rectangle")
sample_dend_data$labels <- sample_dend_data$labels %>%
left_join(sample_sheet_df |>
dplyr::select(sample_id, dox_status, genotype, effective_ploidy), by = c("label" = "sample_id"))
ggplot() +
geom_segment(
data = sample_dend_data$segments,
mapping = aes(x = x, y = y, xend = xend, yend = yend)
) +
geom_text(
data = sample_dend_data$labels,
aes(x = x, y = y, label = label),
angle = 90, size = 3
) +
geom_tile(
data = sample_dend_data$labels,
aes(x = x, y = -6, fill = effective_ploidy), height = 2
) +
annotate(geom = "text", x = 50, y = -6, label = "Effective Ploidy", vjust = .5) +
scale_x_continuous(limits = c(0, 60)) +
scale_fill_npg() +
geom_tile(
data = sample_dend_data$labels,
aes(x = x, y = -9, fill = genotype), height = 2
) +
annotate(geom = "text", x = 50, y = -9, label = "Genotype", hjust = 0) +
geom_tile(
data = sample_dend_data$labels,
aes(x = x, y = -12, fill = dox_status), height = 2
) +
annotate(geom = "text", x = 50, y = -12, label = "Dox Status", hjust = 0)
## DESeq2 Results Summary
setups_df <- read_csv(paste0(output_dir, "/diff_exprs.csv.gz"))
## Chr21 Changes
gene_counts_df <- batch_corrected_vsd_df |>
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vsd_count") |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
left_join(sample_sheet_df)
gene_counts_df |>
filter(hgnc_symbol == "XIST" &
genotype == "T21XIST") |>
mutate(
dox_status = case_when(
dox_status == "yes" ~ "+dox",
dox_status == "no" ~ "-dox",
dox_status == "withdrawal" ~ "w/d"
),
dox_status = factor(dox_status, levels = c("-dox", "+dox", "w/d")),
clone = case_when(
cell_line == "rtTA_XIST_c1" ~ "c1",
cell_line == "rtTA_XIST_c3" ~ "c3",
cell_line == "rtTA_XIST_c4" ~ "c4",
cell_line == "rtTA_XIST_c7" ~ "c7"
),
timepoint = case_when(
timepoint == "14wk" ~ "14wk (6)",
timepoint == "17wk" ~ "17wk (6)",
timepoint == "20wk" ~ "20wk (9)",
timepoint == "3wk" ~ "3wk (6)",
timepoint == "6wk" ~ "6wk (9)",
timepoint == "9wk" ~ "9wk (8)",
T ~ as.character(timepoint)
),
timepoint = factor(timepoint, levels=rev(c("3wk (6)", "6wk (9)", "9wk (8)",
"14wk (6)", "17wk (6)", "20wk (9)")))
) |>
arrange(timepoint) |>
ggplot(aes(y = vsd_count, x = clone, color = timepoint)) +
facet_grid(cols = vars(dox_status)) +
geom_violin(color = "lightgrey") +
ggbeeswarm::geom_quasirandom(size = 1) +
scale_color_manual(values = c(pal_aaas()(4), pal_d3()(2)[2], pal_cosmic()(4)[4]), name="Timepoint",
limits=c("3wk (6)", "6wk (9)", "9wk (8)",
"14wk (6)", "17wk (6)", "20wk (9)")) +
# scale_color_d3(name = "Timepoint") +
theme(axis.title.x = element_blank()) +
labs(
title = "XIST Expression",
y = "VST Count"
)
ggsave(sprintf("%s/xist_expression.pdf", fig_output_dir), width = 3, height = 1.7)
# using complexheatmap to add more sample and gene annotations
chr21_rownorm_vsd <- batch_corrected_vsd_df |>
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vst_count") |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
left_join(sample_sheet_df, by = c("sample_id")) |>
filter(chromosome_name == "21") |>
group_by(ensembl_id) |>
mutate(
row_norm_count = vst_count - mean(cur_data() %>% filter(`Cell Line` == "198-5") %>% pull(vst_count)),
mean_1985_count = mean(cur_data() %>% filter(`Cell Line` == "198-5") %>% pull(vst_count))
) |>
ungroup() |>
mutate(dox_status = factor(dox_status, levels = c("no", "yes", "withdrawal")))
chr21_rownorm_hm_df <- chr21_rownorm_vsd |>
filter( # mean_1985_count>8 &
!(gene_biotype %in% c("rRNA", "TEC"))) |>
mutate(
gene_biotype = factor(case_when(
gene_biotype == "protein_coding" ~ "Protein\nCoding",
gene_biotype == "processed_pseudogene" ~ "Pseudo-\ngene",
gene_biotype == "transcribed_processed_pseudogene" ~ "Pseudo-\ngene",
T ~ gene_biotype),
levels = c("Protein\nCoding", "lncRNA", "Pseudo-\ngene")))
ipsc_rownorm_mat <- chr21_rownorm_hm_df |>
pivot_wider(id_cols = ensembl_id,
names_from = sample_id,
values_from = row_norm_count) |>
column_to_rownames(var="ensembl_id")
t21count_col_fun <- circlize::colorRamp2(c(8,16), c("#ffffff", "#7105E0"))
gene_annotation <- rowAnnotation(
`Mean T21 Count` =
anno_simple(
x=chr21_rownorm_hm_df |>
dplyr::select(ensembl_id, mean_1985_count) |>
dplyr::distinct() |>
deframe(),
col = t21count_col_fun,
simple_anno_size = unit(3, "mm")),
annotation_name_gp= gpar(fontsize = 7))
lgd_t21count = Legend(title = "Mean T21\nCount", col_fun = t21count_col_fun, at = c(8,10,12,14,16),
labels = c(8,10,12,14,16),
direction = "horizontal",
labels_gp=gpar(fontsize=7),
title_gp=gpar(fontsize=7),
grid_height = unit(2, "mm"))
sample_annotation <- columnAnnotation(
`Cell Line` = chr21_rownorm_hm_df |>
dplyr::select(sample_id, `Cell Line`) |>
dplyr::distinct() |>
mutate(`Cell Line` = str_replace(`Cell Line`, "rtTA-XIST", "T21XIST")) |>
column_to_rownames(var="sample_id") |>
deframe(),
`Batch` = chr21_rownorm_hm_df |>
dplyr::select(sample_id, `timepoint`) |>
dplyr::distinct() |>
column_to_rownames(var="sample_id") |>
deframe() |>
factor(levels=c("3wk", "6wk", "9wk", "14wk", "17wk", "20wk")),
simple_anno_size = unit(3, "mm"),
annotation_legend_param = list(labels_gp=gpar(fontsize=7), title_gp=gpar(fontsize=7), grid_height = unit(2, "mm"), grid_width = unit(2, "mm")),
col = list(`Cell Line` = c("198-1"="#1697a6", "198-2"="#0e606b", "198-5"="#ffc24b",
"T21XIST c1"="#fff4f1", "T21XIST c4"="#ffb3ae", "T21XIST c7"="#f47068"),
`Batch` = c("3wk"="#faedcd", "6wk"="#e7c8a0", "9wk"="#d4a373",
"14wk"="#a1c349", "17wk"="#87a330", "20wk"="#6a8532")),
annotation_name_gp= gpar(fontsize = 7)
)
ipsc_hm <- Heatmap(ipsc_rownorm_mat,
col=circlize::colorRamp2(c(-1, 0, 1), c("#3784AF", "#f7f7f7", "#D34B16")),
# col=circlize::colorRamp2(c(-1, 0, 1), c("#0571B0", "#f7f7f7", "#CA0020")),
name="Row Norm\nVST Count",
border=T,
cluster_rows = F,
row_order = chr21_rownorm_hm_df |> dplyr::select(ensembl_id, mean_1985_count) |> deframe() |> sort() |> names() |> unique() |> rev(),
row_split = chr21_rownorm_hm_df |>
dplyr::select(ensembl_id, gene_biotype) |>
distinct() |>
deframe(),
row_title_side = "right",
row_gap = unit(1, "mm"),
show_row_names = F,
row_title_gp = gpar(fontsize = 7),
row_title_rot = 45,
column_title_gp = gpar(fontsize = 7),
column_names_gp = gpar(fontsize = 7),
left_annotation = gene_annotation,
show_column_dend=F,
column_split = chr21_rownorm_hm_df |>
mutate(split_label = case_when(
genotype == "T21XIST" & dox_status == "yes" ~ "+dox",
genotype == "T21XIST" & dox_status == "no" ~ "-dox",
genotype == "T21XIST" & dox_status == "withdrawal" ~ "w/d",
T ~ genotype)) |>
dplyr::select(sample_id, split_label) |>
distinct() |>
deframe() |>
factor(levels=c("T21", "-dox", "+dox", "w/d", "D21")),
cluster_column_slices=F,
column_gap = unit(1, "mm"),
show_column_names = F,
bottom_annotation = sample_annotation,
heatmap_legend_param = list(direction = "horizontal",
labels_gp=gpar(fontsize=7),
title_gp=gpar(fontsize=7),
grid_height = unit(2, "mm")))
pdf(sprintf("%s/expression_relative_to_1985.pdf", fig_output_dir), width = 3.5, height = 4.7)
draw(ipsc_hm, annotation_legend_list = list(lgd_t21count),
merge_legend=T, heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
dev.off()
chr21_changes_comparisons <- c("D21_vs_T21", "dT21XIST_vs_T21", "wdT21XIST_vs_T21")
chr21_changes_comparison_cols <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
chr21_changes_comparisons
)) |> map(lift(paste0)) |> unlist()
chr21_changes_df <- combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(chr21_changes_comparison_cols)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985,
# padj = padj_dox_t21xist_vs_1985,
# baseMean = baseMean_dox_t21xist_vs_1985,
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
)
chr21_arms <- read_tsv("http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz",
col_names = c("chrom","chromStart","chromEnd","name","gieStain")) |>
filter(chrom == "chr21") |>
mutate(arm = substring(name, 1, 1)) |>
group_by(chrom, arm) |>
summarise(start_pos = min(chromStart),
end_pos = max(chromEnd),
len = end_pos - start_pos)
chr21_changes_df |>
dplyr::select(ensembl_id, start_position, end_position) |>
unique() |>
filter(start_position < 12000000) |>
arrange(start_position) |>
top_n(-1)
ggplot(
chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d"))),
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean)
) +
facet_grid(rows = vars(comparison)) +
geom_point(alpha = .5) +
scale_color_manual(
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]),
limits = c("padj≤.1 & FC=(2/3)±.1", "padj≤.1 & FC=(2/3)±.15", "padj≤.1", "padj>.1")
) +
scale_size_area(limits = c(NA, 10000), oob = scales::squish, max_size = 4, breaks=c(1,10,100,1000,10000), guide = "none") +
labs(
title = "chr21 Dosage Correction",
color = "Gene Category",
size = "Base Mean Expression",
x = "chr21 Genes (ordered by position)",
y = "log2(FC)"
) +
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) +
geom_hline(yintercept = 0, linetype = "solid") +
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) +
# geom_vline(xintercept = "ENSG00000277117.5", linetype = "dotted", size=.8) +
guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top",
# legend.position = "right",
legend.box.margin = margin(t=-5,b=-9),
legend.spacing.y = unit(1, "mm")
# legend.spacing.x = unit(3, "mm"),
# legend.margin = margin(3,3,3,3, "mm"),
# legend.box.background = element_rect(color = "black", size=.5)
) +
scale_y_continuous(breaks = c(-1,0, 1)) +
coord_cartesian(ylim = c(-1.5, 1.5))
ggsave(sprintf("%s/hsa21_dosage_correction.pdf", fig_output_dir), width = 3.5, height = 2.75, device = cairo_pdf)
ggsave(sprintf("%s/hsa21_dosage_correction_basemeanscale.pdf", fig_output_dir), width = 3.5, height = 2.75, device = cairo_pdf)
chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d")),
baseMean = case_when(is.na(baseMean) ~ 0,
T ~ baseMean)) |>
dplyr::count(comparison, color_label, baseMean >= 1000) |>
pivot_wider(names_from = `baseMean >= 1000`, values_from = n) |>
dplyr::rename("baseMean>=1000"=`TRUE`, "baseMean<1000"=`FALSE`) |>
write_csv(sprintf("%s/ipsc_hsa21_dosage_correction_counts.csv", table_output_dir), quote = "all")
chr21_changes_comparisons <- c("dT21XIST_vs_T21", "wdT21XIST_vs_T21")
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985,
# padj = padj_dox_t21xist_vs_1985,
# baseMean = baseMean_dox_t21xist_vs_1985,
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
) |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
comparison == "dwdT21SIT_vs_T21" ~ "combined",
T ~ comparison)) |>
# filter(padj <= .1) |>
filter(baseMean >= 1000) |>
ggplot() +
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) +
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
geom_density(aes(x = log2FoldChange, color = comparison)) +
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) +
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) +
coord_cartesian(xlim = c(-1.5, 1.5)) +
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) (baseMean >= 1000)") +
theme(legend.position = c(.8, .5))
ggsave(sprintf("%s/hsa21_log2fc_1000.pdf", fig_output_dir), width = 3.5, height = 1.5)
# ggsave(sprintf("%s/hsa21_log2fc.pdf", fig_output_dir), width = 3.25, height = 1.5)
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985,
# padj = padj_dox_t21xist_vs_1985,
# baseMean = baseMean_dox_t21xist_vs_1985,
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
) |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
comparison == "dwdT21SIT_vs_T21" ~ "combined",
T ~ comparison)) |>
filter(padj <= .1) |>
group_by(comparison) |>
summarise(num_genes = length(unique(ensembl_id)))
combined_results_df |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c("dT21XIST_vs_D21", "wdT21XIST_vs_D21", "dwdT21SIT_vs_D21")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
) |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_D21" ~ "+dox",
comparison == "wdT21XIST_vs_D21" ~ "w/d",
comparison == "dwdT21SIT_vs_D21" ~ "combined",
T ~ comparison)) |>
# filter(padj <= .1) |>
filter(baseMean >= 1000) |>
ggplot() +
# geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
# geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
# geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
# geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
# annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) +
# annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
# annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
geom_density(aes(x = log2FoldChange, color = comparison)) +
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) +
# geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
# annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) +
coord_cartesian(xlim = c(-1.5, 1.5)) +
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) from D21\niPSCs (baseMean >= 1000)") +
theme(legend.position = c(.8, .7))
ggsave(sprintf("%s/hsa21_log2fc_d21_1000.pdf", fig_output_dir), width = 3.5, height = 2)
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c(chr21_changes_comparisons, "dwdT21SIT_vs_T21")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985,
# padj = padj_dox_t21xist_vs_1985,
# baseMean = baseMean_dox_t21xist_vs_1985,
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .1), log2((2 / 3) + .1)) ~ "padj≤.1 & FC=(2/3)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((2 / 3) - .15), log2((2 / 3)+.15)) ~ "padj≤.1 & FC=(2/3)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
) |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
comparison == "dwdT21SIT_vs_T21" ~ "combined",
T ~ comparison)) |>
filter(padj <= .1) |>
ggplot() +
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) +
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
geom_density(aes(x = log2FoldChange, color = comparison)) +
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) +
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
annotate("text", x = -1.2, y = 2, label = "log2(2/3)", color = "black", size = 2) +
coord_cartesian(xlim = c(-1.5, 1.5)) +
labs(y = "Density", x = "log2(FC)", title = "HSA21 Significant Log2(FC)") +
theme(legend.position = c(.8, .7))
batch_corrected_vsd_df |>
pivot_longer(
cols = -ensembl_id,
names_to = "sample_id",
values_to = "vst_count"
) |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
left_join(sample_sheet_df, by="sample_id") |>
mutate(sample_group = case_when(genotype %in% c("D21", "T21") ~ genotype,
genotype == "T21XIST" ~ paste(dox_status, timepoint, sep="_"))) |>
group_by(ensembl_id, sample_group) |>
summarise(mean_vst = mean(vst_count)) |>
ungroup() |>
pivot_wider(id_cols = "ensembl_id", names_from="sample_group", values_from = "mean_vst") |>
pivot_longer(cols = -c(ensembl_id, T21), names_to = "sample_group", values_to = "mean_vst") |>
mutate(diff_from_t21 = mean_vst - T21,
dox_status = str_extract(sample_group, "D21|no|yes|withdrawal"),
timepoint = str_extract(sample_group, "3wk|6wk|9wk|14wk|17wk|20wk")) |>
filter(dox_status == "withdrawal") |>
mutate(dox_group = case_when(
str_detect(timepoint, "14wk|17wk|20wk") ~ "2m",
T ~ "3w"),
wd_group = case_when(
timepoint %in% c("6wk", "17wk") ~ "3wk",
timepoint %in% c("9wk", "20wk") ~ "6wk"
)) |>
ggplot() +
geom_density(aes(x = diff_from_t21, color = wd_group, linetype = dox_group), alpha=.7) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
coord_cartesian(xlim = c(-1, 1)) +
scale_color_manual(values=pal_simpsons()(5)[c(2,5)]) +
scale_linetype_manual(limits=c("3w", "2m"), values=c("solid", "dashed")) +
labs(y = "Density", x = "VST Diff", title = "HSA21 Mean VST diff from T21") +
theme(legend.position = c(.8, .57))
ggsave(sprintf("%s/hsa21_withdrawal_vst_diff.pdf", fig_output_dir), width = 3.25, height = 2)
# only looking at allelic escapees
potential_escapees <- c("ENSG00000142166.13", "ENSG00000142192.21", "ENSG00000154734.16", "ENSG00000157557.13", "ENSG00000159110.21", "ENSG00000182362.14", "ENSG00000243927.6", "ENSG00000249624.10")
ggplot(
chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d"))),
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean)
) +
facet_grid(rows = vars(comparison)) +
geom_point(data = chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d"))), color="white") +
geom_point(data = chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d"))) |>
filter(ensembl_id %in% potential_escapees), alpha=.5) +
scale_color_manual(
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]), limits = c("padj≤.1 & FC=(2/3)±.1", "padj≤.1 & FC=(2/3)±.15", "padj≤.1", "padj>.1"),
guide="none"
) +
scale_size_area(limits = c(NA, 10000), oob = scales::squish, guide = "none", max_size = 4) +
labs(
title = "chr21 Dosage Correction - Allelic Escapees",
color = "Gene Category",
size = "Base Mean Expression",
x = "chr21 Genes (ordered by position)",
y = "log2(FC)"
) +
ggrepel::geom_text_repel(data = chr21_changes_df |>
mutate(comparison = factor(case_when(
comparison == "D21_vs_T21" ~ "D21",
comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison), levels = c("D21", "+dox", "w/d")))|>
filter(ensembl_id %in% potential_escapees),
mapping=aes(label=hgnc_symbol), size=2,
min.segment.length=0) +
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) +
geom_hline(yintercept = 0, linetype = "solid") +
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) +
# guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top",
# legend.spacing.x = unit(3, "mm"),
# legend.margin = margin(3,3,3,3, "mm"),
legend.box.background = element_rect(color = "black", size=1)
) +
coord_cartesian(ylim = c(-1.5, 1.5))
ggsave(sprintf("%s/hsa21_dosage_correction_allelic_escapees.pdf", fig_output_dir), width = 3.5, height = 2.28)
# ggsave(sprintf("%s/hsa21_dosage_correction_allelic_escapees.png", fig_output_dir), width = 3.25, height = 1.97, dpi=300)
chr21_changes_comparison_cols <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
chr21_changes_comparisons
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
filter(chromosome_name %in% c(
"1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19",
"20", "21", "22", "X", "Y"
)) |>
pivot_longer(
# cols = c(ends_with("nonrtta_d21_vs_t21"), ends_with("rtta_wd_vs_nd"), ends_with("rtta_wd_vs_nd_powered"), ends_with("_rtta_d_vs_nd"), ends_with("dox_t21xist_vs_1985")),
cols = c(starts_with(c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(category = factor(case_when(
padj < .1 & log2FoldChange > 0 ~ "gain",
padj < .1 & log2FoldChange < 0 ~ "loss",
T ~ "nochange"
), levels = c("gain", "nochange", "loss"))) |>
dplyr::count(chromosome_name, category, comparison) |>
filter(comparison %in% c("dwdT21SIT_vs_T21", "T21_vs_D21")) |>
mutate(comparison = case_when(comparison == "dwdT21SIT_vs_T21" ~ "T21XIST/T21",
comparison == "T21_vs_D21" ~ "T21/D21"),
comparison= factor(comparison, levels = c("T21/D21", "T21XIST/T21"))) |>
ggplot() +
geom_bar(aes(
x = factor(
chromosome_name,
levels = c(
"1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19",
"20", "21", "22", "X", "Y"
)
),
y = n,
fill = category
),
stat = "identity",
position = position_fill()
) +
# facet_grid(rows = vars(factor(comparison, levels = c("nonrtta_d21_vs_t21", "rtta_wd_vs_nd", "rtta_wd_vs_nd_powered", "rtta_d_vs_nd", "dox_t21xist_vs_1985")))) +
facet_grid(rows = vars(comparison)) +
geom_hline(yintercept = .5, linetype = "dashed", size = .3) +
scale_fill_uchicago(
limits = c("gain", "nochange", "loss"),
labels = c("Inc", "~0", "Dec")
) +
labs(
title = "Global Changes in Gene Expression - iPSCs",
fill = "Direction",
x = "Chromosome",
y = "% Genes"
) +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1, size=8),
plot.margin = margin(t=.5,0,0,0, "mm"),
legend.margin = margin(l=-3, unit = "mm"),
axis.title.x = element_blank())
ggsave(sprintf("%s/global_changes_in_gene_expression.pdf", fig_output_dir), width = 4, height = 2.3)
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
filter(chromosome_name %in% c(
"1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19",
"20", "21", "22", "X", "Y"
)) |>
pivot_longer(
# cols = c(ends_with("nonrtta_d21_vs_t21"), ends_with("rtta_wd_vs_nd"), ends_with("rtta_wd_vs_nd_powered"), ends_with("_rtta_d_vs_nd"), ends_with("dox_t21xist_vs_1985")),
cols = c(starts_with(c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"))),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(category = factor(case_when(
padj < .1 & log2FoldChange > 0 ~ "gain",
padj < .1 & log2FoldChange < 0 ~ "loss",
T ~ "nochange"
), levels = c("gain", "nochange", "loss"))) |>
dplyr::count(chromosome_name, category, comparison) |>
filter(comparison %in% c("dT21XIST_vs_T21")) |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d")) |>
ggplot() +
geom_bar(aes(
x = factor(
chromosome_name,
levels = c(
"1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19",
"20", "21", "22", "X", "Y"
)
),
y = n,
fill = category
),
stat = "identity",
position = position_fill()
) +
# facet_grid(rows = vars(factor(comparison, levels = c("nonrtta_d21_vs_t21", "rtta_wd_vs_nd", "rtta_wd_vs_nd_powered", "rtta_d_vs_nd", "dox_t21xist_vs_1985")))) +
# facet_grid(rows = vars(comparison)) +
geom_hline(yintercept = .5, linetype = "dashed", size = .3) +
scale_fill_uchicago(
limits = c("gain", "nochange", "loss"),
labels = c("Inc", "~0", "Dec")
) +
labs(
title = "Global Changes in Gene Expression",
fill = "Direction",
x = "Chromosome",
y = "% Genes"
) +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1, size=8),
plot.margin = margin(t=.5,0,0,0, "mm"),
legend.margin = margin(l=-3, unit = "mm"),
axis.title.x = element_blank())
ggsave(sprintf("%s/global_changes_in_gene_expression.pdf", fig_output_dir), width = 4, height = 1.3)
## Diff. Expression Overlaps
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c("neural_t21xistdoxcombined_t21",
"neural_t21xistdox_t21",
"neural_t21xistwd_t21")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(comparison = case_when(comparison == "neural_t21xistdox_t21" ~ "+dox",
comparison == "neural_t21xistwd_t21" ~ "w/d",
comparison == "neural_t21xistdoxcombined_t21" ~ "combined",
T ~ comparison)) |>
filter(baseMean >= 1000) |>
ggplot() +
geom_vline(xintercept = log2(.9 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.1 * (2 / 3)), color = pal_d3()(3)[1], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(.85 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
geom_vline(xintercept = log2(1.15 * (2 / 3)), color = pal_d3()(3)[3], linetype="dotted", size=.3) +
annotate("rect", xmin = log2(.9 * (2 / 3)), xmax = log2(1.1 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[1], alpha = .3) +
annotate("rect", xmin = log2(.85 * (2 / 3)), xmax = log2(.9 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
annotate("rect", xmin = log2(1.1 * (2 / 3)), xmax = log2(1.15 * (2 / 3)), ymin = -Inf, ymax = Inf, fill = pal_d3()(3)[3], alpha = .3) +
geom_density(aes(x = log2FoldChange, color = comparison)) +
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) +
geom_vline(xintercept = log2(2 / 3), linetype = "dashed", color = "black", size = .3) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
annotate("text", x = -1, y = .75, label = "log2(2/3)", color = "black", size = 2) +
coord_cartesian(xlim = c(-1.5, 1.5)) +
labs(y = "Density", x = "log2(FC)", title = "Neural chr21 Log2(FC) (baseMean >= 1000)") +
theme(legend.position = c(.8, .7))
ggsave(sprintf("%s/chr21_dist_neural_1000.pdf", fig_output_dir), width = 3.5, height = 1.5)
combined_results_df |>
# left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c("neural_d21_t21xistdoxcombined",
"neural_d21_t21xistdox",
"neural_d21_t21xistwd")
)) |> map(lift(paste0)) |> unlist())),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(comparison = case_when(comparison == "neural_d21_t21xistdox" ~ "+dox",
comparison == "neural_d21_t21xistwd" ~ "w/d",
comparison == "neural_d21_t21xistdoxcombined" ~ "combined",
T ~ comparison)) |>
filter(baseMean >= 1000) |>
ggplot() +
geom_density(aes(x = -1 * log2FoldChange, color = comparison)) +
scale_color_manual(limits=c("+dox", "combined", "w/d"), values=pal_d3()(5)[c(4,2,5)]) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = .3) +
coord_cartesian(xlim = c(-1.5, 1.5)) +
labs(y = "Density", x = "log2(FC)", title = "chr21 Log2(FC) from D21\nNeural (baseMean >= 1000)") +
theme(legend.position = c(.8, .7))
ggsave(sprintf("%s/chr21_dist_neural_d21_1000.pdf", fig_output_dir), width = 3.5, height = 2)
# batch_corrected_vsd_df_neural <- read_csv(paste0(neural_output_dir, "/tables/batch_corrected_vsd_salmon_df.csv.gz"))
batch_corrected_vsd_df_neural <- read_csv(paste0(neural_output_dir, "/tables/vsd_salmon_df.csv.gz"))
sample_sheet_df_neural <- read_csv(paste0(neural_output_dir, "/tables/sample_sheet_df.csv.gz"))
write_csv(sample_sheet_df |>
dplyr::select(sample_id,
timepoint,
dox_status,
genotype,
cell_line
), paste0(table_output_dir, "/bulk_ipsc_rnaseq_samples.csv"))
write_csv(sample_sheet_df_neural |>
dplyr::select(sample_id,
cell_line,
neural_diff,
genotype
), paste0(table_output_dir, "/bulk_neural_rnaseq_samples.csv"))
chr21_rownorm_vsd <- batch_corrected_vsd_df_neural |>
pivot_longer(cols = -ensembl_id, names_to = "sample_id", values_to = "vst_count") |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version")) |>
left_join(sample_sheet_df_neural, by = c("sample_id")) |>
filter(chromosome_name == "21") |>
group_by(ensembl_id) |>
mutate(
row_norm_count = vst_count - mean(cur_data() %>% filter(cell_line == "198_5") %>% pull(vst_count)),
mean_1985_count = mean(cur_data() %>% filter(cell_line == "198_5") %>% pull(vst_count))
) |>
ungroup()
# complexheatmap for neurons
chr21_rownorm_hm_df <- chr21_rownorm_vsd |>
filter( # mean_1985_count>8 &
!(gene_biotype %in% c("rRNA", "TEC"))) |>
mutate(
gene_biotype = factor(case_when(
gene_biotype == "protein_coding" ~ "Protein\nCoding",
gene_biotype == "processed_pseudogene" ~ "Pseudo-\ngene",
gene_biotype == "transcribed_processed_pseudogene" ~ "Pseudo-\ngene",
T ~ gene_biotype),
levels = c("Protein\nCoding", "lncRNA", "Pseudo-\ngene")))
neural_rownorm_mat <- chr21_rownorm_hm_df |>
pivot_wider(id_cols = ensembl_id,
names_from = sample_id,
values_from = row_norm_count) |>
column_to_rownames(var="ensembl_id")
t21count_col_fun <- circlize::colorRamp2(c(8,16), c("#ffffff", "#7105E0"))
gene_annotation <- rowAnnotation(
`Mean T21 Count` =
anno_simple(
x=chr21_rownorm_hm_df |>
dplyr::select(ensembl_id, mean_1985_count) |>
dplyr::distinct() |>
deframe(),
col = t21count_col_fun,
simple_anno_size = unit(3, "mm")),
annotation_name_gp= gpar(fontsize = 7))
lgd_t21count = Legend(title = "Mean T21 Count", col_fun = t21count_col_fun, at = c(8,10,12,14,16),
labels = c(8,10,12,14,16),
direction = "horizontal",
labels_gp=gpar(fontsize=7),
title_gp=gpar(fontsize=7),
grid_height = unit(2, "mm"))
sample_annotation <- columnAnnotation(
`Cell Line` = chr21_rownorm_hm_df |>
dplyr::select(sample_id, `cell_line`) |>
dplyr::distinct() |>
mutate(`cell_line` = case_when(
cell_line == "198_1" ~ "198-1",
cell_line == "198_2" ~ "198-2",
cell_line == "198_5" ~ "198-5",
cell_line == "C1_8" ~ "T21XIST c1",
cell_line == "C4_8" ~ "T21XIST c4",
cell_line == "C7_9" ~ "T21XIST c7")) |>
column_to_rownames(var="sample_id") |>
deframe(),
`Diff Round` = chr21_rownorm_hm_df |>
dplyr::select(sample_id, `neural_diff`) |>
dplyr::distinct() |>
column_to_rownames(var="sample_id") |>
deframe() |>
factor(levels=c("ND6", "ND7", "ND9", "ND11", "ND12", "ND18", "ND20")),
simple_anno_size = unit(3, "mm"),
annotation_legend_param = list(labels_gp=gpar(fontsize=7), title_gp=gpar(fontsize=7), grid_height = unit(2, "mm"), grid_width = unit(2, "mm")),
col = list(`Cell Line` = c("198-1"="#1697a6", "198-2"="#0e606b", "198-5"="#ffc24b",
"T21XIST c1"="#fff4f1", "T21XIST c4"="#ffb3ae", "T21XIST c7"="#f47068"),
`Diff Round` = c("ND6"="#ffadad", "ND7"="#ffd6a5", "ND9"="#fdffb6",
"ND11"="#caffbf", "ND12"="#9bf6ff", "ND18"="#a0c4ff",
"ND20"="#bdb2ff")),
annotation_name_gp= gpar(fontsize = 7)
)
neural_hm <- Heatmap(neural_rownorm_mat,
col=circlize::colorRamp2(c(-1, 0, 1), c("#3784AF", "#f7f7f7", "#D34B16")),
# col=circlize::colorRamp2(c(-1, 0, 1), c("#0571B0", "#f7f7f7", "#CA0020")),
name="Row Norm VST Count",
border=T,
cluster_rows = F,
row_order = chr21_rownorm_hm_df |> dplyr::select(ensembl_id, mean_1985_count) |> deframe() |> sort() |> names() |> unique() |> rev(),
row_split = chr21_rownorm_hm_df |>
dplyr::select(ensembl_id, gene_biotype) |>
distinct() |>
deframe(),
row_title_side = "right",
row_gap = unit(1, "mm"),
show_row_names = F,
row_title_gp = gpar(fontsize = 7),
row_title_rot = 45,
column_title_gp = gpar(fontsize = 7),
column_names_gp = gpar(fontsize = 7),
left_annotation = gene_annotation,
show_column_dend=F,
column_split = chr21_rownorm_hm_df |>
mutate(split_label = case_when(
genotype == "T21XIST" & !is.na(description.y) & description.y != "wd" ~ "+dox",
genotype == "T21XIST" & dox_status == "no" ~ "-dox",
genotype == "T21XIST" & description.y == "wd" ~ "w/d",
T ~ genotype)) |>
dplyr::select(sample_id, split_label) |>
distinct() |>
deframe() |>
factor(levels=c("T21", "-dox", "+dox", "w/d", "D21")),
cluster_column_slices=F,
column_gap = unit(1, "mm"),
show_column_names = F,
bottom_annotation = sample_annotation,
heatmap_legend_param = list(direction = "horizontal",
labels_gp=gpar(fontsize=7),
title_gp=gpar(fontsize=7),
grid_height = unit(2, "mm")))
pdf(sprintf("%s/expression_relative_to_1985_neural.pdf", fig_output_dir), width = 3.5, height = 4.7)
draw(neural_hm, annotation_legend_list = list(lgd_t21count),
merge_legend=T, heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
dev.off()
chr21_changes_comparison_cols <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c("neural_t21_d21", "neural_t21xistwd_t21", "neural_t21xistdox_t21")
)) |> map(lift(paste0)) |> unlist()
chr21_changes_df <- combined_results_df |>
dplyr::filter(chromosome_name == "21") |>
pivot_longer(
cols = c(ends_with(chr21_changes_comparison_cols)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
# log2FoldChange = log2FoldChange_dox_t21xist_vs_1985,
# padj = padj_dox_t21xist_vs_1985,
# baseMean = baseMean_dox_t21xist_vs_1985,
color_label = case_when(
padj <= .1 & dplyr::between(log2FoldChange, log2((3 / 2) - .1), log2((3 / 2) + .1)) ~ "padj≤.1 & FC=(3/2)±.1",
padj <= .1 & dplyr::between(log2FoldChange, log2((3 / 2) - .15), log2((3 / 2)+.15)) ~ "padj≤.1 & FC=(3/2)±.15",
padj <= .1 ~ "padj≤.1",
T ~ "padj>.1"
)
)
ggplot(
chr21_changes_df |>
mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
comparison == "wdT21XIST_vs_T21" ~ "w/d",
T ~ comparison)),
aes(x = fct_reorder(ensembl_id, start_position), y = log2FoldChange, color = color_label, size = baseMean)
) +
facet_grid(rows = vars(comparison)) +
scale_color_manual(
values = c(pal_d3()(3)[c(1, 3, 2)], pal_uchicago()(2)[2]), limits = c("padj≤.1 & FC=(3/2)±.1", "padj≤.1 & FC=(3/2)±.15", "padj≤.1", "padj>.1")
) +
scale_size_area(limits = c(NA, 10000), oob = scales::squish, guide = "none", max_size = 4) +
labs(
title = "Neural HSA21 Excess Dosage",
color = "Gene Category",
size = "Base Mean Expression",
x = "HSA21 Genes",
y = "log2(FC)"
) +
geom_hline(yintercept = log2(3 / 2), linetype = "dashed", size = .3) +
geom_hline(yintercept = log2(2 / 3), linetype = "dashed", size = .3) +
geom_hline(yintercept = 0, linetype = "solid") +
geom_vline(xintercept = "ENSG00000157540.22", linetype = "dotdash", size = .3) +
geom_point(alpha = .5) +
guides(color=guide_legend(ncol=2, label.position = "right", label.hjust = .1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top",
# legend.spacing.x = unit(3, "mm"),
# legend.margin = margin(3,3,3,3, "mm"),
legend.box.background = element_rect(color = "black", size=.5)
) +
coord_cartesian(ylim = c(-1.5, 1.5))
ggsave(sprintf("%s/hsa21_dosage_correction_neural.pdf", fig_output_dir), width = 3.25, height = 2)
chr21_changes_df |>
# mutate(comparison = case_when(comparison == "dT21XIST_vs_T21" ~ "+dox",
# comparison == "wdT21XIST_vs_T21" ~ "w/d",
# T ~ comparison)) |>
ggplot(aes(x=log2FoldChange, color=comparison)) +
# facet_grid(rows=vars(comparison)) +
geom_density() +
coord_cartesian(xlim=c(-2, 2)) +
geom_vline(xintercept = 0) +
geom_vline(xintercept = log2(3/2), linetype="dashed") +
geom_vline(xintercept = log2(2/3), linetype="dashed") +
labs(title="Log2(FC) Neural T21/D21",
x="log2(FC)")
ggsave(sprintf("%s/log2fc_dist_neural.pdf", fig_output_dir), width = 3, height = 1.7)
vectorized_sumlog <- function(x, y) {
if (length(x) != length(y)) {
stop("lengths of lists must be the same")
}
combined_pvals <- c()
for (i in 1:length(x)) {
combined_pvals <- c(combined_pvals, (metap::sumlog(c(x[i], y[i])))$p)
}
return(combined_pvals)
}
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to)
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to)
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons)
diff_expr_comparisons <- c("dox_effect", "dwdT21SIT_vs_T21")
compared_from_suffixes <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
diff_expr_comparisons
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect",
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction")) |>
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>%
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |>
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) +
facet_grid(cols = vars(comparison)) +
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) +
geom_hline(yintercept = 0, size = .3) +
geom_vline(xintercept = 0, size = .3) +
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) +
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") +
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") +
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) +
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) +
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) +
labs(
y = "log2FC(T21 vs. D21)",
x = "log2FC",
title = "Correlation with T21-driven Gene Expression",
color = "-log10\n(c.padj)"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_text(margin = margin(r = 2, unit = "mm")),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression.pdf", fig_output_dir), width=3, height = 4.125)
ggsave(sprintf("%s/rescue_of_t21_ipsc.pdf", fig_output_dir), width=4.1, height = 3.3)
ggsave(sprintf("%s/rescue_of_t21_ipsc_sigcoor.pdf", fig_output_dir), width=4.1, height = 3.3)
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_horizontal.pdf", fig_output_dir), width=4, height = 4.125-.5)
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_horizontal_sigcoor.pdf", fig_output_dir), width=4, height = 4.125-.5)
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_vertical.pdf", fig_output_dir), width=3, height = 4.125)
# ggsave(sprintf("%s/rescue_of_t21_dysregulated_gene_expression_vertical_sigcoor.pdf", fig_output_dir), width=3, height = 4.125)
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to)
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to)
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons)
diff_expr_comparisons <- c("dox_effect", "dwdT21SIT_vs_T21", "dwdT21SIT_vs_D21")
compared_from_suffixes <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
diff_expr_comparisons
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
filter(hgnc_symbol != "XIST" & chromosome_name != "21") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = factor(case_when(comparison == "dox_effect" ~ "Dox Effect",
comparison == "dwdT21SIT_vs_T21" ~ "T21XIST/T21",
comparison == "dwdT21SIT_vs_D21" ~ "T21XIST/D21"),
levels = c("Dox Effect", "T21XIST/T21","T21XIST/D21"))) |>
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>%
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |>
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) +
facet_grid(cols = vars(comparison)) +
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) +
geom_hline(yintercept = 0, size = .3) +
geom_vline(xintercept = 0, size = .3) +
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) +
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
geom_smooth(data = . %>% filter(combined_pval <= .1 & hgnc_symbol != "XIST" & chromosome_name != "21"), method = "lm", color = "black", size = .3, linetype = "dashed") +
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") +
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) +
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1 & hgnc_symbol != "XIST" & chromosome_name != "21"), method = "spearman", label.x = -4, label.y = 4, size = 3) +
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) +
labs(
y = "log2FC(T21 vs. D21)",
x = "log2FC",
title = "Correlation with T21-driven Gene Expression\nexcluding Chr21",
color = "-log10\n(c.padj)"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_text(margin = margin(r = 2, unit = "mm")),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21.pdf", fig_output_dir), width=3.55, height = 3.3)
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21_witht21xistd21.pdf", fig_output_dir), width=3.55, height = 3.3)
ggsave(sprintf("%s/rescue_of_t21_ipsc_nochr21_witht21xistd21_sigcoor.pdf", fig_output_dir), width=4.5, height = 2.7)
compared_to_log2FC <- sprintf("log2FoldChange_%s", diff_expr_compare_to)
compared_to_padj <- sprintf("padj_%s", diff_expr_compare_to)
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons)
diff_expr_comparisons <- c("neural_T21_D21")
compared_from_suffixes <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
diff_expr_comparisons
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = case_when(comparison == "neural_T21_D21" ~ "Neural T21/D21")) |>
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>%
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |>
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) +
# facet_grid(cols = vars(comparison)) +
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) +
geom_hline(yintercept = 0, size = .3) +
geom_vline(xintercept = 0, size = .3) +
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) +
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_smooth( method = "lm", color = "black", size = .3, linetype = "dashed") +
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") +
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) +
# ggpubr::stat_cor( method = "spearman", label.x = -4, label.y = 4, size = 3) +
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) +
labs(
y = "log2FC(iPSC T21 vs. D21)",
x = "log2FC(Neural T21 vs. D21)",
title = "iPSC and Neural T21 Overlap",
color = "-log10\n(c.padj)"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_text(margin = margin(r = 2, unit = "mm")),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
ggsave(sprintf("%s/neural_ipsc_gene_overlap.pdf", fig_output_dir), width=3, height = 3.1)
ggsave(sprintf("%s/neural_ipsc_gene_overlap_sigcoor.pdf", fig_output_dir), width=3, height = 3.1)
compared_to_log2FC <- sprintf("log2FoldChange_%s", "neural_t21_d21")
compared_to_padj <- sprintf("padj_%s", "neural_t21_d21")
# compared_from_suffixes <- sprintf("%s_%s", c("baseMean", "log2FoldChange", "pvalue", "padj", "stat"), diff_expr_comparisons)
diff_expr_comparisons <- c("neural_t21xistdoxcombined_t21")
compared_from_suffixes <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
diff_expr_comparisons
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = case_when(comparison == "neural_t21xistdox_t21" ~ "+Dox",
comparison == "neural_t21xistwd_t21" ~ "w/d")) |>
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>%
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |>
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) +
# facet_grid(cols = vars(comparison)) +
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) +
geom_hline(yintercept = 0, size = .3) +
geom_vline(xintercept = 0, size = .3) +
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) +
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") +
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") +
coord_cartesian(xlim = c(-10, 10), ylim = c(-10, 10)) +
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) +
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -9, label.y = 6, size = 3) +
labs(
y = "log2FC(Neural T21 vs. D21)",
x = "log2FC (Neural T21XIST vs T21)",
title = "Correlation with T21-driven Expression - Neural",
color = "-log10\n(c.padj)"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_text(margin = margin(r = 2, unit = "mm")),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
# ggsave(sprintf("%s/rescue_of_t21_neural.pdf", fig_output_dir), width=3, height = 3.3)
ggsave(sprintf("%s/rescue_of_t21_neural_sigcoor.pdf", fig_output_dir), width=3.5, height = 2.4)
combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect",
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction")) |>
dplyr::mutate(combined_pval := vectorized_sumlog(!!as.name(compared_to_padj), padj)) %>%
# filter(combined_pval <= diff_expr_combined_padj_cutoff) |>
ggplot(aes(x = log2FoldChange, y = !!as.name(compared_to_log2FC))) +
facet_grid(cols = vars(comparison)) +
scale_color_viridis_c(limits = c(1, 5), oob = scales::squish) +
geom_hline(yintercept = 0, size = .3) +
geom_vline(xintercept = 0, size = .3) +
geom_point(data = . %>% filter(combined_pval > .1),color="lightgrey", size = 1.5, alpha = .5) +
geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_point(data = . %>% filter(combined_pval <= .1), mapping = aes(color = -log10(combined_pval)), size = 1.5, alpha = .5) +
# geom_smooth(method = "lm", color = "black", size = .3, linetype = "dashed") +
geom_smooth(data = . %>% filter(combined_pval <= .1), method = "lm", color = "black", size = .3, linetype = "dashed") +
coord_cartesian(xlim = c(-5, 5), ylim = c(-5, 5)) +
# ggpubr::stat_cor(method = "spearman", label.x = -4, label.y = 4, size = 3) +
ggpubr::stat_cor(data = . %>% filter(combined_pval <= .1), method = "spearman", label.x = -4, label.y = 4, size = 3) +
labs(
y = "log2FC(T21 vs. D21)",
x = "log2FC",
title = "Rescue of T21 Dysregulation",
color = "-log10\n(c.padj)"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_text(margin = margin(r = 2, unit = "mm")),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
compared_from_suffixes <- purrr::cross(list(
c("baseMean_", "log2FoldChange_", "pvalue_", "padj_", "stat_"),
c("dox_effect", "dwdT21SIT_vs_T21", "T21_vs_D21")
)) |> map(lift(paste0)) |> unlist()
combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(comparison = case_when(comparison == "dox_effect" ~ "Dox Effect",
comparison == "dwdT21SIT_vs_T21" ~ "XIST Correction",
comparison == "T21_vs_D21" ~ "T21/D21"
)) |>
dplyr::mutate(comparison = factor(comparison, levels = c("T21/D21", "XIST Correction", "Dox Effect"))) %>%
filter(padj <= .1 ) |>
ggplot(aes(x = abs(log2FoldChange), color=comparison)) +
geom_density(aes(y = ..count.., fill=comparison)) +
scale_fill_manual(limits=c("XIST Correction", "Dox Effect"), values = c("lightgrey", "white"), guide="none") +
scale_color_aaas() +
geom_density(aes(y = ..count..)) +
coord_cartesian(xlim=c(0, 1)) +
scale_x_continuous(limits = c(0, 1), oob = scales::squish, expand = c(0,0),
breaks = c(0,.25,.5,.75,1),
labels = c("0.00","0.25", "0.50", "0.75", "???1.00")) +
scale_y_continuous(expand=c(0,0))+
labs(
y = "scaled densities",
x = "log2FC",
title = "Significant Fold-Changes",
color = "Diff. Expr"
) +
theme(
legend.position = "top",
legend.margin = margin(l = -5, b=-3, unit = "mm"),
legend.title = element_blank(),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3.5, "mm")
)
ggsave(sprintf("%s/sig_fold_changes.pdf", fig_output_dir), width = 2.5, height = 2.8)
## GSEA Overlaps
term_order <- c(
# "chr21",
# "ECM",
# "Folate",
"apoptosis",
"serine",
"stress",
"ER",
"mitochondria",
"ribosome",
"translation"
)
term_colors <- c(
# "#ff6c92",
"#8e000c",
"#da5c29",
"#f5cf87",
"#a90aff",
"#017c2f",
"#00208a",
"#fe85f3"
# "#000000"
)
compared_to_NES <- sprintf("NES_%s", gsea_compare_to)
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to)
# compared_from_suffixes <- sprintf("_%s", gsea_comparisons)
gsea_comparisons <- c("dwdT21SIT_vs_T21")
compared_from_suffixes <- purrr::cross(list(
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"),
gsea_comparisons
)) |> map(lift(paste0)) |> unlist()
gsea_combined_pval_cutoff <- gsea_combined_padj_cutoff
combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
# mutate(comparison = case_when(
# comparison == "wd_nd" ~ "w/d",
# comparison == "wd_nd_powered" ~ "w/d-p",
# comparison == "d_t21xist_t21" ~ "+dox",
# comparison == "dox" ~ "Dox Effect",
# comparison == "neural_t21_d21" ~ "Neural T21/D21"
# )) %>%
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) +
# facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) +
geom_point(data = . %>% filter(category == "other"), alpha = .5) +
geom_point(data = . %>% filter(category != "other"), alpha = .7) +
labs(
title = "Rescue of Acute Enrichment Terms",
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to),
x = "NES (iPSC T21XIST/T21)",
color = "Category"
) +
scale_color_manual(
limits = c(term_order, "other"),
values = c(term_colors, "lightgrey")
) +
scale_size_area(max_size = 3.5, breaks = c(1, 2, 5, 10), name="-log10(c.padj)") +
geom_vline(xintercept = 0, size = .3) +
geom_hline(yintercept = 0, size = .3) +
theme(legend.box = "horizontal")
ggsave(sprintf("%s/acute_enrrichment_rescue.pdf", fig_output_dir), width = 3, height = 2.3)
ggsave(sprintf("%s/acute_enrrichment_rescue_cpval guide.pdf", fig_output_dir), width = 4, height = 2.3)
compared_to_NES <- sprintf("NES_%s", "neural_t21_d21")
compared_to_padj <- sprintf("p.adjust_%s", "neural_t21_d21")
# compared_from_suffixes <- sprintf("_%s", gsea_comparisons)
gsea_comparisons <- c("neural_t21xistdoxcombined_t21")
compared_from_suffixes <- purrr::cross(list(
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"),
gsea_comparisons
)) |> map(lift(paste0)) |> unlist()
gsea_combined_pval_cutoff <- gsea_combined_padj_cutoff
combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
# mutate(comparison = case_when(
# comparison == "wd_nd" ~ "w/d",
# comparison == "wd_nd_powered" ~ "w/d-p",
# comparison == "d_t21xist_t21" ~ "+dox",
# comparison == "dox" ~ "Dox Effect",
# comparison == "neural_t21_d21" ~ "Neural T21/D21"
# )) %>%
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) +
# facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) +
geom_point(data = . %>% filter(category == "other"), alpha = .5) +
geom_point(data = . %>% filter(category != "other"), alpha = .7) +
labs(
title = "Neural Rescue of Acute Enrichment Terms",
y = "NES (Neural T21/D21)", #sprintf("NES (%s)", gsea_compare_to),
x = "NES (Neural XIST/T21)",
color = "Category"
) +
scale_color_manual(
limits = c(term_order, "other"),
values = c(term_colors, "lightgrey")
) +
scale_size_area(max_size = 3.5, breaks = c(1, 2, 5, 10), name="-log10(c.padj)") +
geom_vline(xintercept = 0, size = .3) +
geom_hline(yintercept = 0, size = .3) +
theme(legend.box = "vertical",
legend.box.margin = margin(l=-5),
legend.spacing.y = unit(.5, "mm"))
ggsave(sprintf("%s/acute_enrrichment_rescue_neural_2.pdf", fig_output_dir), width = 3.6, height = 2.5)
compared_to_NES <- sprintf("NES_%s", gsea_compare_to)
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to)
compared_from_suffixes <- sprintf("_%s", gsea_comparisons)
gsea_comparisons <- c("neural_t21_d21",
"neural_t21xistwd_t21",
"neural_t21xistdox_t21",
"neural_t21xistdoxcombined_t21")
compared_from_suffixes <- purrr::cross(list(
c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"),
gsea_comparisons
)) |> map(lift(paste0)) |> unlist()
gsea_combined_pval_cutoff <- .2
compared_to_NES <- sprintf("NES_%s", "T21_vs_D21")
compared_to_padj <- sprintf("p.adjust_%s", "T21_vs_D21")
combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(gsea_comparisons
# purrr::cross(list(
# c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"),
# gsea_comparisons
# )
# ) |> map(lift(paste0)) |> unlist()
)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
# mutate(comparison = case_when(
# comparison == "wd_nd" ~ "w/d",
# comparison == "wd_nd_powered" ~ "w/d-p",
# comparison == "d_t21xist_t21" ~ "+dox",
# comparison == "dox" ~ "Dox Effect",
# comparison == "neural_t21_d21" ~ "Neural T21/D21"
# )) %>%
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) +
facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) +
geom_point(data = . %>% filter(category == "other"), alpha = .5) +
geom_point(data = . %>% filter(category != "other"), alpha = .7) +
labs(
title = "iPSC and Neural Term Overlap",
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to),
# x = "NES (Neural T21/D21)",
color = "Category"
) +
scale_color_manual(
limits = c(term_order, "other"),
values = c(term_colors, "lightgrey")
) +
scale_size_area(max_size = 3.5, guide = "none") +
geom_vline(xintercept = 0, size = .3) +
geom_hline(yintercept = 0, size = .3)
# ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 3.25, height = 2.5)
ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 8, height = 2.5)
compared_to_NES <- sprintf("NES_%s", "neural_t21_d21")
compared_to_padj <- sprintf("p.adjust_%s", "neural_t21_d21")
combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with(c("neural_t21xistwd_t21",
"neural_t21xistdox_t21",
"neural_t21xistdoxcombined_t21")
# purrr::cross(list(
# c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_"),
# gsea_comparisons
# )
# ) |> map(lift(paste0)) |> unlist()
)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(!!as.name(compared_to_padj), p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
# mutate(comparison = case_when(
# comparison == "wd_nd" ~ "w/d",
# comparison == "wd_nd_powered" ~ "w/d-p",
# comparison == "d_t21xist_t21" ~ "+dox",
# comparison == "dox" ~ "Dox Effect",
# comparison == "neural_t21_d21" ~ "Neural T21/D21"
# )) %>%
ggplot(aes(x = NES, y = !!as.name(compared_to_NES), size = -log10(combined_pval), color = category, ID = ID)) +
facet_grid(cols = vars(factor(comparison, levels = gsea_comparisons))) +
geom_point(data = . %>% filter(category == "other"), alpha = .5) +
geom_point(data = . %>% filter(category != "other"), alpha = .7) +
labs(
title = "iPSC and Neural Term Overlap",
y = "NES (iPSC T21/D21)", #sprintf("NES (%s)", gsea_compare_to),
# x = "NES (Neural T21/D21)",
color = "Category"
) +
scale_color_manual(
limits = c(term_order, "other"),
values = c(term_colors, "lightgrey")
) +
scale_size_area(max_size = 3.5, guide = "none") +
geom_vline(xintercept = 0, size = .3) +
geom_hline(yintercept = 0, size = .3)
# ggsave(sprintf("%s/acute_enrrichment_ipsc_neural.pdf", fig_output_dir), width = 3.25, height = 2.5)
ggsave(sprintf("%s/neural_rescue.pdf", fig_output_dir), width = 8, height = 2.5)
gsea_combined_pval_cutoff <- .1
selected_terms <- combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with("_dwdT21SIT_vs_T21")),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(p.adjust_T21_vs_D21, p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
mutate(category = case_when(category == "ribosome" & str_detect(ID, "PRERIBOSOME") ~ "preribosome",
T ~ category),
quadrant = case_when(
NES > 0 & NES_T21_vs_D21 > 0 ~ 1,
NES < 0 & NES_T21_vs_D21 > 0 ~ 2,
NES < 0 & NES_T21_vs_D21 < 0 ~ 3,
NES > 0 & NES_T21_vs_D21 < 0 ~ 4
)) |>
filter(category != "other" & quadrant %in% c(2,4)) |>
mutate(dist_from_center = sqrt(NES^2 + NES_T21_vs_D21^2)) |>
group_by(category) |>
arrange(desc(dist_from_center)) |>
filter(row_number() <= 3) |>
ungroup() |>
dplyr::select(ID, dist_from_center, category2=category)
selected_terms2 <- combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
# pivot_longer(
# cols = c(ends_with("_dwdT21SIT_vs_T21", "_neural_t21_d21")),
# names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
# ) |>
# pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(vectorized_sumlog(`p.adjust_neural_t21_d21`, `p.adjust_T21_vs_D21`), `p.adjust_dwdT21SIT_vs_T21`)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
mutate(quadrant = case_when(
NES_T21_vs_D21 > 0 & NES_neural_t21_d21 > 0 & NES_dwdT21SIT_vs_T21 < 0 ~ 2,
NES_T21_vs_D21 < 0 & NES_neural_t21_d21 < 0 & NES_dwdT21SIT_vs_T21 > 0 ~ 4,
T ~ 0
)) |>
filter(category != "other" & quadrant %in% c(2,4)) |>
mutate(dist_from_center = sqrt(NES_T21_vs_D21^2 + NES_dwdT21SIT_vs_T21^2 + NES_neural_t21_d21^2)) |>
group_by(category) |>
arrange(desc(dist_from_center)) |>
filter(row_number() <= 3) |>
ungroup() |>
dplyr::select(ID, dist_from_center, category2=category)
selected_terms_ipsc <- combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with("_dwdT21SIT_vs_T21")),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(p.adjust_T21_vs_D21, p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
mutate(quadrant = case_when(
NES > 0 & NES_T21_vs_D21 > 0 ~ 1,
NES < 0 & NES_T21_vs_D21 > 0 ~ 2,
NES < 0 & NES_T21_vs_D21 < 0 ~ 3,
NES > 0 & NES_T21_vs_D21 < 0 ~ 4
)) |>
filter(category != "other" & quadrant %in% c(2,4)) |>
mutate(dist_from_center = sqrt(NES^2 + NES_T21_vs_D21^2)) |>
group_by(category) |>
arrange(desc(dist_from_center)) |>
filter(row_number() <= 3) |>
ungroup() |>
dplyr::select(ID, dist_from_center, category2=category)
selected_terms_neural <- combined_enrichment_df |>
# filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(ends_with("_neural_t21xistdoxcombined_t21")),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
mutate(combined_pval = vectorized_sumlog(p.adjust_neural_t21_d21, p.adjust)) |>
filter(combined_pval <= gsea_combined_pval_cutoff & category != "chr21") |>
mutate(quadrant = case_when(
NES > 0 & NES_neural_t21_d21 > 0 ~ 1,
NES < 0 & NES_neural_t21_d21 > 0 ~ 2,
NES < 0 & NES_neural_t21_d21 < 0 ~ 3,
NES > 0 & NES_neural_t21_d21 < 0 ~ 4
)) |>
filter(category != "other" & quadrant %in% c(2,4)) |>
mutate(dist_from_center = sqrt(NES^2 + NES_neural_t21_d21^2)) |>
group_by(category) |>
arrange(desc(dist_from_center)) |>
filter(row_number() <= 3) |>
ungroup() |>
dplyr::select(ID, dist_from_center, category2=category)
library(ggh4x)
combined_enrichment_df |>
inner_join(selected_terms2) |>
dplyr::select(-starts_with("combined_pval_")) |>
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")),
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |>
pivot_wider(names_from = "value_type", values_from="value") |>
mutate(ID=str_replace_all(ID, "_", " ")) |>
filter(comparison %in% c(
"T21_vs_D21",
"dwdT21SIT_vs_T21",
"neural_t21_d21",
"neural_t21xistdoxcombined_t21"
# # "dox",
# "d_t21xist_t21",
# "wd_nd"
)) |>
mutate(
ID = str_replace_all(ID, "PROCESS", "PROC"),
ID = str_replace_all(ID, "POSITIVE", "(+)"),
ID = str_replace_all(ID, "REGULATION", "REG"),
ID = str_replace_all(ID, "FAMILY", "FAM"),
ID = str_replace_all(ID, "OXIDATIVE", "OXI"),
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"),
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"),
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"),
ID = str_replace_all(ID, "DEPENDENT", "DEP"),
ID = str_replace_all(ID, "TRANSLATION", "TRANS"),
ID = str_replace_all(ID, "CHEMICAL", "CHEM"),
ID = str_replace_all(ID, "RIBOSOME", "RIBO"),
ID = str_replace_all(ID, "PROTEIN", "PROT"),
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"),
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"),
ID = str_replace_all(ID, "TARGETING", "TARGET"),
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME |GOCC |GOMF", ""),
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"),
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"),
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"),
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"),
ID = str_replace_all(ID, "EXTRINSIC", "EXT"),
ID = str_replace_all(ID, "TARGET", "TRGT"),
ID = str_replace_all(ID, "MEMBRANE", "MEM"),
ID = str_replace_all(ID, "COMPONENT", "COMP"),
ID = str_replace_all(ID, "PHYTO", "PHYT"),
ID = str_replace_all(ID, "AMINO ACID", "AA"),
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"),
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"),
ID = str_replace_all(ID, "APOPTOTIC", "APOP"),
ID = str_replace_all(ID, "DISEASES", "DIS"),
ID = str_replace_all(ID, "PROGRAMMED", "PROG"),
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"),
ID = str_replace_all(ID, "ACTIVITY", "ACT"),
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"),
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"),
ID = str_replace_all(ID, "SMALL", "SML"),
ID = str_replace_all(ID, "SUBUNIT", "SUB"),
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"),
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"),
ID = str_replace_all(ID, "APOPTOSIS", "APOP"),
ID = str_replace_all(ID, "PATHWAY", "PATH"),
ID = str_replace_all(ID, "SIGNALING", "SIG"),
ID = str_replace_all(ID, "RESPONSE", "RESP"),
ID = str_replace_all(ID, "MULTICELLULAR", "MULTICELL"),
ID = str_replace_all(ID, "RESPIRATORY", "RESP"),
ID = str_replace_all(ID, "RELATED", "REL"),
ID = str_replace_all(ID, "INDUCER", "IND"),
ID = str_replace_all(ID, "ENDOPEPTIDASE", "ENDOPEP"),
ID = str_replace_all(ID, "ORGANISMAL", "ORGNSML"),
ID = str_replace_all(ID, "NETWORK", "NTWRK"),
ID = str_replace_all(ID, "ALTERED", "ALT"),
ID = str_replace_all(ID, "OVARIAN", "OVRIN"),
ID = str_replace_all(ID, "CANCER", "CNCR"),
) |>
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) +
geom_point() +
ggh4x::facet_grid2(rows=vars(factor(category2,
levels=c("apoptosis", "serine", "stress","ER", "ribosome", "mitochondria", "translation"))
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) +
# scale_color_viridis_c() +
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) +
# scale_y_discrete(label=scales::label_wrap(30)) +
scale_x_discrete(limits=c(
"neural_t21_d21",
"T21_vs_D21",
"dwdT21SIT_vs_T21",
"neural_t21xistdoxcombined_t21"
),
labels = c("iPSC T21 vs. D21", "iPSC T21XIST vs. T21", "Neural T21 vs. D21", "Neural T21XIST vs. T21")) +
theme(axis.text.x = element_text(angle=65, hjust=1),
axis.title.x = element_blank(),
axis.text.y = element_text(size=6),
axis.title.y=element_blank(),
legend.position = c(-1.5, -.1),
legend.box = "horizontal",
legend.margin = margin(r=2, unit = "mm"),
legend.key.width = unit(2, units = "mm"),
plot.margin = margin(r=-2, unit="mm"),
plot.title = element_text(hjust=1.8),
# axis.text.y = element_blank(),
# axis.ticks.y=element_blank(),
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5)) +
scale_size_area(max_size=4) +
labs(title="Enriched Pathways")
ggsave(sprintf("%s/enriched_pathways_with_neural.pdf", fig_output_dir), width = 3.9, height = 6)
combined_enrichment_df |>
inner_join(selected_terms_ipsc) |>
dplyr::select(-starts_with("combined_pval_")) |>
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")),
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |>
pivot_wider(names_from = "value_type", values_from="value") |>
mutate(ID=str_replace_all(ID, "_", " ")) |>
filter(comparison %in% c(
"T21_vs_D21",
"dwdT21SIT_vs_T21",
"neural_t21_d21",
"neural_t21xistdoxcombined_t21"
# # "dox",
# "d_t21xist_t21",
# "wd_nd"
)) |>
mutate(
ID = str_replace_all(ID, "PROCESS", "PROC"),
ID = str_replace_all(ID, "POSITIVE", "(+)"),
ID = str_replace_all(ID, "REGULATION", "REG"),
ID = str_replace_all(ID, "FAMILY", "FAM"),
ID = str_replace_all(ID, "OXIDATIVE", "OXI"),
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"),
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"),
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"),
ID = str_replace_all(ID, "DEPENDENT", "DEP"),
ID = str_replace_all(ID, "TRANSLATION", "TRANS"),
ID = str_replace_all(ID, "CHEMICAL", "CHEM"),
ID = str_replace_all(ID, "RIBOSOME", "RIBO"),
ID = str_replace_all(ID, "PROTEIN", "PROT"),
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"),
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"),
ID = str_replace_all(ID, "TARGETING", "TARGET"),
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME ", ""),
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"),
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"),
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"),
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"),
ID = str_replace_all(ID, "EXTRINSIC", "EXT"),
ID = str_replace_all(ID, "TARGET", "TRGT"),
ID = str_replace_all(ID, "MEMBRANE", "MEM"),
ID = str_replace_all(ID, "COMPONENT", "COMP"),
ID = str_replace_all(ID, "PHYTO", "PHYT"),
ID = str_replace_all(ID, "AMINO ACID", "AA"),
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"),
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"),
ID = str_replace_all(ID, "APOPTOTIC", "APOP"),
ID = str_replace_all(ID, "DISEASES", "DIS"),
ID = str_replace_all(ID, "PROGRAMMED", "PROG"),
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"),
ID = str_replace_all(ID, "ACTIVITY", "ACT"),
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"),
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"),
ID = str_replace_all(ID, "SMALL", "SML"),
ID = str_replace_all(ID, "SUBUNIT", "SUB"),
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"),
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"),
ID = str_replace_all(ID, "RESPIRATORY", "RESP"),
ID = str_replace_all(ID, "RESPONSE", "RESP"),
) |>
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) +
geom_point() +
ggh4x::facet_grid2(rows=vars(factor(category2,
levels=c("apoptosis", "serine", "stress", "ER", "ribosome", "mitochondria", "translation"))
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) +
# scale_color_viridis_c() +
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) +
# scale_y_discrete(label=scales::label_wrap(30)) +
scale_x_discrete(limits=c(
"T21_vs_D21",
"dwdT21SIT_vs_T21"
),
labels = c("T21 vs. D21", "T21XIST vs. T21")) +
scale_size_area(max_size=4) +
labs(title="Top GSEA Terms - iPSCs") +
theme(axis.text.x = element_text(angle=45, hjust=1),
axis.title.x = element_blank(),
axis.text.y = element_text(size=6),
axis.title.y=element_blank(),
legend.position = c(-3, -.1),
legend.box = "horizontal",
legend.margin = margin(r=2, unit = "mm"),
legend.key.width = unit(2, units = "mm"),
plot.margin = margin(r=-2, unit="mm"),
plot.title = element_text(hjust=1.8),
# axis.text.y = element_blank(),
# axis.ticks.y=element_blank(),
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5))
ggsave(sprintf("%s/enriched_pathways_ipsc.pdf", fig_output_dir), width = 3.15, height = 5.5)
combined_enrichment_df |>
inner_join(selected_terms_neural, by="ID") |>
dplyr::select(-starts_with("combined_pval_")) |>
pivot_longer(cols = starts_with(c("NES_", "p.adjust_", "qvalues_", "enrichmentScore_", "setSize_", "leadingEdgeTags_", "leadingEdgeList_", "leadingEdgeSignal_", "numCoreEnrichment_")),
names_pattern = "([A-Za-z\\.]*)_(.*)", names_to=c("value_type", "comparison")) |>
pivot_wider(names_from = "value_type", values_from="value") |>
mutate(ID=str_replace_all(ID, "_", " ")) |>
filter(comparison %in% c(
"neural_t21_d21",
"neural_t21xistdoxcombined_t21"
# # "dox",
# "d_t21xist_t21",
# "wd_nd"
)) |>
mutate(
ID = str_replace_all(ID, "REACTOME MITOCHONDRIAL TRANSLATION", "RCTME MTO TRANS"),
ID = str_replace_all(ID, "GOBP MITOCHONDRIAL TRANSLATION", "GO MTO TRANS"),
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME ", ""),
ID = str_replace_all(ID, "PROCESS", "PROC"),
ID = str_replace_all(ID, "POSITIVE", "(+)"),
ID = str_replace_all(ID, "REGULATION", "REG"),
ID = str_replace_all(ID, "FAMILY", "FAM"),
ID = str_replace_all(ID, "OXIDATIVE", "OXI"),
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"),
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"),
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"),
ID = str_replace_all(ID, "DEPENDENT", "DEP"),
ID = str_replace_all(ID, "TRANSLATION", "TRANS"),
ID = str_replace_all(ID, "CHEMICAL", "CHEM"),
ID = str_replace_all(ID, "RIBOSOME", "RIBO"),
ID = str_replace_all(ID, "PROTEIN", "PROT"),
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"),
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"),
ID = str_replace_all(ID, "TARGETING", "TARGET"),
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"),
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"),
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"),
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"),
ID = str_replace_all(ID, "EXTRINSIC", "EXT"),
ID = str_replace_all(ID, "TARGET", "TRGT"),
ID = str_replace_all(ID, "MEMBRANE", "MEM"),
ID = str_replace_all(ID, "COMPONENT", "COMP"),
ID = str_replace_all(ID, "PHYTO", "PHYT"),
ID = str_replace_all(ID, "AMINO ACID", "AA"),
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"),
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"),
ID = str_replace_all(ID, "APOPTOTIC", "APOP"),
ID = str_replace_all(ID, "DISEASES", "DIS"),
ID = str_replace_all(ID, "PROGRAMMED", "PROG"),
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"),
ID = str_replace_all(ID, "ACTIVITY", "ACT"),
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"),
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"),
ID = str_replace_all(ID, "SMALL", "SML"),
ID = str_replace_all(ID, "SUBUNIT", "SUB"),
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"),
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"),
ID = str_replace_all(ID, "RESPIRATORY", "RESP"),
ID = str_replace_all(ID, "RESPONSE", "RESP"),
ID = str_replace_all(ID, "TRANSCRIPTION", "TRNCRIP"),
ID = str_replace_all(ID, "TEMPLATED", "TMPLT"),
ID = str_replace_all(ID, "INITIATION", "INIT"),
ID = str_replace_all(ID, "PHOSPHORYLATION", "PHOS"),
ID = str_replace_all(ID, "ALPHA", "α"),
ID = str_replace_all(ID, "INHIBITOR", "INHIB"),
ID = str_replace_all(ID, "ELECTRON TRANSPORT CHAIN", "E.T.C."),
ID = str_replace_all(ID, "SYSTEM", "SYS"),
ID = str_replace_all(ID, "MITOCHONDRIA", "MITO"),
ID = str_replace_all(ID, "INFLAMMATORY", "INFLAM"),
ID = str_replace_all(ID, "EFFECTS", "EFF."),
ID = str_replace_all(ID, "PATHWAY", "PATH"),
) |>
ggplot(aes(x=comparison, y=fct_reorder(ID, dplyr::desc(dist_from_center)), color=NES, size=-log10(p.adjust))) +
geom_point() +
ggh4x::facet_grid2(rows=vars(factor(category2,
levels=c("apoptosis", "serine", "stress", "ER", "ribosome", "mitochondria", "translation"))
), scales = "free", space = "free", strip=ggh4x::strip_nested(clip="off")) +
# scale_color_viridis_c() +
scale_color_gradient2(high = scales::muted("red"), low = scales::muted("blue")) +
# scale_y_discrete(label=scales::label_wrap(30)) +
scale_x_discrete(limits=c(
"neural_t21_d21",
"neural_t21xistdoxcombined_t21"
),
labels = c("T21 vs. D21", "T21XIST vs. T21")) +
scale_size_area(max_size=4) +
labs(title="Top GSEA Terms - Neural") +
theme(axis.text.x = element_text(angle=45, hjust=1),
axis.title.x = element_blank(),
axis.text.y = element_text(size=6),
axis.title.y=element_blank(),
legend.position = c(-2.6, 0),
legend.box = "horizontal",
legend.margin = margin(r=2, unit = "mm"),
legend.key.width = unit(2, units = "mm"),
plot.margin = margin(r=-2, unit="mm"),
plot.title = element_text(hjust=1.8),
# axis.text.y = element_blank(),
# axis.ticks.y=element_blank(),
strip.text.y = element_text(angle=45, hjust = 0, vjust = .5))
ggsave(sprintf("%s/enriched_pathways_neural.pdf", fig_output_dir), width = 3.6, height = 4.8,device = cairo_pdf)
## Lollipop Plots
gsea_compare_to <- "T21_vs_D21"
gsea_comparisons <- c("neural_t21_d21", "neural_t21xistdoxcombined_t21", "dwdT21SIT_vs_T21", "dox_effect")
compared_to_NES <- sprintf("NES_%s", gsea_compare_to)
compared_to_padj <- sprintf("p.adjust_%s", gsea_compare_to)
compared_from_suffixes <- sprintf("_%s", c(gsea_compare_to, gsea_comparisons))
gsea_lollipop_cutoff_1 <- lol_cutoff_primary
gsea_lollipop_cutoff_2 <- .2
combined_enrichment_df |>
mutate(
ontology = str_extract(ID, "[A-Z]+(?=_)"),
ontology = case_when(
is.na(ontology) | !(ontology %in% c("GOBP", "GOCC", "GOMF", "HALLMARK", "KEGG", "REACTOME", "WP")) ~ "other",
T ~ ontology
)
) |>
filter(ontology != "other",
!!as.name(compared_to_padj) <= gsea_lollipop_cutoff_1) |>
# dplyr::select(-starts_with("combined_pval_")) |>
mutate(
ID = str_replace_all(ID, "_", " "),
ID = str_replace_all(ID, "PROCESS", "PROC"),
ID = str_replace_all(ID, "POSITIVE", "(+)"),
ID = str_replace_all(ID, "REGULATION", "REG"),
ID = str_replace_all(ID, "FAMILY", "FAM"),
ID = str_replace_all(ID, "OXIDATIVE", "OXI"),
ID = str_replace_all(ID, "LOCALIZATION", "LOCAL"),
ID = str_replace_all(ID, "MITOCHONDRIAL", "MITO"),
ID = str_replace_all(ID, "CYTOCHROME C", "CYT C"),
ID = str_replace_all(ID, "DEPENDENT", "DEP"),
ID = str_replace_all(ID, "TRANSLATION", "TRANS"),
ID = str_replace_all(ID, "CHEMICAL", "CHEM"),
ID = str_replace_all(ID, "RIBOSOME", "RIBO"),
ID = str_replace_all(ID, "PROTEIN", "PROT"),
ID = str_replace_all(ID, "ESTABLISHMENT", "ESTAB"),
ID = str_replace_all(ID, "TRANSCRIPTIONAL", "TRANSCRIP"),
ID = str_replace_all(ID, "TARGETING", "TARGET"),
ID = str_replace_all(ID, "GOBP |WP |GOCC |REACTOME |HALLMARK |KEGG ", ""),
ID = str_replace_all(ID, "RIBOSOMAL", "RIBO"),
ID = str_replace_all(ID, "CYTOPLASMIC|CYTOSOLIC", "CYTO"),
ID = str_replace_all(ID, "ENDOPLASMIC RETICULUM", "ER"),
ID = str_replace_all(ID, "EUKARYOTIC", "EUKAR"),
ID = str_replace_all(ID, "EXTRINSIC", "EXT"),
ID = str_replace_all(ID, "TARGET", "TRGT"),
ID = str_replace_all(ID, "MEMBRANE", "MEM"),
ID = str_replace_all(ID, "COMPONENT", "COMP"),
ID = str_replace_all(ID, "PHYTO", "PHYT"),
ID = str_replace_all(ID, "AMINO ACID", "AA"),
ID = str_replace_all(ID, "SMOOTH MUSCLE CELL", "SMC"),
ID = str_replace_all(ID, "NRF2 TRANSCRIP", "NRF2"),
ID = str_replace_all(ID, "APOPTOTIC", "APOP"),
ID = str_replace_all(ID, "DISEASES", "DIS"),
ID = str_replace_all(ID, "PROGRAMMED", "PROG"),
ID = str_replace_all(ID, "BIOSYNTHETIC", "BIOSYNTH"),
ID = str_replace_all(ID, "ACTIVITY", "ACT"),
ID = str_replace_all(ID, "SENESCENCE", "SENSNCE"),
ID = str_replace_all(ID, "COTRANSAL", "COTRANS"),
ID = str_replace_all(ID, "SMALL", "SML"),
ID = str_replace_all(ID, "SUBUNIT", "SUB"),
ID = str_replace_all(ID, "PRECURSOR", "PRECUR"),
ID = str_replace_all(ID, "ACTIVATION", "ACTIV"),
ID = str_replace_all(ID, "HEPATOCELLULAR", "HEPATOCELL"),
ID = str_replace_all(ID, "TRANSITION", "TRANS"),
ID = str_replace_all(ID, "EXTRACELLULAR", "EXTRACELL"),
) |>
dplyr::mutate(ID = fct_reorder(ID, -log10(!!as.name(compared_to_padj)) * sign(!!as.name(compared_to_NES)), .desc = T)) |>
pivot_longer(
cols = c(ends_with(compared_from_suffixes)),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)", names_to = c("value_type", "comparison")
) |>
pivot_wider(names_from = "value_type", values_from = "value") |>
dplyr::mutate(
direction = case_when(
sign(NES) == -1 ~ "supressed",
T ~ "activated"
),
GeneRatio = numCoreEnrichment / setSize,
comparison = factor(comparison, levels = c(gsea_compare_to, gsea_comparisons)),
signed_log10padj = -log10(p.adjust) * sign(NES)
) |>
filter(p.adjust <= gsea_lollipop_cutoff_2 &
comparison %in% c(gsea_compare_to, gsea_comparisons) &
ontology %in% c("WP", "KEGG", "HALLMARK")) |>
mutate(comparison = case_when(comparison == "T21_vs_D21" ~ "iPSC T21",
comparison == "dwdT21SIT_vs_T21" ~ "iPSC Rescue",
comparison == "neural_t21_d21" ~ "Neural T21",
comparison == "neural_t21xistdoxcombined_t21" ~ "Neural Rescue",
comparison == "dox_effect" ~ "iPSC Dox"),
comparison = factor(comparison, levels = c("iPSC T21", "iPSC Rescue", "iPSC Dox", "Neural T21", "Neural Rescue"))) |>
ggplot() +
geom_segment(aes(x = 0, y = ID, xend = signed_log10padj, yend = ID), size = .3) +
geom_point(aes(x = signed_log10padj, y = ID, size = GeneRatio, color = NES)) +
scale_color_gradient2(midpoint = 0, low = "blue", high = "red") +
ggh4x::facet_grid2(cols = vars(comparison), rows = vars(ontology), scales = "free", space = "free_y", strip = ggh4x::strip_nested(clip = "off"), ) +
scale_y_discrete(label = function(x) abbreviate(x, minlength = 70, dot = T)) +
# scale_x_continuous(breaks = c(-4, 0, 4), expand = c(.2, 0.2)) +
geom_vline(xintercept = 0, size = .5) +
theme(
axis.text.x = element_text(angle = 65, hjust = 1),
axis.title.y = element_blank(),
axis.text.y = element_text(size=8),
axis.ticks.y = element_blank(),
strip.text.y = element_text(angle = 45, hjust = 0, vjust = .5),
legend.position = "top",
legend.title = element_text(margin = margin(r = 2, unit = "mm"), hjust = .5),
legend.key.height = unit(3, "mm"),
legend.key.width = unit(3, "mm"),
legend.margin = margin(r = 2, l = 2, unit = "mm")
) +
scale_size_area(max_size = 3) +
labs(
title = "Top T21 vs D21 GSEA Terms",
x = "signed\nlog10(p-adj)",
color = "NES"
)
ggsave(sprintf("%s/lollipop.pdf", fig_output_dir), width=7.4, height = 7.8)
## Make lineage score card for iPSCs -----------------------
vsd_df <- batch_corrected_vsd_df |>
left_join(gene_info_df |>
dplyr::select(ensembl_id = ensembl_gene_id_version, gene_name = hgnc_symbol))
thermo_lineage_genes <- read_csv("../Thermo_Lineage_Scorecard_genes.csv")
lineage_gene_counts_df <- vsd_df %>%
inner_join(thermo_lineage_genes, by=c("gene_name"="symbol")) %>%
reshape2::melt(id.vars=c("gene_name", "Lineage"), measure.vars=sample_sheet_df$sample_id, variable.name="sample_id") %>%
left_join(sample_sheet_df)
ordered_genes <- lineage_gene_counts_df %>%
group_by(gene_name) %>%
summarise(mean_expr=mean(value)) %>%
arrange(mean_expr) %>%
pull(gene_name)
lineage_gene_counts_df |>
mutate(`Cell Line`=str_replace(`Cell Line`, "rtTA-XIST", "T21XIST")) |>
ggplot(aes(x=factor(gene_name, levels=ordered_genes), y=value, fill=Lineage, color=Lineage)) +
geom_boxplot() +
labs(title="Lineage Scorecard for iPSCs", x="Genes", y="VST Counts") +
scale_fill_npg(guide="none") +
scale_color_npg(guide="none") +
facet_grid(rows=vars(`Cell Line`), cols=vars(Lineage), scales = "free", space="free") +
theme(axis.text.x = element_text(angle=90, vjust=.5, hjust=1, size=7))
ggsave(sprintf("%s/lineage_scorecard.pdf", fig_output_dir), width=7.25, height = 4.8)