Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
## ---------------------------
##
## Script name: Transcript level differential expression
##
## Purpose of script: Using salmon transcript counts, perform differential gene and transcript expression
##
## Author: Prakhar Bansal
##
## Date Created: 2021-08-24
##
## Copyright (c) Prakhar Bansal, 2021
## Email: pbansal@uchc.edu
##
## ---------------------------
##
## Notes:
##
##
## ---------------------------
## set working directory
setwd(dirname(rstudioapi::getSourceEditorContext()$path))
## ---------------------------
# remotes::install_version("Rttf2pt1", version = "1.3.8")
# extrafont::font_import()
## load up the packages we will need: (uncomment as required)
# library(showtext)
# font_add("Arial", "arial.ttf", bold='arialbd.ttf', italic='ariali.ttf', bolditalic='arialbi.ttf')
# showtext_auto()
require(tidyverse)
theme_set(
theme_classic() +
theme(
text = element_text(size = 9, family = "sans"),
plot.title = element_text(hjust = 0.5, size = 10, family = "sans", face = "bold"),
axis.title = element_text(size = 9, face = "bold"),
axis.text = element_text(size = 9),
legend.spacing.x = unit(0, "cm"),
legend.spacing.y = unit(2, "mm"),
legend.key.height = unit(4, "mm"),
legend.margin = margin(0, 0, 0, 0),
axis.line = element_line(size = 0),
strip.background = element_blank(),
strip.text = element_text(margin = margin(1, 1, 1, 1, unit = "mm")),
panel.border = element_rect(color = "black", fill = NA, size = .5)
)
)
require(ggsci)
require(tximeta)
require(DESeq2)
require(clusterProfiler)
require(msigdbr)
require(metap)
require(patchwork)
require(ggdendro)
## --------------------------------------------------------
msig_df <- msigdbr(species = "Homo sapiens")
m_t2g <- msig_df |>
filter(gs_cat == "H" |
gs_cat == "C1" |
(gs_cat == "C2" & str_detect(gs_subcat, "CP:")) |
# (gs_cat == "C3" & str_detect(gs_subcat, "TFT:")) |
(gs_cat == "C5" & str_detect(gs_subcat, "GO:"))) |>
dplyr::select(gs_name, entrez_gene)
contrast_builder <- function(dds_obj, numerator_sample_ids, denominator_sample_ids, covariates_to_ignore, .id_col = "sample_id", .ignore_SVs = T) {
# get the model matrix
mod_mat <- model.matrix(design(dds_obj), colData(dds_obj))
# calculate the vector of coefficient weights in the numerator and denominator
numerator_coefs <- colMeans(mod_mat[dds_obj[[.id_col]] %in% numerator_sample_ids, ])
denominator_coefs <- colMeans(mod_mat[dds_obj[[.id_col]] %in% denominator_sample_ids, ])
# The contrast we are interested in is the difference between the two
contrast_coefs <- numerator_coefs - denominator_coefs
cov_ignore_regex <- covariates_to_ignore #(covariates_to_ignore |> toString() |> str_replace_all("~, ", "") |> str_split(" \\+ "))[[1]] |>
#paste(collapse = "|")
for(coef_name in names(contrast_coefs)) {
if (str_starts(coef_name, cov_ignore_regex)) {
contrast_coefs[coef_name] <- 0
}else if (.ignore_SVs == T & str_detect(coef_name, "SV[0-9]+")) {
print(coef_name)
contrast_coefs[coef_name] <- 0
}
}
return(contrast_coefs)
}
write_results <- function(result_name, filter_top, filter_bottom, dds_obj, outdir, covariates_to_ignore) {
print("in write results...")
print(result_name)
top_sample_ids <- colData(dds_obj) |>
as.data.frame() |>
filter(eval(rlang::parse_expr(filter_top))) |>
pull(sample_id)
print(top_sample_ids)
bottom_sample_ids <- colData(dds_obj) |>
as.data.frame() |>
filter(eval(rlang::parse_expr(filter_bottom))) |>
pull(sample_id)
print(bottom_sample_ids)
res_obj <- results(dds_obj, contrast = contrast_builder(dds_obj, top_sample_ids, bottom_sample_ids, covariates_to_ignore))
summary(res_obj)
write_csv(
res_obj |>
as.data.frame() |>
rownames_to_column(var = "ensembl_id"),
sprintf("%s/tables/results_%s.csv.gz", outdir, result_name)
)
}
run_neural_pipeline <- function(sample_sheet_df,
deseq2_design,
limma_covariates,
limma_design,
sva_mod,
sva_mod0,
diff_exprs = list(
"neural_t21_d21" = c(
"genotype == 'T21'",
"genotype == 'D21'",
"neural_diff|dox_status"),
"neural_d21_t21xistdox" = c(
"genotype == 'T21XIST' & dox_status != 'no'",
"(genotype == 'T21XIST' & dox_status == 'no') | genotype == 'T21'",
"neural_diff|dox_status"),
"neural_d21_t21xistdox" = c(
"genotype == 'D21'",
"genotype == 'T21XIST' & dox_status != 'no'",
"neural_diff"))) {
outdir <- sprintf(
"outputs/neural_%s %s",
str_replace(Sys.time(), " [A-Z]+", "") |> str_replace_all(":", "-"),
deseq2_design |> toString() |> str_replace_all("~,| ", "") |> str_replace_all(":", "-") |> str_replace_all("\\*", "_x_")
)
dir.create(sprintf("%s/%s", outdir, "figures"), recursive = T)
dir.create(sprintf("%s/%s", outdir, "tables"), recursive = T)
write_csv(sample_sheet_df, paste0(outdir, "/tables/sample_sheet_df.csv.gz"))
sum_exp <- tximeta(sample_sheet_df)
gene_sum_exp <- summarizeToGene(sum_exp)
print("before deseq2 here")
dds <- DESeqDataSet(gene_sum_exp,
design = deseq2_design
)
dds <- estimateSizeFactors(dds)
nc <- counts(dds, normalized = TRUE)
filter <- rowSums(nc >= 20) >= ceiling(ncol(nc) / 10)
dds <- dds[filter, ]
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds, modelMatrixType = "standard")
dds <- dds[which(mcols(dds)$betaConv), ]
saveRDS(dds, paste0(outdir, "/tables/dds.rds"))
vsd <- varianceStabilizingTransformation(dds, blind = F)
vsd_salmon_df <- assay(vsd) |>
as.data.frame() |>
rownames_to_column(var = "ensembl_id")
pca_expr_data <- vsd_salmon_df |>
column_to_rownames(var = "ensembl_id")
batch_corrected_vsd <- limma::removeBatchEffect(pca_expr_data,
covariates = model.matrix(limma_covariates, data = colData(dds)),
design = model.matrix(limma_design, data = colData(dds))
)
resultsNames(dds)
write_csv(vsd_salmon_df, sprintf("%s/tables/vsd_salmon_df.csv.gz", outdir))
write_csv(batch_corrected_vsd |> as.data.frame() |> rownames_to_column(var = "ensembl_id"), sprintf("%s/tables/batch_corrected_vsd_salmon_df.csv.gz", outdir))
gene_info_df <- read_csv("input_data/gene_info_df.csv.gz")
for (de_name in names(diff_exprs)) {
print(de_name)
write_results(de_name,
diff_exprs[[de_name]][1],
diff_exprs[[de_name]][2],
dds,
outdir,
diff_exprs[[de_name]][3])
}
results_files <- fs::dir_ls(
path = sprintf("%s/tables", outdir),
glob = sprintf(
"%s/tables/results_*csv.gz",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)
)
combined_results_df <- read_csv(results_files,
id = "origin_file") |>
mutate(comparison = str_replace_all(
origin_file,
sprintf(
"(%s/tables/results_)|(.csv.gz)",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\(", "\\\\(") |>
str_replace_all("\\)", "\\\\)") |>
str_replace_all("\\,", "\\\\,")
),
""
)) |>
pivot_wider(
id_cols = ensembl_id,
names_from = comparison,
values_from = c(baseMean, log2FoldChange, pvalue, padj, stat)
)
write_csv(combined_results_df,
sprintf("%s/tables/combined_results_df.csv.gz", outdir))
sprintf("Running GSEA Enrichments...\n")
combined_results_df <- combined_results_df |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version"))
# run gsea for all comparisons
all_comparisons <- combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(starts_with("log2FoldChange_")),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)",
names_to = c("value_type", "comparison")
) |>
pull(comparison) |>
unique()
for (comparison in all_comparisons) {
ordered_genes <- combined_results_df |>
filter(!is.na(entrezgene_id)) |>
arrange(desc(!!as.name(sprintf(
"stat_%s", comparison
)))) |>
dplyr::select(entrezgene_id,!!sprintf("stat_%s", comparison)) |>
deframe()
gsea_result <-
GSEA(ordered_genes,
TERM2GENE = m_t2g,
pvalueCutoff = 1)
write_csv(
gsea_result |> as.data.frame(),
sprintf("%s/tables/gsea_%s.csv.gz", outdir, comparison)
)
}
gsea_files <- fs::dir_ls(
path = sprintf("%s/tables", outdir),
glob = sprintf(
"%s/tables/gsea_*csv.gz",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)
)
print(gsea_files)
combined_enrichment_df <- read_csv(gsea_files,
id = "origin_file") |>
mutate(comparison = str_replace_all(
origin_file,
sprintf(
"(%s/tables/gsea_)|(.csv.gz)",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\(", "\\\\(") |>
str_replace_all("\\)", "\\\\)") |>
str_replace_all("\\,", "\\\\,")
),
""
)) |>
mutate(
leadingEdgeTags = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 1], "tags=|%", ""
)) / 100,
leadingEdgeList = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 2], "list=|%", ""
)) / 100,
leadingEdgeSignal = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 3], "signal=|%", ""
)) / 100,
numCoreEnrichment = str_count(str_split(core_enrichment, "/")) + 1
) |>
pivot_wider(
id_cols = ID,
names_from = comparison,
values_from = c(
NES,
p.adjust,
qvalue,
enrichmentScore,
setSize,
leadingEdgeTags,
leadingEdgeList,
leadingEdgeSignal,
numCoreEnrichment
)
) |>
mutate_at(vars(starts_with("p.adjust")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("qvalue")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("NES")), list( ~ if_else(is.na(.), 0, .))) |>
mutate_at(vars(starts_with("enrichmentScore")), list( ~ if_else(is.na(.), 0, .))) |>
dplyr::mutate(
category = dplyr::case_when(
str_detect(ID, "STRESS") ~ "stress",
str_detect(ID, "ENDOPLASMIC_RETICULUM") ~ "ER",
str_detect(ID, "chr21") ~ "chr21",
str_detect(ID, "TRANSLATION") ~ "translation",
str_detect(ID, "RIBOSOM") ~ "ribosome",
str_detect(ID, "SERINE") &
(!str_detect(ID, "SERINE_THREONINE")) ~ "serine",
str_detect(ID, "MITOCH") ~ "mitochondria",
str_detect(ID, "NRF2") ~ "stress",
str_detect(ID, "APOPTO") |
str_detect(ID, "PROGRAMMED_CELL_DEATH") ~ "apoptosis",
T ~ "other"
)
)
print("saving combined enrichment df")
write_csv(combined_enrichment_df,
paste0(outdir, "/tables/combined_enrichment_df.csv.gz"))
rmarkdown::render("summarize_neural_output.Rmd", output_file = paste0(outdir, "/neural_summary.html"), params = list(
output_dir = outdir,
samples_to_exclude = samples_to_exclude |> toString(),
deseq2_design = deseq2_design |> toString(),
limma_covariates = limma_covariates |> toString(),
limma_design = limma_design |> toString(),
sva_mod = sva_mod |> toString(),
sva_mod0 = sva_mod0 |> toString(),
use_SV = use_SV,
num_SV = num_SV
))
}
run_pipeline <- function(sample_sheet_df,
deseq2_design,
limma_covariates,
limma_design,
sva_mod,
sva_mod0,
diff_exprs = list(
"rtta_wd_vs_nd" = c(
'has_transgene & dox_status == "withdrawal"',
'has_transgene & dox_status == "no"'),
"rtta_wd_vs_nd_powered" = c(
'has_transgene & dox_status == "withdrawal"',
'(has_transgene | cell_line == "198_5") & dox_status == "no"'),
"rtta_d_vs_nd" = c(
'has_transgene & dox_status == "yes"',
'has_transgene & dox_status == "no"'),
"nonrtta_t21_vs_d21" = c(
'genotype == "T21" & dox_status != "yes"',
'genotype == "D21" & dox_status != "yes"'),
"nonrtta_t21_vs_d21_pow" = c(
'(genotype == "T21" & dox_status != "yes") | (genotype == "T21XIST" & dox_status == "no")',
'genotype == "D21" & dox_status != "yes"'),
"dox_t21xist_vs_1985" = c(
'genotype == "T21XIST" & dox_status == "yes"',
'genotype == "T21" & dox_status == "yes"'),
"nonrtta_d_vs_nd" = c(
'!has_transgene & dox_status == "yes"',
'!has_transgene & dox_status != "yes"')
),
neural_output_dir = NULL,
chr21_changes_comparisons = c("rtta_wd_vs_nd_powered", "dox_t21xist_vs_1985"),
diff_expr_compare_to = "nonrtta_t21_vs_d21_pow",
diff_expr_comparisons = c(
"neural_t21_d21",
"rtta_d_vs_nd",
"dox_t21xist_vs_1985",
"rtta_wd_vs_nd",
"rtta_wd_vs_nd_powered"
),
diff_expr_combined_padj_cutoff = .1,
gsea_compare_to = "nonrtta_t21_vs_d21_pow",
gsea_comparisons = c(
"neural_t21_d21",
"rtta_d_vs_nd",
"dox_t21xist_vs_1985",
"rtta_wd_vs_nd",
"rtta_wd_vs_nd_powered"
),
gsea_combined_padj_cutoff = .2,
lol_cutoff_primary = .1,
lol_cutoff_overlap = .1,
generate_individual_figures = F,
generate_summary_report = T) {
outdir <- sprintf(
"outputs/%s %s",
str_replace(Sys.time(), " [A-Z]+", "") |> str_replace_all(":", "-"),
deseq2_design |> toString() |> str_replace_all("~,| ", "") |> str_replace_all(":", "-") |> str_replace_all("\\*", "_x_")
)
dir.create(sprintf("%s/%s", outdir, "figures"), recursive = T)
dir.create(sprintf("%s/%s", outdir, "tables"), recursive = T)
## Import the counts using tximeta
write_csv(sample_sheet_df,
paste0(outdir, "/tables/sample_sheet_df.csv.gz"))
sum_exp <- tximeta(sample_sheet_df)
gene_sum_exp <- summarizeToGene(sum_exp)
dds <- DESeqDataSet(gene_sum_exp,
design = deseq2_design)
dds <- estimateSizeFactors(dds)
nc <- counts(dds, normalized = TRUE)
filter <- rowSums(nc >= 20) >= ceiling(ncol(nc) / 10)
dds <- dds[filter,]
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds, modelMatrixType = "standard")
dds <- dds[which(mcols(dds)$betaConv),]
saveRDS(dds, paste0(outdir, "/tables/dds.rds"))
vsd <- varianceStabilizingTransformation(dds, blind = F)
vsd_salmon_df <- assay(vsd) |>
as.data.frame() |>
rownames_to_column(var = "ensembl_id")
pca_expr_data <- vsd_salmon_df |>
column_to_rownames(var = "ensembl_id")
batch_corrected_vsd <- limma::removeBatchEffect(
pca_expr_data,
covariates = model.matrix(limma_covariates, data = colData(dds)),
design = model.matrix(limma_design, data = colData(dds))
)
# results
resultsNames(dds)
write_csv(vsd_salmon_df,
sprintf("%s/tables/vsd_salmon_df.csv.gz", outdir))
write_csv(
batch_corrected_vsd |> as.data.frame() |> rownames_to_column(var = "ensembl_id"),
sprintf("%s/tables/batch_corrected_vsd_salmon_df.csv.gz", outdir)
)
gene_info_df <- read_csv("input_data/gene_info_df.csv.gz")
sprintf("Outputting DESeq2 Results...\n")
for (de_name in names(diff_exprs)) {
write_results(de_name,
diff_exprs[[de_name]][1],
diff_exprs[[de_name]][2],
dds,
outdir,
diff_exprs[[de_name]][3])
}
results_files <- fs::dir_ls(
path = sprintf("%s/tables", outdir),
glob = sprintf(
"%s/tables/results_*csv.gz",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)
)
combined_results_df <- read_csv(results_files,
id = "origin_file") |>
mutate(comparison = str_replace_all(
origin_file,
sprintf(
"(%s/tables/results_)|(.csv.gz)",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\(", "\\\\(") |>
str_replace_all("\\)", "\\\\)") |>
str_replace_all("\\,", "\\\\,")
),
""
)) |>
pivot_wider(
id_cols = ensembl_id,
names_from = comparison,
values_from = c(baseMean, log2FoldChange, pvalue, padj, stat)
)
write_csv(combined_results_df,
sprintf("%s/tables/combined_results_df.csv.gz", outdir))
## Gene Overlap Figures ---------------
sprintf("Reading in neural enrichments...%s\n", neural_output_dir)
if (is.null(neural_output_dir)) {
neural_results_nonrtta_T21_vs_D21 <-
read_csv("input_data/neural_results_nonrtta_T21_vs_D21.csv.gz")
} else {
neural_results_nonrtta_T21_vs_D21 <-
read_csv(paste0(
neural_output_dir,
"/tables/results_neural_t21_d21.csv.gz"
))
}
combined_results_df <-
read_csv(paste0(outdir, "/tables/combined_results_df.csv.gz")) |>
inner_join(
neural_results_nonrtta_T21_vs_D21 |>
dplyr::select(
ensembl_id,
log2FoldChange_neural_t21_d21 = log2FoldChange,
pvalue_neural_t21_d21 = pvalue,
padj_neural_t21_d21 = padj,
stat_neural_t21_d21 = stat
)
) |>
mutate_at(vars(starts_with("padj")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("pvalue")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("log2FoldChange")), list( ~ if_else(is.na(.), 0, .))) |>
mutate_at(vars(starts_with("stat")), list( ~ if_else(is.na(.), 0, .))) |>
left_join(gene_info_df, by = c("ensembl_id" = "ensembl_gene_id_version"))
vectorized_sumlog <- function(x, y) {
if (length(x) != length(y)) {
stop("lengths of lists must be the same")
}
combined_pvals <- c()
for (i in 1:length(x)) {
combined_pvals <-
c(combined_pvals, (metap::sumlog(c(x[i], y[i])))$p)
}
return(combined_pvals)
}
# GSEA Enrichments ----------------------------
sprintf("Running GSEA Enrichments...\n")
# run gsea for all comparisons
all_comparisons <- combined_results_df |>
filter(hgnc_symbol != "XIST") |>
pivot_longer(
cols = c(starts_with("log2FoldChange_")),
names_pattern = "([A-Za-z0-9\\.]*)_(.*)",
names_to = c("value_type", "comparison")
) |>
pull(comparison) |>
unique()
for (comparison in all_comparisons) {
ordered_genes <- combined_results_df |>
filter(!is.na(entrezgene_id)) |>
arrange(desc(!!as.name(sprintf(
"stat_%s", comparison
)))) |>
dplyr::select(entrezgene_id,!!sprintf("stat_%s", comparison)) |>
deframe()
gsea_result <-
GSEA(ordered_genes,
TERM2GENE = m_t2g,
pvalueCutoff = 1)
write_csv(
gsea_result |> as.data.frame(),
sprintf("%s/tables/gsea_%s.csv.gz", outdir, comparison)
)
}
gsea_files <- fs::dir_ls(
path = sprintf("%s/tables", outdir),
glob = sprintf(
"%s/tables/gsea_*csv.gz",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\,", "\\\\,")
)
)
print(gsea_files)
combined_enrichment_df <- read_csv(gsea_files,
id = "origin_file") |>
mutate(comparison = str_replace_all(
origin_file,
sprintf(
"(%s/tables/gsea_)|(.csv.gz)",
outdir |>
str_replace_all("\\+", "\\\\+") |>
str_replace_all("\\(", "\\\\(") |>
str_replace_all("\\)", "\\\\)") |>
str_replace_all("\\,", "\\\\,")
),
""
)) |>
mutate(
leadingEdgeTags = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 1], "tags=|%", ""
)) / 100,
leadingEdgeList = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 2], "list=|%", ""
)) / 100,
leadingEdgeSignal = as.numeric(str_replace_all(
str_split(leading_edge, ", ", simplify = T)[, 3], "signal=|%", ""
)) / 100,
numCoreEnrichment = str_count(str_split(core_enrichment, "/")) + 1
) |>
pivot_wider(
id_cols = ID,
names_from = comparison,
values_from = c(
NES,
p.adjust,
qvalue,
enrichmentScore,
setSize,
leadingEdgeTags,
leadingEdgeList,
leadingEdgeSignal,
numCoreEnrichment
)
) |>
mutate_at(vars(starts_with("p.adjust")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("qvalue")), list( ~ if_else(is.na(.), 1, .))) |>
mutate_at(vars(starts_with("NES")), list( ~ if_else(is.na(.), 0, .))) |>
mutate_at(vars(starts_with("enrichmentScore")), list( ~ if_else(is.na(.), 0, .))) |>
dplyr::mutate(
category = dplyr::case_when(
str_detect(ID, "STRESS") ~ "stress",
str_detect(ID, "ENDOPLASMIC_RETICULUM") ~ "ER",
str_detect(ID, "chr21") ~ "chr21",
str_detect(ID, "TRANSLATION") ~ "translation",
str_detect(ID, "RIBOSOM") ~ "ribosome",
str_detect(ID, "SERINE") &
(!str_detect(ID, "SERINE_THREONINE")) ~ "serine",
str_detect(ID, "MITOCH") ~ "mitochondria",
str_detect(ID, "NRF2") ~ "stress",
# str_detect(ID, "FOLATE") ~ "Folate",
# str_detect(ID, "MTORC1") ~ "MTORC1",
str_detect(ID, "APOPTO") |
str_detect(ID, "PROGRAMMED_CELL_DEATH") ~ "apoptosis",
# str_detect(top_words, "EXTRACELLULAR|MATRIX") ~ "ECM",
T ~ "other"
)
)
print("saving combined enrichment df")
write_csv(combined_enrichment_df,
paste0(outdir, "/tables/combined_enrichment_df.csv.gz"))
tibble(
parameter = c(
"deseq2_design",
"limma_covariates",
"limma_design",
"sva_mod",
"sva_mod0",
"neural_output_dir"
),
value = c(
deseq2_design |> toString(),
limma_covariates |> toString(),
limma_design |> toString(),
sva_mod |> toString(),
sva_mod0 |> toString(),
neural_output_dir
)
) |>
write_csv(paste0(outdir, "/params.csv.gz"))
diff_expr_df <- tibble(name = character(),
numerator = character(),
denomenator = character())
for (de_name in names(diff_exprs)) {
diff_expr_df <- add_row(
diff_expr_df,
name = de_name,
numerator = diff_exprs[[de_name]][1],
denomenator = diff_exprs[[de_name]][2]
)
}
write_csv(diff_expr_df, paste0(outdir, "/diff_exprs.csv.gz"))
generate_report(
paste0(getwd(), "/", outdir),
chr21_changes_comparisons = chr21_changes_comparisons,
diff_expr_compare_to = diff_expr_compare_to,
diff_expr_comparisons = diff_expr_comparisons,
diff_expr_combined_padj_cutoff = diff_expr_combined_padj_cutoff,
gsea_compare_to = gsea_compare_to,
gsea_comparisons = gsea_comparisons,
gsea_combined_padj_cutoff = gsea_combined_padj_cutoff,
lol_cutoff_primary = lol_cutoff_primary,
lol_cutoff_overlap = lol_cutoff_overlap
)
}
generate_report <- function(outdir,
chr21_changes_comparisons = c("rtta_wd_vs_nd_powered", "dox_t21xist_vs_1985"),
diff_expr_compare_to = "nonrtta_t21_vs_d21_pow",
diff_expr_comparisons = c(
"neural_t21_d21",
"rtta_d_vs_nd",
"dox_t21xist_vs_1985",
"rtta_wd_vs_nd",
"rtta_wd_vs_nd_powered"
),
diff_expr_combined_padj_cutoff = .1,
gsea_compare_to = "nonrtta_t21_vs_d21_pow",
gsea_comparisons = c(
"neural_t21_d21",
"rtta_d_vs_nd",
"dox_t21xist_vs_1985",
"rtta_wd_vs_nd",
"rtta_wd_vs_nd_powered"
),
gsea_combined_padj_cutoff = .2,
lol_cutoff_primary = .1,
lol_cutoff_overlap = .1) {
output_filename <- "/summary.html"
rmarkdown::render("summarize_output2.Rmd", output_file = paste0(outdir, output_filename), params = list(
output_dir = outdir,
chr21_changes_comparisons = chr21_changes_comparisons,
diff_expr_compare_to = diff_expr_compare_to,
diff_expr_comparisons = diff_expr_comparisons,
diff_expr_combined_padj_cutoff = diff_expr_combined_padj_cutoff,
gsea_compare_to = gsea_compare_to,
gsea_comparisons = gsea_comparisons,
gsea_combined_padj_cutoff = gsea_combined_padj_cutoff,
lol_cutoff_primary = lol_cutoff_primary,
lol_cutoff_overlap = lol_cutoff_overlap
))
}
neural_sample_sheet_df <- read_csv("input_data/bulkrna_neural_sample_sheet.csv") |>
mutate(
names=sample_id,
files=sprintf("input_data/salmon_quantification_v38_gibbs/%s_quant/quant.sf", sample_id)
)
run_neural_pipeline(
neural_sample_sheet_df,
deseq2_design = ~ geno_dox,
limma_covariates = ~ neural_diff,
limma_design = ~ genotype,
sva_mod = ~ neural_diff + genotype,
sva_mod0 = ~ neural_diff,
diff_exprs = list(
"neural_t21_d21" = c(
"(genotype == 'T21XIST' & dox_status == 'no') | genotype == 'T21'",
"genotype == 'D21'",
"1"),
"neural_t21xistdoxcombined_t21" = c(
"genotype == 'T21XIST' & dox_status != 'no'",
"(genotype == 'T21XIST' & dox_status == 'no') | genotype == 'T21'",
"1"),
"neural_t21xistdox_t21" = c(
"genotype == 'T21XIST' & dox_status != 'no' & dox_status != 'wd'",
"(genotype == 'T21XIST' & dox_status == 'no') | genotype == 'T21'",
"1"),
"neural_t21xistwd_t21" = c(
"genotype == 'T21XIST' & dox_status != 'no' & dox_status == 'wd'",
"(genotype == 'T21XIST' & dox_status == 'no') | genotype == 'T21'",
"1"),
"neural_d21_t21xistdoxcombined" = c(
"genotype == 'D21'",
"genotype == 'T21XIST' & dox_status != 'no'",
"1"),
"neural_d21_t21xistdox" = c(
"genotype == 'D21'",
"genotype == 'T21XIST' & dox_status != 'no' & dox_status != 'wd'",
"1"),
"neural_d21_t21xistwd" = c(
"genotype == 'D21'",
"genotype == 'T21XIST' & dox_status != 'no' & dox_status == 'wd'",
"1"))
)
ipsc_sample_sheet_df <- read_csv("input_data/bulkrna_neural_sample_sheet.csv") |>
mutate(
names = sample_id,
files = sprintf("input_data/salmon_quantification_v38_gibbs/%s_quant/quant.sf", sample_id)
)
# iPSC differential expression
run_pipeline(
sample_sheet_df = ipsc_sample_sheet_df,
deseq2_design = ~ timepoint + dox_status + effective_genotype,
limma_covariates = ~ timepoint + dox_status,
limma_design = ~ effective_genotype,
sva_mod = ~ timepoint + dox_status + effective_genotype,
sva_mod0 = ~ timepoint + dox_status,
neural_output_dir = "outputs/neural_2024-02-17 20-39-53.641348 geno_dox", # from the output of run_neural_pipeline above
diff_exprs = list(
"wdT21XIST_vs_T21" = c(
'genotype == "T21XIST" & dox_status == "withdrawal"',
'(genotype == "T21XIST" & dox_status == "no") | (genotype == "T21")',
"timepoint|dox_status"),
"dT21XIST_vs_T21" = c(
'genotype == "T21XIST" & dox_status == "yes"',
'(genotype == "T21XIST" & dox_status == "no") | (genotype == "T21")',
"timepoint|dox_status"),
"dwdT21SIT_vs_T21" = c(
'genotype == "T21XIST" & dox_status != "no"',
'(genotype == "T21XIST" & dox_status == "no") | (genotype == "T21")',
"timepoint|dox_status"
),
"wdT21XIST_vs_D21" = c(
'genotype == "T21XIST" & dox_status == "withdrawal"',
'genotype == "D21"',
"timepoint|dox_status"),
"dT21XIST_vs_D21" = c(
'genotype == "T21XIST" & dox_status == "yes"',
'genotype == "D21"',
"timepoint|dox_status"),
"dwdT21SIT_vs_D21" = c(
'genotype == "T21XIST" & dox_status != "no"',
'genotype == "D21"',
"timepoint|dox_status"
),
"T21_vs_D21" = c(
'(genotype == "T21XIST" & dox_status == "no") | (genotype == "T21")',
'genotype == "D21"',
"timepoint|dox_status"),
"D21_vs_T21" = c(
'genotype == "D21"',
'(genotype == "T21XIST" & dox_status == "no") | (genotype == "T21")',
"timepoint|dox_status"),
"dox_effect" = c(
'genotype %in% c("T21", "D21") & dox_status == "yes"',
'genotype %in% c("T21", "D21") & dox_status != "yes"',
"timepoint|effective_genotype")
),
chr21_changes_comparisons = c("wdT21XIST_vs_T21", "dT21XIST_vs_T21"),
diff_expr_compare_to="T21_vs_D21",
diff_expr_comparisons = c(
"neural_t21_d21",
"neural_t21xistdoxcombined_t21",
"wdT21XIST_vs_T21",
"dT21XIST_vs_T21",
"dox_effect"
),
gsea_compare_to = "T21_vs_D21",
gsea_comparisons = c(
"neural_t21_d21",
"neural_t21xistdoxcombined_t21",
"wdT21XIST_vs_T21",
"dT21XIST_vs_T21",
"dox_effect"
)
)