Skip to content
Permalink
6b380cfa28
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
194 lines (173 sloc) 9.07 KB
## ---------------------------
##
## Script name:
##
## Purpose of script:
##
## Author: Prakhar Bansal
##
## Date Created: 2021-09-08
##
## Copyright (c) Prakhar Bansal, 2021
## Email: pbansal@uchc.edu
##
## ---------------------------
##
## Notes:
##
##
## ---------------------------
## set working directory
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
## ---------------------------
## load up the packages we will need: (uncomment as required)
require(tidyverse); theme_set(theme_classic() + theme(plot.title=element_text(hjust=0.5)))
require(ggsci)
library(zeallot)
## ---------------------------
output_dir <- "outputs/2024-02-17 21-39-53.641348 timepoint+dox_status+effective_genotype"
sample_sheet_df <- read_csv(paste0(output_dir, "/tables/sample_sheet_df.csv.gz")) |>
mutate(cell_type = "ipscs")
neural_output_dir <- paste0("../salmon_pipeline_with_stefan/",
(read_csv(paste0(output_dir, "/params.csv.gz")) |>
filter(parameter == "neural_output_dir") |>
pull(value))[1])
sample_sheet_df_neural <- read_csv(paste0(neural_output_dir, "/tables/sample_sheet_df.csv.gz")) |>
mutate(
cell_line = str_replace_all(cell_line, "C(?=(1|4|7))", "rtTA_XIST_c"),
`Cell Line` = str_replace_all(cell_line, "_c", " c") |>
str_replace_all("_", "-"),
dox_status = case_when(dox_status == "wd" ~ "withdrawal",
T ~ dox_status),
has_transgene = case_when(cell_line %in% c("198_1", "198_5") ~ F,
T ~ T)
)|>
mutate(cell_type = "neurons")
gene_info_df <- read_csv("tables/gene_info_df.csv.gz")
comb_sample_sheet_df <- bind_rows(sample_sheet_df, sample_sheet_df_neural)
chr21_allelic_counts_df <- read_tsv("phaser_output_all_chr21/combined_bams_gene_ae.txt",
col_types = cols(contig=col_character(),
variants=col_character())) |>
filter(contig == "21" &
totalCount > 0) |>
mutate(sample_id=str_extract(bam, "d[0-9]*")) |>
inner_join(sample_sheet_df)
write_csv(chr21_allelic_counts_df, "tables/chr21_allelic_counts_df.csv.gz")
chr21_allelic_counts_jan2024_df <- read_tsv("phaser_output_Jan2024/combined_bams_gene_ae.txt",
col_types = cols(contig=col_character(),
variants=col_character())) |>
filter(contig == "21" &
totalCount > 0) |>
mutate(sample_id=str_extract(bam, "d[0-9]*")) |>
right_join(comb_sample_sheet_df)
write_csv(chr21_allelic_counts_df, "tables/chr21_allelic_counts_df_jan2024.csv.gz")
make_allelic_plot <- function(chr21_allelic_counts_df, allele_designation_df_byt21=F, keep_cell_type = "ipscs") {
chr21_allelic_counts_df <- chr21_allelic_counts_df |>
filter(cell_type == keep_cell_type)
print(chr21_allelic_counts_df$sample_id |> unique())
if(length(allele_designation_df_byt21) == 1 && allele_designation_df_byt21 == FALSE) {
allele_designation_df_byt21 <- chr21_allelic_counts_df |>
filter(`Cell Line` %in% c("198-1", "198-2", "198-5"),
totalCount > 6) |>
pivot_longer(cols=c(aCount, bCount), names_to = "allele", values_to = "count") |>
group_by(name, allele, genotype) |>
summarise(mean_ratio=mean(count/totalCount)) |>
ungroup() |>
tidyr::pivot_wider(names_from = c("genotype", "allele"), values_from = "mean_ratio") |>
na.omit() |>
mutate(a_allele_diff=T21_aCount-D21_aCount,
b_allele_diff=T21_bCount-D21_bCount,
paternal_allele=case_when(abs(a_allele_diff) < .1 & abs(b_allele_diff) < .1 ~ "nodiff",
a_allele_diff>0 & b_allele_diff < 0 ~ "bCount",
a_allele_diff<0 & b_allele_diff > 0 ~ "aCount",
T ~ "undetermined"),
maternal_allele=case_when(paternal_allele == "aCount" ~ "bCount",
paternal_allele == "bCount" ~ "aCount")) |>
filter(paternal_allele %in% c("aCount", "bCount")) |>
pivot_longer(cols = c(paternal_allele, maternal_allele), names_to = "allele_type", values_to = "allele")
}
print("ENSG00000154645.14" %in% allele_designation_df_byt21$name)
rtta_xist_ae_plot_df <- chr21_allelic_counts_df |>
filter(!is.na(`Cell Line`) & totalCount >= 10) |>
mutate(dox_status=str_replace(dox_status, "withdrawal", "w/d"),
sample_type=case_when(has_transgene == T & dox_status == "no" ~ "T21XIST\n-dox",
has_transgene == T & dox_status == "yes" ~ "T21XIST\n+dox",
has_transgene == T & dox_status == "w/d" & timepoint == "6wk" ~ "T21XIST\n3w/d",
has_transgene == T & dox_status == "w/d" & timepoint == "17wk" ~ "T21XIST\n3w/d",
has_transgene == T & dox_status == "w/d" & timepoint == "9wk" ~ "T21XIST\n6w/d",
has_transgene == T & dox_status == "w/d" & timepoint == "20wk" ~ "T21XIST\n6w/d",
has_transgene == T & dox_status == "w/d" & is.na(timepoint) ~ "T21XIST\nw/d",
cell_line == "198_5" ~ "T21",
cell_line %in% c("198_1", "198_2") ~ "D21",
T ~ cell_line)) |>
pivot_longer(cols=c(aCount, bCount), names_to = "allele", values_to = "count") |>
mutate(ratio=count/totalCount) |>
inner_join(allele_designation_df_byt21, by=c("name", "allele")) |>
group_by(sample_type, name, start, allele_type) |>
summarise(mean_count=mean(count),
mean_ratio=mean(ratio),
totalCount=dplyr::first(totalCount)) |>
ungroup()
ensembl_ids_to_keep <- rtta_xist_ae_plot_df |>
left_join(gene_info_df, by=c("name"="ensembl_gene_id_version")) |>
filter(allele_type=="paternal_allele" &
gene_biotype %in% c("protein_coding")) |>
pivot_wider(id_cols=c(name), names_from=sample_type, values_from=mean_ratio) |>
na.exclude() |>
# dplyr::filter(`pb_no` < .9 & `pb_no` > .1) |>
pull(name)
genes_sorted_by_position <- rtta_xist_ae_plot_df |>
filter(name %in% ensembl_ids_to_keep) |>
arrange(start) |>
pull(name) |>
unique()
#potential escapees
potential_escapees <- rtta_xist_ae_plot_df |>
filter(sample_type == "T21XIST\n+dox" &
name %in% ensembl_ids_to_keep &
allele_type == "paternal_allele" &
mean_ratio >= 1/6) |>
left_join(gene_info_df, by=c("name"="ensembl_gene_id_version")) |>
pull(name)
plt <- ggplot(rtta_xist_ae_plot_df |>
filter(name %in% ensembl_ids_to_keep) |>
mutate(sample_type1 = case_when(str_detect(sample_type, "T21XIST") ~ "T21XIST",
T ~ sample_type),
sample_type2 = case_when(!str_detect(sample_type, "T21XIST") ~ "",
T ~ str_extract(sample_type, "\\-dox|\\+dox|3w\\/d|6w\\/d|w\\/d"))) |>
left_join(gene_info_df, by=c("name"="ensembl_gene_id_version")),
aes(x=name, y=mean_ratio, fill=allele_type, totalCount=totalCount, gene_name=hgnc_symbol)) +
geom_col(position=position_fill(), width=.8) +
scale_x_discrete(limits=genes_sorted_by_position) +
scale_fill_d3(limits=c("maternal_allele", "paternal_allele"), labels=c("M", "P")) +
ggh4x::facet_nested(rows = vars(factor(sample_type1, levels = c("D21", "T21", "T21XIST")),
factor(sample_type2, levels = c("","-dox", "+dox", "3w/d", "6w/d", "w/d"))),
strip = ggh4x::strip_nested(clip = "off", size = "variable"),
nest_line = element_line(linetype = 1)) +
#strip=ggh4x::strip_nested(clip="off")
# facet_grid(rows = ) +
scale_y_continuous(breaks = c(0,.5,1)) +
geom_hline(yintercept=.5, size=.5) +
geom_hline(yintercept=1/3, linetype="dashed", size=.3) +
labs(title="HSA21 Protein Coding Gene Allelic Expression",
x="Gene",
y="Fraction of Expression",
fill="Allele") +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "top",
legend.title = element_text(margin = margin(r=3, unit="mm")),
legend.text = element_text(margin = margin(r=2, unit="mm")),
strip.text.y = element_text(angle=0)
# text = element_text(size=7),
# axis.title = element_text(size=7),
# plot.title = element_text(size=7, face="bold", margin=margin(t=2, 0)),
# axis.text.y = element_text(size=5),
# panel.spacing = unit(.2, "mm"),
# strip.text = element_text(size=12, margin=margin(t=0,r=1,b=0,l=1)),
# strip.background = element_rect(size = .3),
# plot.margin = margin(0.2)
)
return(list(plt, potential_escapees, allele_designation_df_byt21))
}
c(allelic_plt, potential_escapees, allele_designation_df) %<-% make_allelic_plot(chr21_allelic_counts_df)