Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
library("DESeq2")
library('biomaRt')
library("TxDb.Hsapiens.UCSC.hg38.knownGene")
library("tidyverse")
library("org.Hs.eg.db")
diff_expr_all_df <- NA
directory <- "htseq_counts"
count_file_suffix <- "Aligned.out.sorted.bam.htseq_counts"
sampleFiles <- grep(count_file_suffix,list.files(directory),value=TRUE)
rna_seq_accessions_tib <- read_csv("rna_seq_accessions_tib.csv.gz")
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster"))
cluster_tib <- column_to_rownames(cluster_tib, "sample_name")
seq_type_df <- read_csv("sample_seq_type.csv.gz")
sample_info_df <- read_csv("sample_data_combined_all_info.csv")
sample_file_to_keep <- c()
sample_names <- c()
sampleCondition <- c()
sample_seq_type <- c()
sample_group <- c()
xist_expression <- c()
all_htseq_counts_df <- tibble(gene_id=character(),gene_name=character())
for(filename in sampleFiles) {
sra_acc <-sub(count_file_suffix, "", filename)
if(sra_acc == "SRR4027227") {
print("I'm getting the import file")
}
if (sra_acc %in% rna_seq_accessions_tib$sra_accession) {
sample_name <- (filter(rna_seq_accessions_tib, sra_accession == sra_acc))$SampleName
if(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") {
print(sample_name)
}
cluster <- cluster_tib[sample_name, 1]
# print(cluster)
if(is.na(cluster) == F) {
if(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") {
print(sample_name)
print(cluster)
}
sample_file_to_keep <- c(sample_file_to_keep, filename)
sample_names <- c(sample_names, sample_name)
sampleCondition <- c(sampleCondition, cluster)
sample_seq_type <- c(sample_seq_type,
as.character(seq_type_df[seq_type_df$`Sample Name` == sample_name, "end_type"][1]))
sample_group <- c(sample_group,
as.character(sample_info_df[sample_info_df$sample_name == sample_name, "group_accession"][1]))
counts_tib <- read_tsv(paste(directory, "/", filename, sep=""), col_names = c('id', 'name', 'level'))
all_htseq_counts_df <- full_join(all_htseq_counts_df, dplyr::select(counts_tib, id,name, !! sample_name := level), by=c("gene_id"="id", "gene_name"="name"))
}
}
}
sampleTable <- data.frame(sampleName = sample_names,
fileName = sample_file_to_keep,
cluster = factor(sampleCondition),
seq_type = sample_seq_type,
group = sample_group)
ddsHTSeq_all <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ seq_type + group + cluster)
# keep <- rowSums(counts(ddsHTSeq_all)) >= 10
# ddsHTSeq_all <- ddsHTSeq_all[keep,]
dds_all <- DESeq(ddsHTSeq_all)
# library("tximeta")
# se <- tximeta(sampleTable)
# library(AnnotationFilter)
gtf <- "gencode.v30.annotation.gtf"
txdb <- makeTxDbFromGFF(gtf, format = "gtf", circ_seqs = character())
ebg <- ensembldb::exonsBy(txdb, by = "gene")
filtered_ebg <- ebg[rownames(dds_all)]
rowRanges(dds_all) <- GRangesList(filtered_ebg)
fpkm_counts <- fpkm(dds_all)
#need to use archive because gtf is from june 2019
hg38_mart <- useMart('ensembl',host="apr2019.archive.ensembl.org", dataset='hsapiens_gene_ensembl')
hg38_biomart_gene_df <- getBM(mart=hg38_mart,
attributes=c( 'ensembl_gene_id', 'ensembl_gene_id_version', 'hgnc_symbol', "chromosome_name", "start_position", "end_position", "strand", "description"))
fpkm_counts_df <- left_join(as_tibble(fpkm_counts, rownames=".rownames"), hg38_biomart_gene_df, by=c(".rownames"="ensembl_gene_id_version"))
fpkm_counts_df <- fpkm_counts_df %>%
filter(chromosome_name != "Y") %>%
mutate(autosomal = (chromosome_name != "X"))
mean_x_a_ratio_df <- fpkm_counts_df %>%
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>%
group_by(autosomal, sample_name) %>%
summarise(mean_fpkm=mean(fpkm_val)) %>%
ungroup() %>%
mutate(is_autosomal = case_when(autosomal==FALSE ~ "mean_X_fpkm",
autosomal==TRUE ~ "mean_A_fpkm")) %>%
reshape2::dcast(sample_name ~ is_autosomal, value.var="mean_fpkm") %>%
mutate(x_a_ratio=mean_X_fpkm/mean_A_fpkm)
median_x_a_ratio_df <- fpkm_counts_df %>%
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>%
group_by(autosomal, sample_name) %>%
summarise(median_fpkm=median(fpkm_val)) %>%
ungroup() %>%
mutate(is_autosomal = case_when(autosomal==FALSE ~ "median_X_fpkm",
autosomal==TRUE ~ "median_A_fpkm")) %>%
reshape2::dcast(sample_name ~ is_autosomal, value.var="median_fpkm") %>%
mutate(x_a_ratio=median_X_fpkm/median_A_fpkm)
library(pairwiseCI)
pairwise_ci_df <- fpkm_counts_df %>%
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>%
group_by(sample_name) %>%
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate,
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1],
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2])
write_csv(pairwise_ci_df, "pairwise_ci_df.csv")
# autosome_fpkms <- pairwise_ci_df %>% filter(autosomal == T) %>% dplyr::select(fpkm_val) %>% unlist %>% as.numeric
# x_fpkms <- pairwise_ci_df %>% filter(autosomal == F) %>% dplyr::select(fpkm_val) %>% unlist %>% as.numeric
# Median.ratio(x_fpkms, autosome_fpkms)
pairwise_ci_df <- read_csv("pairwise_ci_df.csv")
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster"))
pairwise_ci_df <- left_join(pairwise_ci_df, cluster_tib, by="sample_name")
library(ggplot2)
x_a_ratio_plot_per_sample <- ggplot(pairwise_ci_df %>% filter(!is.na(cluster))) +
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) +
geom_point(aes(x=sample_name, y=median_ratio)) +
facet_grid(cols=vars(cluster), scales="free_x", labeller = labeller(.cols=function(n){
return(list("0"="A",
"1"="B",
"2"="C",
"3"="D",
"4"="E",
"5"="F")[n])
})) +
labs(y="Median X:A", title="Median X:Autosome Ratios", x="Female Samples") +
theme_bw() +
base_plot_theme +
theme(axis.ticks.x=element_blank(),
axis.text.x = element_blank())
ggsave("figs/review images/x_a_ratio_per_sample.png", x_a_ratio_plot_per_sample)
library(ggsignif)
x_a_ratio_plot_per_cluster <- ggplot(pairwise_ci_df %>% filter(!is.na(cluster)) %>% mutate(cluster=case_when(cluster==0~"A", cluster==1~"B", cluster==2~"C", cluster==3~"D", cluster==5~"F")),
aes(x=cluster, y=median_ratio, group=cluster)) +
geom_boxplot(aes(fill="female", alpha=cluster), outlier.shape = NA, size=.3) +
geom_point(position=position_jitter(w=0.1,h=0), size=.5, shape=20) +
labs(y="Median X:A", title="Median X:Autosome Ratios", x="Cluster") +
geom_signif(comparisons = list(c(1,2), c(1,3), c(1,4), c(1,5)), y_position=c(1.1, 1.15, 1.2, 1.25), tip_length = 0, size=.3, textsize = 2, test="wilcox.test") +
theme_bw() +
scale_fill_npg(guide=F) +
scale_alpha_discrete(guide=F) +
base_plot_theme +
scale_x_discrete(limits=c("A", "B", "C", "D", "F")) +
scale_y_continuous(limits = c(0.54,1.28))
ggsave("figs/main/fig_4_x_a_ratio_plot_per_cluster_wilcox.pdf", x_a_ratio_plot_per_cluster, width=57.855, height=58, units = "mm")
fpkm_counts_df <- fpkm_counts_df %>%
mutate(category=case_when((hgnc_symbol %in% c("PRKX", "NLGN4X", "TBL1X",
"TMSB4X", "CXorf15X", "EIFA1X",
"ZFX", "USP9X", "DDX3X",
"KDM6A", "KDM5C", "RPS4X")) ~ "x_y_homologs",
chromosome_name == "X" & (start_position <= 2781479) | (start_position >= 155701383) ~ "PAR",
chromosome_name == "X" & start_position > 58100000 ~ "S1",
chromosome_name == "X" & start_position > 46000000 ~ "S2",
chromosome_name == "X" & start_position > 11.5e6 ~ "S3",
chromosome_name == "X" ~ "S4"))
pairwise_ci_df_split <- fpkm_counts_df[rowSums((fpkm_counts_df %>% dplyr::select(!!colnames(fpkm_counts)))>=1)==length(colnames(fpkm_counts)) ,] %>%
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>%
group_by(sample_name) %>%
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate,
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1],
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2],
xy_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$estimate,
xy_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[1],
xy_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[2],
par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate,
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1],
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2],
s1_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$estimate,
s1_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[1],
s1_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[2],
s2_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$estimate,
s2_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[1],
s2_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[2],
s3_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$estimate,
s3_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[1],
s3_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[2],
s4_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$estimate,
s4_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[1],
s4_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[2])
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster"))
pairwise_ci_df_split <- left_join(pairwise_ci_df_split, cluster_tib, by="sample_name")
library(facetscales)
scales_y <- list("all" = scale_y_continuous(limits= c(0, 5)),
"PAR" = scale_y_continuous(limits=c(0, 5)),
"XY_homolog" = scale_y_continuous(limits=c(0, 5)),
"S1" = scale_y_continuous(limits=c(0, 5)),
"S2" = scale_y_continuous(limits=c(0, 5)),
"S3" = scale_y_continuous(limits=c(0, 5)),
"S4" = scale_y_continuous(limits=c(0, 5)))
pairwise_ci_plot_df <- pairwise_ci_df_split %>%
reshape2::melt(id.vars=c("sample_name", "cluster"), measure.vars=c("median_ratio", "median_ratio_ci_low", "median_ratio_ci_high",
"par_median_ratio", "par_median_ratio_ci_low", "par_median_ratio_ci_high",
"s1_median_ratio", "s1_median_ratio_ci_low", "s1_median_ratio_ci_high",
"s2_median_ratio", "s2_median_ratio_ci_low", "s2_median_ratio_ci_high",
"s3_median_ratio", "s3_median_ratio_ci_low", "s3_median_ratio_ci_high",
"s4_median_ratio", "s4_median_ratio_ci_low", "s4_median_ratio_ci_high",
"xy_median_ratio", "xy_median_ratio_ci_low", "xy_median_ratio_ci_high")) %>%
mutate(subset_type=case_when(grepl("par", variable) ~ "PAR",
grepl("s1", variable) ~ "S1",
grepl("s2", variable) ~ "S2",
grepl("s3", variable) ~ "S3",
grepl("s4", variable) ~ "S4",
grepl("xy", variable) ~ "XY_homolog",
T ~ "all"),
value_type=case_when(grepl("median_ratio_ci_high", variable) ~ "median_ratio_ci_high",
grepl("median_ratio_ci_low", variable) ~ "median_ratio_ci_low",
grepl("median_ratio", variable) ~ "median_ratio")) %>%
reshape2::dcast(sample_name+cluster+subset_type ~ value_type, value.var = "value")
library(ggplot2)
ggplot(pairwise_ci_plot_df %>% filter(!is.na(cluster))) +
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) +
geom_point(aes(x=sample_name, y=median_ratio)) +
facet_grid_sc(cols=vars(cluster), rows=vars(subset_type), scales = list(y=scales_y, x="free"), space="free_x")
test <- fpkm_counts_df %>%
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>%
filter(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") %>%
# group_by(sample_name) %>%
summarise(par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate,
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1],
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2])
darcy_ipsc_fpkms <- read_csv("C:/Users/Prakhar Bansal/Downloads/darcy_fpkm_counts_iPSCs_avg.csv")
x_y_homologs <- c("PRKX", "NLGN4X", "TBL1X",
"TMSB4X", "CXorf15X", "EIFA1X",
"ZFX", "USP9X", "DDX3X",
"KDM6A", "KDM5C", "RPS4X")
darcy_homologs <- darcy_ipsc_fpkms %>% filter(hgnc_symbol %in% x_y_homologs)
my_homologs <- fpkm_counts_df %>% filter(category == "x_y_homologs")
darcy_homologs %>% arrange(hgnc_symbol) %>% write_csv("darcy_homologs.csv")
my_homologs %>% arrange(hgnc_symbol) %>% write_csv("my_homologs.csv")
darcy_fpkms_df <- read_csv("darcy_data/fpkm_counts_unfilt_iPSCs.csv")
darcy_hg38_mart <- useMart('ensembl',host="mar2016.archive.ensembl.org", dataset='hsapiens_gene_ensembl')
darcy_hg38_biomart_gene_df <- getBM(mart=darcy_hg38_mart,
attributes=c( 'ensembl_gene_id', 'version', 'hgnc_symbol', "chromosome_name", "start_position", "end_position", "strand", "description"))
darcy_hg38_biomart_gene_df <- darcy_hg38_biomart_gene_df %>% mutate(ensembl_gene_id_version=paste(ensembl_gene_id, version, sep="."))
darcy_fpkms_df <- left_join(darcy_fpkms_df, darcy_hg38_biomart_gene_df, by=c("X1"="ensembl_gene_id_version"))
darcy_fpkms_df <- darcy_fpkms_df %>%
filter(chromosome_name != "Y") %>%
mutate(autosomal = chromosome_name != "X")
darcy_fpkms_df <- darcy_fpkms_df %>%
mutate(category=case_when((hgnc_symbol %in% c("PRKX", "NLGN4X", "TBL1X",
"TMSB4X", "CXorf15X", "EIFA1X",
"ZFX", "USP9X", "DDX3X",
"KDM6A", "KDM5C", "RPS4X")) ~ "x_y_homologs",
chromosome_name == "X" & (start_position <= 2781479) | (start_position >= 155701383) ~ "PAR",
chromosome_name == "X" & start_position > 58100000 ~ "S1",
chromosome_name == "X" & start_position > 46000000 ~ "S2",
chromosome_name == "X" & start_position > 11.5e6 ~ "S3",
chromosome_name == "X" ~ "S4"),
)
darcy_samples <- c( "iPSC_XY_1", "iPSC_XY_2", "iPSC_XY_3",
"iPSC_XY_4", "iPSC_XY_5", "iPSC_XO_1",
"iPSC_XO_2", "iPSC_XO_3", "iPSC_XO_4",
"iPSC_XO_5", "iPSC_SRYDelC2_1", "iPSC_SRYDelC2_2",
"iPSC_SRYDelC2_3", "iPSC_SRYDelC2_4", "iPSC_SRYDelC2_5",
"iPSC_SRYDelC30_1", "iPSC_SRYDelC30_2", "iPSC_SRYDelC30_3",
"iPSC_SRYDelC30_4", "iPSC_SRYDelC30_5", "iPSC_C11XX_1",
"iPSC_C11XX_2", "iPSC_C11XX_3", "iPSC_C11XX_4", "iPSC_C6XX_1",
"iPSC_C6XX_2", "iPSC_C6XX_3", "iPSC_C6XX_4", "iPSC_C2XO_1", "iPSC_C2XO_2",
"iPSC_C2XO_3", "iPSC_C2XO_4", "iPSC_C8XO_1", "iPSC_C8XO_2", "iPSC_C8XO_3",
"iPSC_C9XO_1", "iPSC_C9XO_2", "iPSC_C9XO_3", "iPSC_C9XO_4")
darcy_fraction_autosomes_less_1 <- nrow((filter(darcy_fpkms_df, autosomal == T) %>%
dplyr::select(!!darcy_samples))[rowSums((filter(darcy_fpkms_df, autosomal == T) %>% dplyr::select(!!darcy_samples))>=1)==39 ,]) / nrow(filter(darcy_fpkms_df, autosomal == T))
darcy_fraction_xchr_less_1 <- nrow((filter(darcy_fpkms_df, autosomal == F) %>%
dplyr::select(!!darcy_samples))[rowSums((filter(darcy_fpkms_df, autosomal == F) %>% dplyr::select(!!darcy_samples))>=1)==39 ,]) / nrow(filter(darcy_fpkms_df, autosomal == F))
darcy_pairwise_ci_df_split <- darcy_fpkms_df %>%
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=darcy_samples, variable.name="sample_name", value.name="fpkm_val") %>%
group_by(sample_name) %>%
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate,
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1],
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2],
xy_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$estimate,
xy_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[1],
xy_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[2],
par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate,
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1],
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2],
s1_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$estimate,
s1_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[1],
s1_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[2],
s2_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$estimate,
s2_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[1],
s2_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[2],
s3_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$estimate,
s3_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[1],
s3_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[2],
s4_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$estimate,
s4_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[1],
s4_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[2])
darcy_sample_groups <- tibble(sample_name=c("iPSC_XY_1", "iPSC_XY_2", "iPSC_XY_3",
"iPSC_XY_4", "iPSC_XY_5", "iPSC_XO_1",
"iPSC_XO_2", "iPSC_XO_3", "iPSC_XO_4",
"iPSC_XO_5", "iPSC_SRYDelC2_1", "iPSC_SRYDelC2_2",
"iPSC_SRYDelC2_3", "iPSC_SRYDelC2_4", "iPSC_SRYDelC2_5",
"iPSC_SRYDelC30_1", "iPSC_SRYDelC30_2", "iPSC_SRYDelC30_3",
"iPSC_SRYDelC30_4", "iPSC_SRYDelC30_5", "iPSC_C11XX_1",
"iPSC_C11XX_2", "iPSC_C11XX_3", "iPSC_C11XX_4", "iPSC_C6XX_1",
"iPSC_C6XX_2", "iPSC_C6XX_3", "iPSC_C6XX_4", "iPSC_C2XO_1", "iPSC_C2XO_2",
"iPSC_C2XO_3", "iPSC_C2XO_4", "iPSC_C8XO_1", "iPSC_C8XO_2", "iPSC_C8XO_3",
"iPSC_C9XO_1", "iPSC_C9XO_2", "iPSC_C9XO_3", "iPSC_C9XO_4")) %>%
mutate(cluster=case_when(grepl("iPSC_XY", sample_name) ~ "XY",
grepl("iPSC_XO", sample_name) ~ "male-XO",
grepl("iPSC_SRYDel", sample_name) ~ "SRYDel",
grepl("iPSC_C11XX|iPSC_C6XX", sample_name) ~ "XX",
grepl("iPSC_C2XO|iPSC_C8XO|iPSC_C9XO", sample_name) ~ "female-XO"))
darcy_pairwise_ci_df_split <- left_join(darcy_pairwise_ci_df_split, darcy_sample_groups, by="sample_name")
darcy_scales_y <- list("all" = scale_y_continuous(limits= c(0, 3), breaks= c(0,1, 2, 3)),
"PAR" = scale_y_continuous(limits=c(0, 5)),
"XY_homolog" = scale_y_continuous(limits=c(0, 200)),
"S1" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)),
"S2" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)),
"S3" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)),
"S4" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)))
darcy_pairwise_ci_plot_df <- darcy_pairwise_ci_df_split %>%
reshape2::melt(id.vars=c("sample_name", "cluster"), measure.vars=c("median_ratio", "median_ratio_ci_low", "median_ratio_ci_high",
"par_median_ratio", "par_median_ratio_ci_low", "par_median_ratio_ci_high",
"s1_median_ratio", "s1_median_ratio_ci_low", "s1_median_ratio_ci_high",
"s2_median_ratio", "s2_median_ratio_ci_low", "s2_median_ratio_ci_high",
"s3_median_ratio", "s3_median_ratio_ci_low", "s3_median_ratio_ci_high",
"s4_median_ratio", "s4_median_ratio_ci_low", "s4_median_ratio_ci_high",
"xy_median_ratio", "xy_median_ratio_ci_low", "xy_median_ratio_ci_high")) %>%
mutate(subset_type=case_when(grepl("par", variable) ~ "PAR",
grepl("s1", variable) ~ "S1",
grepl("s2", variable) ~ "S2",
grepl("s3", variable) ~ "S3",
grepl("s4", variable) ~ "S4",
grepl("xy", variable) ~ "XY_homolog",
T ~ "all"),
value_type=case_when(grepl("median_ratio_ci_high", variable) ~ "median_ratio_ci_high",
grepl("median_ratio_ci_low", variable) ~ "median_ratio_ci_low",
grepl("median_ratio", variable) ~ "median_ratio")) %>%
reshape2::dcast(sample_name+cluster+subset_type ~ value_type, value.var = "value")
ggplot(darcy_pairwise_ci_plot_df %>% filter(!is.na(cluster))) +
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) +
geom_point(aes(x=sample_name, y=median_ratio)) +
facet_grid_sc(cols=vars(cluster), rows=vars(subset_type), scales = list(y=scales_y, x="free"), space="free_x")