Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
contiguous_x_erosion/x_a_ratios.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
391 lines (332 sloc)
27.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("DESeq2") | |
library('biomaRt') | |
library("TxDb.Hsapiens.UCSC.hg38.knownGene") | |
library("tidyverse") | |
library("org.Hs.eg.db") | |
diff_expr_all_df <- NA | |
directory <- "htseq_counts" | |
count_file_suffix <- "Aligned.out.sorted.bam.htseq_counts" | |
sampleFiles <- grep(count_file_suffix,list.files(directory),value=TRUE) | |
rna_seq_accessions_tib <- read_csv("rna_seq_accessions_tib.csv.gz") | |
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster")) | |
cluster_tib <- column_to_rownames(cluster_tib, "sample_name") | |
seq_type_df <- read_csv("sample_seq_type.csv.gz") | |
sample_info_df <- read_csv("sample_data_combined_all_info.csv") | |
sample_file_to_keep <- c() | |
sample_names <- c() | |
sampleCondition <- c() | |
sample_seq_type <- c() | |
sample_group <- c() | |
xist_expression <- c() | |
all_htseq_counts_df <- tibble(gene_id=character(),gene_name=character()) | |
for(filename in sampleFiles) { | |
sra_acc <-sub(count_file_suffix, "", filename) | |
if(sra_acc == "SRR4027227") { | |
print("I'm getting the import file") | |
} | |
if (sra_acc %in% rna_seq_accessions_tib$sra_accession) { | |
sample_name <- (filter(rna_seq_accessions_tib, sra_accession == sra_acc))$SampleName | |
if(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") { | |
print(sample_name) | |
} | |
cluster <- cluster_tib[sample_name, 1] | |
# print(cluster) | |
if(is.na(cluster) == F) { | |
if(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") { | |
print(sample_name) | |
print(cluster) | |
} | |
sample_file_to_keep <- c(sample_file_to_keep, filename) | |
sample_names <- c(sample_names, sample_name) | |
sampleCondition <- c(sampleCondition, cluster) | |
sample_seq_type <- c(sample_seq_type, | |
as.character(seq_type_df[seq_type_df$`Sample Name` == sample_name, "end_type"][1])) | |
sample_group <- c(sample_group, | |
as.character(sample_info_df[sample_info_df$sample_name == sample_name, "group_accession"][1])) | |
counts_tib <- read_tsv(paste(directory, "/", filename, sep=""), col_names = c('id', 'name', 'level')) | |
all_htseq_counts_df <- full_join(all_htseq_counts_df, dplyr::select(counts_tib, id,name, !! sample_name := level), by=c("gene_id"="id", "gene_name"="name")) | |
} | |
} | |
} | |
sampleTable <- data.frame(sampleName = sample_names, | |
fileName = sample_file_to_keep, | |
cluster = factor(sampleCondition), | |
seq_type = sample_seq_type, | |
group = sample_group) | |
ddsHTSeq_all <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | |
directory = directory, | |
design= ~ seq_type + group + cluster) | |
# keep <- rowSums(counts(ddsHTSeq_all)) >= 10 | |
# ddsHTSeq_all <- ddsHTSeq_all[keep,] | |
dds_all <- DESeq(ddsHTSeq_all) | |
# library("tximeta") | |
# se <- tximeta(sampleTable) | |
# library(AnnotationFilter) | |
gtf <- "gencode.v30.annotation.gtf" | |
txdb <- makeTxDbFromGFF(gtf, format = "gtf", circ_seqs = character()) | |
ebg <- ensembldb::exonsBy(txdb, by = "gene") | |
filtered_ebg <- ebg[rownames(dds_all)] | |
rowRanges(dds_all) <- GRangesList(filtered_ebg) | |
fpkm_counts <- fpkm(dds_all) | |
#need to use archive because gtf is from june 2019 | |
hg38_mart <- useMart('ensembl',host="apr2019.archive.ensembl.org", dataset='hsapiens_gene_ensembl') | |
hg38_biomart_gene_df <- getBM(mart=hg38_mart, | |
attributes=c( 'ensembl_gene_id', 'ensembl_gene_id_version', 'hgnc_symbol', "chromosome_name", "start_position", "end_position", "strand", "description")) | |
fpkm_counts_df <- left_join(as_tibble(fpkm_counts, rownames=".rownames"), hg38_biomart_gene_df, by=c(".rownames"="ensembl_gene_id_version")) | |
fpkm_counts_df <- fpkm_counts_df %>% | |
filter(chromosome_name != "Y") %>% | |
mutate(autosomal = (chromosome_name != "X")) | |
mean_x_a_ratio_df <- fpkm_counts_df %>% | |
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>% | |
group_by(autosomal, sample_name) %>% | |
summarise(mean_fpkm=mean(fpkm_val)) %>% | |
ungroup() %>% | |
mutate(is_autosomal = case_when(autosomal==FALSE ~ "mean_X_fpkm", | |
autosomal==TRUE ~ "mean_A_fpkm")) %>% | |
reshape2::dcast(sample_name ~ is_autosomal, value.var="mean_fpkm") %>% | |
mutate(x_a_ratio=mean_X_fpkm/mean_A_fpkm) | |
median_x_a_ratio_df <- fpkm_counts_df %>% | |
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>% | |
group_by(autosomal, sample_name) %>% | |
summarise(median_fpkm=median(fpkm_val)) %>% | |
ungroup() %>% | |
mutate(is_autosomal = case_when(autosomal==FALSE ~ "median_X_fpkm", | |
autosomal==TRUE ~ "median_A_fpkm")) %>% | |
reshape2::dcast(sample_name ~ is_autosomal, value.var="median_fpkm") %>% | |
mutate(x_a_ratio=median_X_fpkm/median_A_fpkm) | |
library(pairwiseCI) | |
pairwise_ci_df <- fpkm_counts_df %>% | |
reshape2::melt(id.vars=c("autosomal"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>% | |
group_by(sample_name) %>% | |
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate, | |
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1], | |
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2]) | |
write_csv(pairwise_ci_df, "pairwise_ci_df.csv") | |
# autosome_fpkms <- pairwise_ci_df %>% filter(autosomal == T) %>% dplyr::select(fpkm_val) %>% unlist %>% as.numeric | |
# x_fpkms <- pairwise_ci_df %>% filter(autosomal == F) %>% dplyr::select(fpkm_val) %>% unlist %>% as.numeric | |
# Median.ratio(x_fpkms, autosome_fpkms) | |
pairwise_ci_df <- read_csv("pairwise_ci_df.csv") | |
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster")) | |
pairwise_ci_df <- left_join(pairwise_ci_df, cluster_tib, by="sample_name") | |
library(ggplot2) | |
x_a_ratio_plot_per_sample <- ggplot(pairwise_ci_df %>% filter(!is.na(cluster))) + | |
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) + | |
geom_point(aes(x=sample_name, y=median_ratio)) + | |
facet_grid(cols=vars(cluster), scales="free_x", labeller = labeller(.cols=function(n){ | |
return(list("0"="A", | |
"1"="B", | |
"2"="C", | |
"3"="D", | |
"4"="E", | |
"5"="F")[n]) | |
})) + | |
labs(y="Median X:A", title="Median X:Autosome Ratios", x="Female Samples") + | |
theme_bw() + | |
base_plot_theme + | |
theme(axis.ticks.x=element_blank(), | |
axis.text.x = element_blank()) | |
ggsave("figs/review images/x_a_ratio_per_sample.png", x_a_ratio_plot_per_sample) | |
library(ggsignif) | |
x_a_ratio_plot_per_cluster <- ggplot(pairwise_ci_df %>% filter(!is.na(cluster)) %>% mutate(cluster=case_when(cluster==0~"A", cluster==1~"B", cluster==2~"C", cluster==3~"D", cluster==5~"F")), | |
aes(x=cluster, y=median_ratio, group=cluster)) + | |
geom_boxplot(aes(fill="female", alpha=cluster), outlier.shape = NA, size=.3) + | |
geom_point(position=position_jitter(w=0.1,h=0), size=.5, shape=20) + | |
labs(y="Median X:A", title="Median X:Autosome Ratios", x="Cluster") + | |
geom_signif(comparisons = list(c(1,2), c(1,3), c(1,4), c(1,5)), y_position=c(1.1, 1.15, 1.2, 1.25), tip_length = 0, size=.3, textsize = 2, test="wilcox.test") + | |
theme_bw() + | |
scale_fill_npg(guide=F) + | |
scale_alpha_discrete(guide=F) + | |
base_plot_theme + | |
scale_x_discrete(limits=c("A", "B", "C", "D", "F")) + | |
scale_y_continuous(limits = c(0.54,1.28)) | |
ggsave("figs/main/fig_4_x_a_ratio_plot_per_cluster_wilcox.pdf", x_a_ratio_plot_per_cluster, width=57.855, height=58, units = "mm") | |
fpkm_counts_df <- fpkm_counts_df %>% | |
mutate(category=case_when((hgnc_symbol %in% c("PRKX", "NLGN4X", "TBL1X", | |
"TMSB4X", "CXorf15X", "EIFA1X", | |
"ZFX", "USP9X", "DDX3X", | |
"KDM6A", "KDM5C", "RPS4X")) ~ "x_y_homologs", | |
chromosome_name == "X" & (start_position <= 2781479) | (start_position >= 155701383) ~ "PAR", | |
chromosome_name == "X" & start_position > 58100000 ~ "S1", | |
chromosome_name == "X" & start_position > 46000000 ~ "S2", | |
chromosome_name == "X" & start_position > 11.5e6 ~ "S3", | |
chromosome_name == "X" ~ "S4")) | |
pairwise_ci_df_split <- fpkm_counts_df[rowSums((fpkm_counts_df %>% dplyr::select(!!colnames(fpkm_counts)))>=1)==length(colnames(fpkm_counts)) ,] %>% | |
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>% | |
group_by(sample_name) %>% | |
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate, | |
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1], | |
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2], | |
xy_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$estimate, | |
xy_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[1], | |
xy_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[2], | |
par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate, | |
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1], | |
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2], | |
s1_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$estimate, | |
s1_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[1], | |
s1_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[2], | |
s2_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$estimate, | |
s2_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[1], | |
s2_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[2], | |
s3_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$estimate, | |
s3_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[1], | |
s3_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[2], | |
s4_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$estimate, | |
s4_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[1], | |
s4_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[2]) | |
cluster_tib <- read_csv("saved_sample_clusters.csv", col_names = c("sample_name", "cluster")) | |
pairwise_ci_df_split <- left_join(pairwise_ci_df_split, cluster_tib, by="sample_name") | |
library(facetscales) | |
scales_y <- list("all" = scale_y_continuous(limits= c(0, 5)), | |
"PAR" = scale_y_continuous(limits=c(0, 5)), | |
"XY_homolog" = scale_y_continuous(limits=c(0, 5)), | |
"S1" = scale_y_continuous(limits=c(0, 5)), | |
"S2" = scale_y_continuous(limits=c(0, 5)), | |
"S3" = scale_y_continuous(limits=c(0, 5)), | |
"S4" = scale_y_continuous(limits=c(0, 5))) | |
pairwise_ci_plot_df <- pairwise_ci_df_split %>% | |
reshape2::melt(id.vars=c("sample_name", "cluster"), measure.vars=c("median_ratio", "median_ratio_ci_low", "median_ratio_ci_high", | |
"par_median_ratio", "par_median_ratio_ci_low", "par_median_ratio_ci_high", | |
"s1_median_ratio", "s1_median_ratio_ci_low", "s1_median_ratio_ci_high", | |
"s2_median_ratio", "s2_median_ratio_ci_low", "s2_median_ratio_ci_high", | |
"s3_median_ratio", "s3_median_ratio_ci_low", "s3_median_ratio_ci_high", | |
"s4_median_ratio", "s4_median_ratio_ci_low", "s4_median_ratio_ci_high", | |
"xy_median_ratio", "xy_median_ratio_ci_low", "xy_median_ratio_ci_high")) %>% | |
mutate(subset_type=case_when(grepl("par", variable) ~ "PAR", | |
grepl("s1", variable) ~ "S1", | |
grepl("s2", variable) ~ "S2", | |
grepl("s3", variable) ~ "S3", | |
grepl("s4", variable) ~ "S4", | |
grepl("xy", variable) ~ "XY_homolog", | |
T ~ "all"), | |
value_type=case_when(grepl("median_ratio_ci_high", variable) ~ "median_ratio_ci_high", | |
grepl("median_ratio_ci_low", variable) ~ "median_ratio_ci_low", | |
grepl("median_ratio", variable) ~ "median_ratio")) %>% | |
reshape2::dcast(sample_name+cluster+subset_type ~ value_type, value.var = "value") | |
library(ggplot2) | |
ggplot(pairwise_ci_plot_df %>% filter(!is.na(cluster))) + | |
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) + | |
geom_point(aes(x=sample_name, y=median_ratio)) + | |
facet_grid_sc(cols=vars(cluster), rows=vars(subset_type), scales = list(y=scales_y, x="free"), space="free_x") | |
test <- fpkm_counts_df %>% | |
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=colnames(as_tibble(fpkm_counts)), variable.name="sample_name", value.name="fpkm_val") %>% | |
filter(sample_name == "Daily_GSM2285217_iPSC_SC14-071_female_p15") %>% | |
# group_by(sample_name) %>% | |
summarise(par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate, | |
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1], | |
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2]) | |
darcy_ipsc_fpkms <- read_csv("C:/Users/Prakhar Bansal/Downloads/darcy_fpkm_counts_iPSCs_avg.csv") | |
x_y_homologs <- c("PRKX", "NLGN4X", "TBL1X", | |
"TMSB4X", "CXorf15X", "EIFA1X", | |
"ZFX", "USP9X", "DDX3X", | |
"KDM6A", "KDM5C", "RPS4X") | |
darcy_homologs <- darcy_ipsc_fpkms %>% filter(hgnc_symbol %in% x_y_homologs) | |
my_homologs <- fpkm_counts_df %>% filter(category == "x_y_homologs") | |
darcy_homologs %>% arrange(hgnc_symbol) %>% write_csv("darcy_homologs.csv") | |
my_homologs %>% arrange(hgnc_symbol) %>% write_csv("my_homologs.csv") | |
darcy_fpkms_df <- read_csv("darcy_data/fpkm_counts_unfilt_iPSCs.csv") | |
darcy_hg38_mart <- useMart('ensembl',host="mar2016.archive.ensembl.org", dataset='hsapiens_gene_ensembl') | |
darcy_hg38_biomart_gene_df <- getBM(mart=darcy_hg38_mart, | |
attributes=c( 'ensembl_gene_id', 'version', 'hgnc_symbol', "chromosome_name", "start_position", "end_position", "strand", "description")) | |
darcy_hg38_biomart_gene_df <- darcy_hg38_biomart_gene_df %>% mutate(ensembl_gene_id_version=paste(ensembl_gene_id, version, sep=".")) | |
darcy_fpkms_df <- left_join(darcy_fpkms_df, darcy_hg38_biomart_gene_df, by=c("X1"="ensembl_gene_id_version")) | |
darcy_fpkms_df <- darcy_fpkms_df %>% | |
filter(chromosome_name != "Y") %>% | |
mutate(autosomal = chromosome_name != "X") | |
darcy_fpkms_df <- darcy_fpkms_df %>% | |
mutate(category=case_when((hgnc_symbol %in% c("PRKX", "NLGN4X", "TBL1X", | |
"TMSB4X", "CXorf15X", "EIFA1X", | |
"ZFX", "USP9X", "DDX3X", | |
"KDM6A", "KDM5C", "RPS4X")) ~ "x_y_homologs", | |
chromosome_name == "X" & (start_position <= 2781479) | (start_position >= 155701383) ~ "PAR", | |
chromosome_name == "X" & start_position > 58100000 ~ "S1", | |
chromosome_name == "X" & start_position > 46000000 ~ "S2", | |
chromosome_name == "X" & start_position > 11.5e6 ~ "S3", | |
chromosome_name == "X" ~ "S4"), | |
) | |
darcy_samples <- c( "iPSC_XY_1", "iPSC_XY_2", "iPSC_XY_3", | |
"iPSC_XY_4", "iPSC_XY_5", "iPSC_XO_1", | |
"iPSC_XO_2", "iPSC_XO_3", "iPSC_XO_4", | |
"iPSC_XO_5", "iPSC_SRYDelC2_1", "iPSC_SRYDelC2_2", | |
"iPSC_SRYDelC2_3", "iPSC_SRYDelC2_4", "iPSC_SRYDelC2_5", | |
"iPSC_SRYDelC30_1", "iPSC_SRYDelC30_2", "iPSC_SRYDelC30_3", | |
"iPSC_SRYDelC30_4", "iPSC_SRYDelC30_5", "iPSC_C11XX_1", | |
"iPSC_C11XX_2", "iPSC_C11XX_3", "iPSC_C11XX_4", "iPSC_C6XX_1", | |
"iPSC_C6XX_2", "iPSC_C6XX_3", "iPSC_C6XX_4", "iPSC_C2XO_1", "iPSC_C2XO_2", | |
"iPSC_C2XO_3", "iPSC_C2XO_4", "iPSC_C8XO_1", "iPSC_C8XO_2", "iPSC_C8XO_3", | |
"iPSC_C9XO_1", "iPSC_C9XO_2", "iPSC_C9XO_3", "iPSC_C9XO_4") | |
darcy_fraction_autosomes_less_1 <- nrow((filter(darcy_fpkms_df, autosomal == T) %>% | |
dplyr::select(!!darcy_samples))[rowSums((filter(darcy_fpkms_df, autosomal == T) %>% dplyr::select(!!darcy_samples))>=1)==39 ,]) / nrow(filter(darcy_fpkms_df, autosomal == T)) | |
darcy_fraction_xchr_less_1 <- nrow((filter(darcy_fpkms_df, autosomal == F) %>% | |
dplyr::select(!!darcy_samples))[rowSums((filter(darcy_fpkms_df, autosomal == F) %>% dplyr::select(!!darcy_samples))>=1)==39 ,]) / nrow(filter(darcy_fpkms_df, autosomal == F)) | |
darcy_pairwise_ci_df_split <- darcy_fpkms_df %>% | |
reshape2::melt(id.vars=c("autosomal", "category"), measure.vars=darcy_samples, variable.name="sample_name", value.name="fpkm_val") %>% | |
group_by(sample_name) %>% | |
summarise(median_ratio=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$estimate, | |
median_ratio_ci_low=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[1], | |
median_ratio_ci_high=Median.ratio(fpkm_val[autosomal == F], fpkm_val[autosomal == T])$conf.int[2], | |
xy_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$estimate, | |
xy_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[1], | |
xy_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "x_y_homologs")], fpkm_val[autosomal == T])$conf.int[2], | |
par_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$estimate, | |
par_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[1], | |
par_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "PAR")], fpkm_val[autosomal == T])$conf.int[2], | |
s1_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$estimate, | |
s1_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[1], | |
s1_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S1")], fpkm_val[autosomal == T])$conf.int[2], | |
s2_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$estimate, | |
s2_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[1], | |
s2_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S2")], fpkm_val[autosomal == T])$conf.int[2], | |
s3_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$estimate, | |
s3_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[1], | |
s3_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S3")], fpkm_val[autosomal == T])$conf.int[2], | |
s4_median_ratio=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$estimate, | |
s4_median_ratio_ci_low=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[1], | |
s4_median_ratio_ci_high=Median.ratio(fpkm_val[(autosomal == F) & (category == "S4")], fpkm_val[autosomal == T])$conf.int[2]) | |
darcy_sample_groups <- tibble(sample_name=c("iPSC_XY_1", "iPSC_XY_2", "iPSC_XY_3", | |
"iPSC_XY_4", "iPSC_XY_5", "iPSC_XO_1", | |
"iPSC_XO_2", "iPSC_XO_3", "iPSC_XO_4", | |
"iPSC_XO_5", "iPSC_SRYDelC2_1", "iPSC_SRYDelC2_2", | |
"iPSC_SRYDelC2_3", "iPSC_SRYDelC2_4", "iPSC_SRYDelC2_5", | |
"iPSC_SRYDelC30_1", "iPSC_SRYDelC30_2", "iPSC_SRYDelC30_3", | |
"iPSC_SRYDelC30_4", "iPSC_SRYDelC30_5", "iPSC_C11XX_1", | |
"iPSC_C11XX_2", "iPSC_C11XX_3", "iPSC_C11XX_4", "iPSC_C6XX_1", | |
"iPSC_C6XX_2", "iPSC_C6XX_3", "iPSC_C6XX_4", "iPSC_C2XO_1", "iPSC_C2XO_2", | |
"iPSC_C2XO_3", "iPSC_C2XO_4", "iPSC_C8XO_1", "iPSC_C8XO_2", "iPSC_C8XO_3", | |
"iPSC_C9XO_1", "iPSC_C9XO_2", "iPSC_C9XO_3", "iPSC_C9XO_4")) %>% | |
mutate(cluster=case_when(grepl("iPSC_XY", sample_name) ~ "XY", | |
grepl("iPSC_XO", sample_name) ~ "male-XO", | |
grepl("iPSC_SRYDel", sample_name) ~ "SRYDel", | |
grepl("iPSC_C11XX|iPSC_C6XX", sample_name) ~ "XX", | |
grepl("iPSC_C2XO|iPSC_C8XO|iPSC_C9XO", sample_name) ~ "female-XO")) | |
darcy_pairwise_ci_df_split <- left_join(darcy_pairwise_ci_df_split, darcy_sample_groups, by="sample_name") | |
darcy_scales_y <- list("all" = scale_y_continuous(limits= c(0, 3), breaks= c(0,1, 2, 3)), | |
"PAR" = scale_y_continuous(limits=c(0, 5)), | |
"XY_homolog" = scale_y_continuous(limits=c(0, 200)), | |
"S1" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)), | |
"S2" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)), | |
"S3" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3)), | |
"S4" = scale_y_continuous(limits=c(0, 3), breaks=c(0, 1, 2, 3))) | |
darcy_pairwise_ci_plot_df <- darcy_pairwise_ci_df_split %>% | |
reshape2::melt(id.vars=c("sample_name", "cluster"), measure.vars=c("median_ratio", "median_ratio_ci_low", "median_ratio_ci_high", | |
"par_median_ratio", "par_median_ratio_ci_low", "par_median_ratio_ci_high", | |
"s1_median_ratio", "s1_median_ratio_ci_low", "s1_median_ratio_ci_high", | |
"s2_median_ratio", "s2_median_ratio_ci_low", "s2_median_ratio_ci_high", | |
"s3_median_ratio", "s3_median_ratio_ci_low", "s3_median_ratio_ci_high", | |
"s4_median_ratio", "s4_median_ratio_ci_low", "s4_median_ratio_ci_high", | |
"xy_median_ratio", "xy_median_ratio_ci_low", "xy_median_ratio_ci_high")) %>% | |
mutate(subset_type=case_when(grepl("par", variable) ~ "PAR", | |
grepl("s1", variable) ~ "S1", | |
grepl("s2", variable) ~ "S2", | |
grepl("s3", variable) ~ "S3", | |
grepl("s4", variable) ~ "S4", | |
grepl("xy", variable) ~ "XY_homolog", | |
T ~ "all"), | |
value_type=case_when(grepl("median_ratio_ci_high", variable) ~ "median_ratio_ci_high", | |
grepl("median_ratio_ci_low", variable) ~ "median_ratio_ci_low", | |
grepl("median_ratio", variable) ~ "median_ratio")) %>% | |
reshape2::dcast(sample_name+cluster+subset_type ~ value_type, value.var = "value") | |
ggplot(darcy_pairwise_ci_plot_df %>% filter(!is.na(cluster))) + | |
geom_errorbar(aes(x=sample_name, ymin=median_ratio_ci_low, ymax=median_ratio_ci_high)) + | |
geom_point(aes(x=sample_name, y=median_ratio)) + | |
facet_grid_sc(cols=vars(cluster), rows=vars(subset_type), scales = list(y=scales_y, x="free"), space="free_x") |