diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R index 98ab98f..fafebac 100644 --- a/analysis-only_overlap.R +++ b/analysis-only_overlap.R @@ -108,6 +108,15 @@ p_value <- function(x) { x <- x[1L, 1L] phyper(x - 1, m, n, k, lower.tail = FALSE) } +## Apply implementation of Fisher's exact test to large vectors +p_values <- function(x, y, x_total, y_total) { + contingency_matrix <- rbind(x, y, + x_total, y_total) + ## Organize array to have 2x2 margin instead of 1x4. + dim(contingency_matrix) <- c(2, 2, length(contingency_matrix) / 4) + apply(contingency_matrix, 3, p_value) # nolint +} +## P-values for RNAi wells. wells_all <- merge(wells, wells_layout) %>% mutate(coloc = n_coloc / n) cells <- filter(wells_all) %>% select(n) %>% unlist @@ -117,13 +126,19 @@ cells_all <- filter(wells_all, is.na(control)) %>% select(n) %>% unlist %>% sum overlaps_all <- filter(wells_all, is.na(control)) %>% select(n_coloc) %>% unlist %>% sum -contingency_matrix <- rbind(cells, overlaps, - cells_all, overlaps_all) -## Organize array to have 2x2 margin instead of 1x4. -dim(contingency_matrix) <- c(2, 2, length(contingency_matrix) / 4) -p_values_uncorrected <- apply(contingency_matrix, 3, p_value) -p_values <- p.adjust(p_values_uncorrected, method = "bonferroni") -wells_all$p_value <- p_values_uncorrected +wells_all$p_value <- p_values(cells, overlaps, cells_all, overlaps_all) +## P-values for control wells. +cells_all_control <- filter(wells_all, + !is.na(control), + symbol == "LacZ") %>% + select(n) %>% unlist %>% sum +overlaps_all_control <- filter(wells_all, + !is.na(control), + symbol == "LacZ") %>% + select(n_coloc) %>% unlist %>% sum +wells_all$p_value_control <- p_values(cells, overlaps, + cells_all_control, + overlaps_all_control) ## Check repeated RNAi wells. wells_repeated <- group_by(wells_all, symbol) %>% diff --git a/plots.R b/plots.R index 2e613f7..5c6ddca 100644 --- a/plots.R +++ b/plots.R @@ -4,10 +4,13 @@ suppressPackageStartupMessages({ library(ggplot2) library(dplyr) library(readr) + library(forcats) + library(varhandle) }) wells_all <- read_csv("rnai-p_values.csv") %>% - mutate(significant = p_value < 0.05) + mutate(significant = p_value < 0.05, + plate = factor(plate) %>% fct_relevel("504")) binwidth <- 0.025 theme_set(theme_bw()) @@ -37,24 +40,30 @@ ggplot(filter(wells_all, xlab("% colocalization") ggplot(filter(wells_all, !is.na(control)) %>% - group_by(well) %>% mutate(ymin = min(coloc), ymax = max(coloc), - plate = factor(plate)), + group_by(well) %>% mutate(ymin = min(coloc), ymax = max(coloc)), aes(well, coloc, color = plate)) + geom_errorbar(aes(ymin = ymin, ymax = ymax), color = "gray") + geom_jitter(aes(shape = plate)) + scale_color_brewer(palette = "Set1") + - ggtitle(paste("Scatter plot of control well colocalization across plates")) + + ggtitle(paste("Scatter plot of control well", + "colocalization across plates")) + xlab("well") + ylab("% colocalization") ggplot(filter(wells_all, !is.na(control)) %>% - group_by(well) %>% mutate(ymin = min(p_value), ymax = max(p_value), - plate = factor(plate)), - aes(well, p_value, color = plate)) + - geom_errorbar(aes(ymin = ymin, ymax = ymax, color = control)) + - geom_jitter(aes(shape = plate, size = n)) + + group_by(well) %>% mutate(ymin = min(p_value_control), + ymax = max(p_value_control), + ytext = plate %>% + lvls_revalue(seq(1.5, 1.1, -0.1) %>% + as.character) %>% + unfactor), + aes(well, p_value_control, color = plate)) + + geom_errorbar(aes(ymin = ymin, ymax = ymax, linetype = control), + color = "grey") + + geom_jitter(aes(color = plate), size = 3) + + geom_text(aes(label = symbol, y = ytext, color = plate)) + scale_color_brewer(palette = "Set1") + ggtitle(paste("Scatter plot of control well p-values across plates")) + - xlab("well") + ylab("p-value") + xlab("well") + ylab("p-value against LacZ") ggplot(wells_all, aes(coloc, color = type)) + geom_freqpoly(binwidth = binwidth) +