From c7bc7c0338589d0f2b34db120bb937b276d8d166 Mon Sep 17 00:00:00 2001
From: Pariksheet Nanda <pariksheet.nanda@uconn.edu>
Date: Fri, 27 Jan 2017 06:20:24 -0500
Subject: [PATCH] ENH: Calculate p-values for all wells, not just RNAi

---
 analysis-only_overlap.R | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R
index c1e3503..78b8ab3 100644
--- a/analysis-only_overlap.R
+++ b/analysis-only_overlap.R
@@ -11,10 +11,12 @@ wells_control <- data.frame(
     well = c("C03", "C22", "D12", "D13", "E05", "E20", "F11", "F14",
              "G07", "G18", "H09", "H16", "I09", "I16", "J07", "J18",
              "K11", "K14", "L05", "L20", "M12", "M13", "N03", "N22"),
-    type = "Lacz") %>%
+    type = "Lacz",
+    control = "negative") %>%
     rbind(data.frame(
         type = c("BROWN", "CAL1", "FACT", "Rho1", "thread", "water"),
-        well = c("I04", "E09", "L03", "E16", "L16", "H21")
+        well = c("I04", "E09", "L03", "E16", "L16", "H21"),
+        control = c(NA, NA, "positive", NA, NA, NA)
     ))
 
 ## Database boilerplate.
@@ -54,20 +56,23 @@ wells <- merge(cells, select(images, imagenumber, image_metadata_well,
 # Check the controls.
 controls_plate1 <- merge(wells, wells_control) %>%
     filter(plate == "160415_015529-V") %>%
-    group_by(type) %>%
-    summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n)
+    group_by(type, control) %>%  # one row per (type, control) pair
+    summarise(n = sum(n),  # total n over the group's rows
+              n_coloc = sum(n_coloc),  # total n_coloc over the group's rows
+              coloc = n_coloc / n)  # ratio of the two totals just computed
 print(controls_plate1)
 
 controls_all <- merge(wells, wells_control) %>%
-    group_by(type) %>%
+    group_by(type, control) %>%  # like controls_plate1, minus the plate filter
     summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n)
 print(controls_all)
 
 wells_rnai <- data.frame(
     well = setdiff(wells$well, wells_control$well),
-    type = "RNAi")
+    type = "RNAi",  # every well not listed in wells_control is typed RNAi
+    control = NA)  # same column as wells_control; NA for these wells
 
-rnai <- filter(wells, well %in% wells_rnai) %>%
+rnai <- filter(wells, well %in% wells_rnai$well) %>%  # well column only
     mutate(coloc = n_coloc / n)
 print(rnai)
 
@@ -91,19 +96,18 @@ p_value <- function(x) {
     x <- x[1L, 1L]
     phyper(x - 1, m, n, k, lower.tail = FALSE)
 }
-cells <- filter(wells_all, type == "RNAi") %>% select(n) %>% unlist
-overlaps <- filter(wells_all, type == "RNAi") %>% select(n_coloc) %>%
+cells <- wells_all %>% select(n) %>% unlist  # every well, not only RNAi
+overlaps <- wells_all %>% select(n_coloc) %>%
     unlist
-cells_all <- sum(cells)
-overlaps_all <- sum(overlaps)
+cells_all <- sum(wells_all$n[wells_all$type %in% "RNAi"])  # RNAi wells only
+overlaps_all <- sum(wells_all$n_coloc[wells_all$type %in% "RNAi"])  # ditto
 contingency_matrix <- rbind(cells, overlaps,
                             cells_all, overlaps_all)
 ## Organize array to have 2x2 margin instead of 1x4.
 dim(contingency_matrix) <- c(2, 2, length(contingency_matrix) / 4)
 p_values_uncorrected <- apply(contingency_matrix, 3, p_value)
 p_values <- p.adjust(p_values_uncorrected, method = "bonferroni")
-wells_all$p_value <- NA
-wells_all[wells_all$type == "RNAi", "p_value"] <- p_values_uncorrected
+wells_all$p_value <- p_values_uncorrected
 
 write.csv(wells_all %>% arrange(p_value), file = "rnai-p_values.csv",
           quote = FALSE, row.names = FALSE)