From c7bc7c0338589d0f2b34db120bb937b276d8d166 Mon Sep 17 00:00:00 2001
From: Pariksheet Nanda
Date: Fri, 27 Jan 2017 06:20:24 -0500
Subject: [PATCH] ENH: Calculate p-values for all wells; not just RNAi

---
Reviewer notes (text in this section is not part of the commit message):

* Every well (controls included) now gets a hypergeometric p-value, while
  the background totals `cells_all` / `overlaps_all` are still summed over
  the RNAi wells only.
* The background totals are taken from `wells_all`, not from `cells` /
  `overlaps`: those two are plain numeric vectors produced by `unlist`, so
  they have no `type` column to filter on.
* The condition-less `filter(wells_all)` calls were no-ops and are dropped.
* Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch; if the context does not match the target file,
  `git apply --ignore-whitespace --recount` may be needed. The post-image
  blob id on the `index` line predates the fix described above.

 analysis-only_overlap.R | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R
index c1e3503..78b8ab3 100644
--- a/analysis-only_overlap.R
+++ b/analysis-only_overlap.R
@@ -11,10 +11,12 @@ wells_control <- data.frame(
     well = c("C03", "C22", "D12", "D13", "E05", "E20", "F11", "F14",
              "G07", "G18", "H09", "H16", "I09", "I16", "J07", "J18",
              "K11", "K14", "L05", "L20", "M12", "M13", "N03", "N22"),
-    type = "Lacz") %>%
+    type = "Lacz",
+    control = "negative") %>%
     rbind(data.frame(
         type = c("BROWN", "CAL1", "FACT", "Rho1", "thread", "water"),
-        well = c("I04", "E09", "L03", "E16", "L16", "H21")
+        well = c("I04", "E09", "L03", "E16", "L16", "H21"),
+        control = c(NA, NA, "positive", NA, NA, NA)
     ))
 
 ## Database boilerplate.
@@ -54,20 +56,23 @@ wells <- merge(cells, select(images, imagenumber, image_metadata_well,
 # Check the controls.
 controls_plate1 <- merge(wells, wells_control) %>%
     filter(plate == "160415_015529-V") %>%
-    group_by(type) %>%
-    summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n)
+    group_by(type, control) %>%
+    summarise(n = sum(n),
+              n_coloc = sum(n_coloc),
+              coloc = n_coloc / n)
 print(controls_plate1)
 
 controls_all <- merge(wells, wells_control) %>%
-    group_by(type) %>%
+    group_by(type, control) %>%
     summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n)
 print(controls_all)
 
 wells_rnai <- data.frame(
     well = setdiff(wells$well, wells_control$well),
-    type = "RNAi")
+    type = "RNAi",
+    control = NA)
 
-rnai <- filter(wells, well %in% wells_rnai) %>%
+rnai <- filter(wells, well %in% wells_rnai$well) %>%
     mutate(coloc = n_coloc / n)
 print(rnai)
 
@@ -91,19 +96,18 @@ p_value <- function(x) {
     x <- x[1L, 1L]
     phyper(x - 1, m, n, k, lower.tail = FALSE)
 }
-cells <- filter(wells_all, type == "RNAi") %>% select(n) %>% unlist
-overlaps <- filter(wells_all, type == "RNAi") %>% select(n_coloc) %>%
+cells <- wells_all %>% select(n) %>% unlist
+overlaps <- wells_all %>% select(n_coloc) %>%
     unlist
-cells_all <- sum(cells)
-overlaps_all <- sum(overlaps)
+cells_all <- sum(filter(wells_all, type == "RNAi")$n)
+overlaps_all <- sum(filter(wells_all, type == "RNAi")$n_coloc)
 contingency_matrix <- rbind(cells, overlaps, cells_all, overlaps_all)
 ## Organize array to have 2x2 margin instead of 1x4.
 dim(contingency_matrix) <- c(2, 2, length(contingency_matrix) / 4)
 p_values_uncorrected <- apply(contingency_matrix, 3, p_value)
 p_values <- p.adjust(p_values_uncorrected, method = "bonferroni")
 
-wells_all$p_value <- NA
-wells_all[wells_all$type == "RNAi", "p_value"] <- p_values_uncorrected
+wells_all$p_value <- p_values_uncorrected
 write.csv(wells_all %>% arrange(p_value),
           file = "rnai-p_values.csv",
           quote = FALSE, row.names = FALSE)