diff --git a/.gitignore b/.gitignore index c82c184..be049ac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ cellprofiler_all-plates.tar.xz cellprofiler_all-plates/all-plates.db z_stack.tar.xz z_stack/ +*.csv +*.pdf +*.png # Log files from CellProfiler Analyst *.log diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R index aba906b..c1e3503 100644 --- a/analysis-only_overlap.R +++ b/analysis-only_overlap.R @@ -6,14 +6,16 @@ suppressPackageStartupMessages({ ## Configuration values. file_db <- "cellprofiler_all-plates/all-plates.db" -wells_control <- c( - "C03", # Should be 25-30% - "E05", # Should be 25-30% - "E09", # Should be <= 5% - "H21", # Should be 25-30% - "I04", # Should be 25-30% - "L09" # Should be 5-15% -) + +wells_control <- data.frame( + well = c("C03", "C22", "D12", "D13", "E05", "E20", "F11", "F14", + "G07", "G18", "H09", "H16", "I09", "I16", "J07", "J18", + "K11", "K14", "L05", "L20", "M12", "M13", "N03", "N22"), + type = "Lacz") %>% + rbind(data.frame( + type = c("BROWN", "CAL1", "FACT", "Rho1", "thread", "water"), + well = c("I04", "E09", "L03", "E16", "L16", "H21") + )) ## Database boilerplate. driver <- dbDriver("SQLite") @@ -45,11 +47,63 @@ wells <- merge(cells, select(images, imagenumber, image_metadata_well, image_metadata_plate)) %>% replace_na(list(ect_classify_coloc = 0)) %>% group_by(image_metadata_well, image_metadata_plate) %>% - summarise(n = n(), n_coloc = sum(ect_classify_coloc)) + summarise(n = n(), n_coloc = sum(ect_classify_coloc)) %>% + rename(well = image_metadata_well, + plate = image_metadata_plate) # Check the controls. -controls <- filter(wells, - image_metadata_well %in% wells_control, - image_metadata_plate == "160415_015529-V") %>% +controls_plate1 <- merge(wells, wells_control) %>% + filter(plate == "160415_015529-V") %>% + group_by(type) %>% + summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n) +print(controls_plate1) + +controls_all <- merge(wells, wells_control) %>% + group_by(type) %>% + summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n) +print(controls_all) + +wells_rnai <- data.frame( + well = setdiff(wells$well, wells_control$well), + type = "RNAi") + +rnai <- filter(wells, well %in% wells_rnai) %>% + mutate(coloc = n_coloc / n) +print(rnai) + +wells_type_all <- rbind(wells_rnai, wells_control) +wells_all <- merge(wells, wells_type_all) %>% mutate(coloc = n_coloc / n) -print(controls) + +## Apply statistical significance test. +## +## Prof. Mellone suggested aggregating all the RNAi well counts in the +## contingency table. + +## Fisher's exact test for alternative = "greater". +## +## Extracted the relevant parts of the fisher.test() source code to +## optimize by >1000x per http://adv-r.had.co.nz/Profiling.html#t-test +p_value <- function(x) { + m <- sum(x[, 1L]) + n <- sum(x[, 2L]) + k <- sum(x[1L, ]) + x <- x[1L, 1L] + phyper(x - 1, m, n, k, lower.tail = FALSE) +} +cells <- filter(wells_all, type == "RNAi") %>% select(n) %>% unlist +overlaps <- filter(wells_all, type == "RNAi") %>% select(n_coloc) %>% + unlist +cells_all <- sum(cells) +overlaps_all <- sum(overlaps) +contingency_matrix <- rbind(cells, overlaps, + cells_all, overlaps_all) +## Organize array to have 2x2 margin instead of 1x4. +dim(contingency_matrix) <- c(2, 2, length(contingency_matrix) / 4) +p_values_uncorrected <- apply(contingency_matrix, 3, p_value) +p_values <- p.adjust(p_values_uncorrected, method = "bonferroni") +wells_all$p_value <- NA +wells_all[wells_all$type == "RNAi", "p_value"] <- p_values_uncorrected + +write.csv(wells_all %>% arrange(p_value), file = "rnai-p_values.csv", + quote = FALSE, row.names = FALSE) diff --git a/plots.R b/plots.R new file mode 100644 index 0000000..152e1ff --- /dev/null +++ b/plots.R @@ -0,0 +1,37 @@ +## See the distribution of % Overlap vs. wells + +suppressPackageStartupMessages({ + library(ggplot2) +}) + +wells_all <- read.csv("rnai-p_values.csv") %>% + mutate(significant = p_value < 0.05) +binwidth <- 0.025 +## Bring RNAi to the front of the level list. +wells_all$type <- relevel(wells_all$type, "RNAi") + +ggplot(filter(wells_all, type == "RNAi"), + aes(coloc, fill = significant)) + + geom_histogram(binwidth = binwidth) + + scale_fill_manual(values = c("grey70", "blue")) + + ggtitle("Histogram of RNAi wells with significant overlap (p-value < 5%)") + + xlab("% colocalization") +ggsave("rnai-hist-signif.png") +ggsave("rnai-hist-signif.pdf") + +ggplot(wells_all, aes(coloc, color = type)) + + geom_freqpoly(binwidth = binwidth) + + scale_y_log10() + + facet_wrap(~ type) + + ggtitle("Histogram of all well types") + + xlab("% colocalization") +ggsave("all-hist.png") +ggsave("all-hist.pdf") + +ggplot(wells_all, aes(coloc, color = type)) + + geom_density() + + facet_wrap(~ type) + + ggtitle("Density plot of wells to show distribution") + + xlab("% colocalization") +ggsave("all-density.png") +ggsave("all-density.pdf")