Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
suppressPackageStartupMessages({
library(tidyverse)
library(magrittr)
library(readxl)
library(stringr)
library(methods) # For Rscript per http://stackoverflow.com/a/41797025
})
## Configuration values (paths are relative to the script's working
## directory).
## SQLite database holding the CellProfiler measurement tables.
file_db <- "../results/cellprofiler/rnai-screen-tf.db"
## Excel workbook describing the plate layout of the RNAi library.
files_plate <- "../data/DRSC_TF_Library_Distribution.xls"
## Plate 504 was run at the last minute, so its controls are missing
## from the Excel spreadsheet provided by the core facility.  The
## table below lists the added control wells; it is merged with the
## spreadsheet data further down.
##
## Columns:
##   well           - well position on the plate.
##   symbol_control - control symbol for that well; named differently
##                    from the layout's `symbol` column so both survive
##                    the join.
wells_control <- tibble(
  well = c(
    ## Individually named controls.
    "I04", "E09", "L03", "E16", "L16", "H21",
    ## LacZ wells (24 of them).
    "C03", "C22", "D12", "D13", "E05", "E20", "F11",
    "F14", "G07", "G18", "H09", "H16", "I09", "I16",
    "J07", "J18", "K11", "K14", "L05", "L20", "M12",
    "M13", "N03", "N22"),
  symbol_control = c(
    "BROWN", "CAL1", "FACT", "Rho1", "Thread", "water",
    rep("LacZ", 24L))) %>%
  arrange(well)
## FIXME: Need to confirm what type of control Thread is.
##
## Read the plate layout from the third sheet of the workbook,
## lower-case the column names, and keep only the columns used below.
wells_layout <-
  read_excel(files_plate, sheet = 3) %>%
  setNames(tolower(names(.))) %>%
  select(plate, well, symbol = `symbol(s)`, fbgn = `fbgn(s)`, amplicon) %>%
  arrange(plate, well)
## Overlay the manually listed control wells and classify every well.
##
## The join key is spelled out so that it cannot silently change if
## either table ever gains another column with a shared name.
## NOTE(review): the key is `well` only, so each control symbol is
## applied to that well position on every plate in the layout, not
## just plate 504 -- confirm that this is intended.
wells_layout <- wells_layout %>%
  left_join(wells_control, by = "well") %>%
  ## A listed control symbol takes precedence over the spreadsheet's.
  mutate(symbol = ifelse(is.na(symbol_control), symbol, symbol_control)) %>%
  select(-symbol_control) %>%
  ## `control` is "positive", "negative", "unknown" (no amplicon and
  ## not a recognised control), or NA for ordinary RNAi wells.
  mutate(
    control = ifelse(
      symbol %in% c("CAL1", "FACT", "Rho1"), ## Rho1 gives binucleates
      "positive", NA),
    control = ifelse(
      symbol %in% c("LacZ", "Empty", "BROWN", "water"),
      "negative", control),
    control = ifelse(
      is.na(control) & is.na(amplicon),
      "unknown", control))
## List the distinct symbols of all wells that carry a control label.
print(unique(wells_layout$symbol[!is.na(wells_layout$control)]))
## Connect to the SQLite database named in the configuration.
db <- src_sqlite(file_db)
## Lazy query on the objects table.  Only cells with 1-2 ectopic
## centromeres are kept; other cells are assumed to be bad.
cells <- filter(
  select(tbl(db, "Per_Object"),
         ImageNumber, ect_Classify_coloc, nuc_Children_ect_Count),
  nuc_Children_ect_Count == 1 | nuc_Children_ect_Count == 2)
## Lazy query on the image table, which carries the well and plate
## metadata for each image.
images <- select(tbl(db, "Per_Image"),
                 ImageNumber, Image_Metadata_Well, Image_Metadata_Plate)
## Create the per-well summary from images and cells.  `ImageNumber`
## is the join key (a primary key of the image table); it is named
## explicitly rather than relying on the implicit natural join.
##
## Resulting columns:
##   well, plate - well position and numeric plate identifier.
##   n           - number of retained cells in the well.
##   n_coloc     - sum of `ect_Classify_coloc` over those cells, with
##                 missing values counted as 0.
wells <- inner_join(cells, images, by = "ImageNumber") %>%
  collect(n = Inf) %>%
  replace_na(list(ect_Classify_coloc = 0)) %>%
  group_by(Image_Metadata_Well, Image_Metadata_Plate) %>%
  summarise(n = n(), n_coloc = sum(ect_Classify_coloc)) %>%
  ## summarise() only peels off the last grouping level; drop the rest
  ## so later steps do not inherit a stale per-well grouping.
  ungroup() %>%
  rename(well = Image_Metadata_Well,
         plate = Image_Metadata_Plate) %>%
  ## One plate is named "160415_015529-V" in the metadata and is mapped
  ## to plate 504; every other plate is identified by the first three
  ## characters of its name.
  mutate(plate = ifelse(plate == "160415_015529-V", "504",
                        str_extract(plate, ".{3}"))) %>%
  mutate(plate = as.numeric(plate)) # For subsequent merge.
# Check the controls: pool the cell counts of all control wells per
# (symbol, control) pair and report the pooled colocalization
# fraction.
controls_all <- wells %>%
  merge(filter(wells_layout, !is.na(control))) %>%
  group_by(symbol, control) %>%
  summarise(n = sum(n), n_coloc = sum(n_coloc)) %>%
  mutate(coloc = n_coloc / n)
print(controls_all)
## Apply statistical significance test.
##
## Prof. Mellone suggested aggregating all the RNAi well counts in the
## contingency table.
## Fisher's exact test for alternative = "greater".
##
## Extracted the relevant parts of the fisher.test() source code to
## optimize by >1000x per http://adv-r.had.co.nz/Profiling.html#t-test
## One-sided ("greater") Fisher's exact test p-value for a single 2x2
## contingency table.
##
## x: 2x2 numeric matrix of counts.
##
## Returns the upper-tail hypergeometric probability of seeing a
## top-left count at least as large as the observed one, given the
## table margins.
p_value <- function(x) {
  observed <- x[1L, 1L]
  first_col_total <- sum(x[, 1L])
  second_col_total <- sum(x[, 2L])
  first_row_total <- sum(x[1L, ])
  ## P(X >= observed) is P(X > observed - 1), hence the shifted quantile.
  phyper(q = observed - 1,
         m = first_col_total,
         n = second_col_total,
         k = first_row_total,
         lower.tail = FALSE)
}
## Apply the one-sided ("greater") Fisher's exact test to many 2x2
## tables at once.
##
## For each index i the table is
##
##        | x[i]  x_total |
##        | y[i]  y_total |
##
## and the p-value is the upper-tail hypergeometric probability of a
## top-left count of at least x[i] given the table margins.
##
## x, y:             count vectors of equal length (one pair per table).
## x_total, y_total: reference counts; usually scalars shared by all
##                   tables, recycled by the usual R rules.
##
## Returns an unnamed numeric vector of length(x); numeric(0) when the
## inputs are empty.
p_values <- function(x, y, x_total, y_total) {
  ## Mismatched lengths would silently pair the wrong counts.
  stopifnot(length(x) == length(y))
  if (length(x) == 0L) {
    return(numeric(0))
  }
  ## as.numeric() drops names and avoids integer overflow in the sums.
  x <- as.numeric(x)
  y <- as.numeric(y)
  x_total <- as.numeric(x_total)
  y_total <- as.numeric(y_total)
  ## phyper() is vectorised, so no per-table loop is needed:
  ##   m = first column total, n = second column total,
  ##   k = first row total, and P(X >= x) is P(X > x - 1).
  phyper(x - 1, x + y, x_total + y_total, x + x_total,
         lower.tail = FALSE)
}
## P-values for RNAi wells.
##
## Attach the layout annotation to every measured well and compute the
## per-well colocalization fraction.
wells_all <- mutate(merge(wells, wells_layout), coloc = n_coloc / n)
## Per-well cell counts and colocalized counts, for all wells.
cells <- unlist(wells_all["n"])
overlaps <- unlist(wells_all["n_coloc"])
## Reference totals pooled over the non-control wells only.
cells_all <- with(wells_all, sum(n[is.na(control)]))
overlaps_all <- with(wells_all, sum(n_coloc[is.na(control)]))
## Test every well against the pooled non-control totals.
wells_all$p_value <- p_values(cells, overlaps, cells_all, overlaps_all)
## P-values for control wells.
##
## Reference totals pooled over the LacZ control wells only.
cells_all_control <- with(
  wells_all,
  sum(n[!is.na(control) & symbol %in% "LacZ"]))
overlaps_all_control <- with(
  wells_all,
  sum(n_coloc[!is.na(control) & symbol %in% "LacZ"]))
## Test every well against the pooled LacZ totals.
wells_all$p_value_control <- p_values(
  cells, overlaps,
  cells_all_control, overlaps_all_control)
## Check repeated RNAi wells: for every symbol that occurs in more
## than one well, report the number of wells plus the mean and
## standard deviation of the colocalization fraction.
wells_repeated <- filter(
  summarise(group_by(wells_all, symbol),
            n = n(), m = mean(coloc), sd = sd(coloc)),
  n > 1)
print(summary(wells_repeated))
## Write the full per-well table, sorted by ascending p-value.
write_csv(arrange(wells_all, p_value),
          "../results/tables/rnai-p_values.csv")