Skip to content

Commit

Permalink
ENH: Add well metadata from spreadsheet
Browse files Browse the repository at this point in the history
- Reduce number of variables.
- Use more tidyverse packages.
- Make note to use dplyr database integration instead of directly
  using RSQLite.
  • Loading branch information
pan14001 committed Feb 7, 2017
1 parent c7bc7c0 commit 0916ac5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 39 deletions.
116 changes: 77 additions & 39 deletions analysis-only_overlap.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,66 @@
suppressPackageStartupMessages({
library(RSQLite)
library(RSQLite) # TODO: Use dplyr instead of RSQLite.
library(tidyr)
library(dplyr)
library(readr)
library(readxl)
library(stringr)
})

## Configuration values.
file_db <- "cellprofiler_all-plates/all-plates.db"
files_plate <- "plates/DRSC_TF_Library_Distribution.xls"

wells_control <- data.frame(
well = c("C03", "C22", "D12", "D13", "E05", "E20", "F11", "F14",
"G07", "G18", "H09", "H16", "I09", "I16", "J07", "J18",
"K11", "K14", "L05", "L20", "M12", "M13", "N03", "N22"),
type = "Lacz",
control = "negative") %>%
rbind(data.frame(
type = c("BROWN", "CAL1", "FACT", "Rho1", "thread", "water"),
well = c("I04", "E09", "L03", "E16", "L16", "H21"),
control = c(NA, NA, "positive", NA, NA, NA)
))
## Plate 504 was run at the last minute, and so the controls are not
## reflected in the Excel spreadsheet provided by the core facility.
## Therefore below is the list of added wells, which we will later
## merge with the spreadsheet data:
wells_control_plate504 <- tibble::tribble(
~well, ~symbol,
##---|------
"I04", "BROWN",
"E09", "CAL1",
"L03", "FACT",
"E16", "Rho1",
"L16", "Thread",
"H21", "water") %>%
bind_rows(tibble(
symbol = "LacZ",
well = c("C03", "C22", "D12", "D13", "E05", "E20", "F11",
"F14", "G07", "G18", "H09", "H16", "I09", "I16",
"J07", "J18", "K11", "K14", "L05", "L20", "M12",
"M13", "N03", "N22")))
## FIXME: Need to confirm what type of control Thread is.
wells_layout <-
read_excel(files_plate, sheet = 3) %>%
setNames(tolower(names(.))) %>%
select(plate, well, `symbol(s)`, `fbgn(s)`, amplicon) %>%
rename(symbol = `symbol(s)`,
fbgn = `fbgn(s)`)

wells_control_comparison <- merge(
wells_layout %>% filter(well %in% wells_control_plate504$well,
plate %in% c(501:504, 507)) %>%
select(-fbgn, -amplicon) %>% spread(plate, symbol),
wells_control_plate504 %>% rename(`504_new` = symbol)) %>%
.[, order(colnames(.))] %>% select(well, everything())
write_csv(wells_control_comparison, "plate504-different-controls.csv")

wells_layout %<>%
bind_rows(mutate(wells_control_plate504, plate = 504)) %>%
arrange(plate, well) %>%
mutate(control = ifelse(
symbol %in% c("CAL1", "FACT", "Rho1"), ## Rho1 gives binucleates
"positive", NA)) %>%
mutate(control = ifelse(
symbol %in% c("LacZ", "Empty", "BROWN", "water"),
"negative", control)) %>%
mutate(control = ifelse(
is.na(control) & is.na(amplicon),
"unknown", control))

## List all controls
print(wells_layout %>% filter(!is.na(control)) %>% .$symbol %>% unique)

## Database boilerplate.
driver <- dbDriver("SQLite")
Expand Down Expand Up @@ -51,34 +94,20 @@ wells <- merge(cells, select(images, imagenumber, image_metadata_well,
group_by(image_metadata_well, image_metadata_plate) %>%
summarise(n = n(), n_coloc = sum(ect_classify_coloc)) %>%
rename(well = image_metadata_well,
plate = image_metadata_plate)
plate = image_metadata_plate) %>%
mutate(plate = ifelse(plate == "160415_015529-V", "504",
str_extract(plate, ".{3}"))) %>%
mutate(plate = as.numeric(plate)) # For subsequent merge.

# Check the controls.
controls_plate1 <- merge(wells, wells_control) %>%
filter(plate == "160415_015529-V") %>%
group_by(type, control) %>%
summarise(n = sum(n),
n_coloc = sum(n_coloc),
coloc = n_coloc / n)
print(controls_plate1)

controls_all <- merge(wells, wells_control) %>%
group_by(type, control) %>%
controls_all <- merge(wells, filter(wells_layout, ! is.na(control))) %>%
group_by(symbol, control) %>%
summarise(n = sum(n), n_coloc = sum(n_coloc), coloc = n_coloc / n)
print(controls_all)

wells_rnai <- data.frame(
well = setdiff(wells$well, wells_control$well),
type = "RNAi",
control = NA)

rnai <- filter(wells, well %in% wells_rnai$well) %>%
mutate(coloc = n_coloc / n)
print(rnai)

wells_type_all <- rbind(wells_rnai, wells_control)
wells_all <- merge(wells, wells_type_all) %>%
mutate(coloc = n_coloc / n)
## wells_repeated <- group_by(wells_all, symbol) %>%
## summarise(n = n(), m = mean(p_value), sd = sd(p_value)) %>%
## filter(n > 1)

## Apply statistical significance test.
##
Expand All @@ -96,11 +125,15 @@ p_value <- function(x) {
x <- x[1L, 1L]
phyper(x - 1, m, n, k, lower.tail = FALSE)
}
wells_all <- merge(wells, wells_layout) %>%
mutate(coloc = n_coloc / n)
cells <- filter(wells_all) %>% select(n) %>% unlist
overlaps <- filter(wells_all) %>% select(n_coloc) %>%
unlist
cells_all <- sum(filter(cells, type == "RNAi"))
overlaps_all <- sum(filter(overlaps, type == "RNAi"))
cells_all <- filter(wells_all, is.na(control)) %>% select(n) %>%
unlist %>% sum
overlaps_all <- filter(wells_all, is.na(control)) %>%
select(n_coloc) %>% unlist %>% sum
contingency_matrix <- rbind(cells, overlaps,
cells_all, overlaps_all)
## Organize array to have 2x2 margin instead of 1x4.
Expand All @@ -109,5 +142,10 @@ p_values_uncorrected <- apply(contingency_matrix, 3, p_value)
p_values <- p.adjust(p_values_uncorrected, method = "bonferroni")
wells_all$p_value <- p_values_uncorrected

write.csv(wells_all %>% arrange(p_value), file = "rnai-p_values.csv",
quote = FALSE, row.names = FALSE)
## Check repeated RNAi wells.
wells_repeated <- group_by(wells_all, symbol) %>%
summarise(n = n(), m = mean(coloc), sd = sd(coloc)) %>%
filter(n > 1)
print(summary(wells_repeated))

write_csv(wells_all %>% arrange(p_value), "rnai-p_values.csv")
Binary file added plates/DRSC_TF_Library_Distribution.xls
Binary file not shown.

0 comments on commit 0916ac5

Please sign in to comment.