NOTE(review): line breaks restored from the hunk headers (-1,10 +1,8 and -62,39 +60,28).
NOTE(review): leading indentation inside hunk lines was lost and is a best guess (4 spaces);
apply whitespace-tolerantly (e.g. `git apply --ignore-whitespace`) and confirm against the real file.

diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R
index 3876b63..98ab98f 100644
--- a/analysis-only_overlap.R
+++ b/analysis-only_overlap.R
@@ -1,10 +1,8 @@
 suppressPackageStartupMessages({
-    library(RSQLite) # TODO: Use dplyr instead of RSQLite.
-    library(tidyr)
-    library(dplyr)
-    library(readr)
+    library(tidyverse)
     library(readxl)
     library(stringr)
+    library(methods) # For Rscript per http://stackoverflow.com/a/41797025
 })
 
 ## Configuration values.
@@ -62,39 +60,28 @@ wells_layout %<>%
 ## List all controls
 print(wells_layout %>% filter(!is.na(control)) %>% .$symbol %>% unique)
 
-## Database boilerplate.
-driver <- dbDriver("SQLite")
-con <- dbConnect(driver, dbname = file_db)
-
-## Table inspection functions. There are hundreds of fields, so it
-## helps to grep through them.
-db_list_all_fields <- function() {
-    tables <- dbListTables(con) # nolint
-    sapply(tables, dbListFields, con = con) # nolint
-}
-db_grep_fields <- function(pattern = "") {
-    db_list_all_fields() %>% unlist() %>%
-        grep(pattern = pattern, value = TRUE, ignore.case = TRUE)
-}
+## Connect to the database.
+db <- src_sqlite(file_db)
 
 ## Read in objects table and summarize. Only consider cells with 1-2
 ## ectopic centromeres; assume other cells are bad.
-cells <- dbReadTable(con, "Per_Object") %>% setNames(tolower(names(.))) %>%
-    filter(nuc_children_ect_count == 1 | nuc_children_ect_count == 2)
+cells <- tbl(db, "Per_Object") %>%
+    select(ImageNumber, ect_Classify_coloc, nuc_Children_ect_Count) %>%
+    filter(nuc_Children_ect_Count == 1 | nuc_Children_ect_Count == 2)
 
 ## The image table has metadata about well numbers, plate numbers,
 ## etc.
-images <- dbReadTable(con, "Per_Image") %>% setNames(tolower(names(.)))
+images <- tbl(db, "Per_Image") %>%
+    select(ImageNumber, Image_Metadata_Well, Image_Metadata_Plate)
 
 ## Create well summary from images and cells. `imagenumber` is a
 ## primary key.
-wells <- merge(cells, select(images, imagenumber, image_metadata_well,
-                             image_metadata_plate)) %>%
-    replace_na(list(ect_classify_coloc = 0)) %>%
-    group_by(image_metadata_well, image_metadata_plate) %>%
-    summarise(n = n(), n_coloc = sum(ect_classify_coloc)) %>%
-    rename(well = image_metadata_well,
-           plate = image_metadata_plate) %>%
+wells <- inner_join(cells, images) %>% collect(n = Inf) %>%
+    replace_na(list(ect_Classify_coloc = 0)) %>%
+    group_by(Image_Metadata_Well, Image_Metadata_Plate) %>%
+    summarise(n = n(), n_coloc = sum(ect_Classify_coloc)) %>%
+    rename(well = Image_Metadata_Well,
+           plate = Image_Metadata_Plate) %>%
     mutate(plate = ifelse(plate == "160415_015529-V", "504",
                           str_extract(plate, ".{3}"))) %>%
     mutate(plate = as.numeric(plate)) # For subsequent merge.