Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
ENH: Use higher performance dplyr database integration
- Replace packages with tidyverse and dplyr sqlite integration.
- Rscript run time is now 3 seconds instead of several minutes.
  • Loading branch information
pan14001 committed Feb 7, 2017
1 parent aed1175 commit d97d74d
Showing 1 changed file with 15 additions and 28 deletions.
43 changes: 15 additions & 28 deletions analysis-only_overlap.R
@@ -1,10 +1,8 @@
suppressPackageStartupMessages({
library(RSQLite) # TODO: Use dplyr instead of RSQLite.
library(tidyr)
library(dplyr)
library(readr)
library(tidyverse)
library(readxl)
library(stringr)
library(methods) # For Rscript per http://stackoverflow.com/a/41797025
})

## Configuration values.
Expand Down Expand Up @@ -62,39 +60,28 @@ wells_layout %<>%
## List all controls
print(wells_layout %>% filter(!is.na(control)) %>% .$symbol %>% unique)

## Database boilerplate: load the SQLite driver and open a connection
## to the database file named by `file_db`.
driver <- dbDriver("SQLite")
con <- dbConnect(driver, dbname = file_db)

## Table inspection functions. There are hundreds of fields, so it
## helps to grep through them.
## List the fields of every table reachable through `con`.
##
## Returns a list with one element per table, named after the table,
## each holding that table's field names. The result is always a
## list: lapply() is used instead of sapply() so the return type does
## not change to a matrix or vector when the tables happen to have
## the same number of fields.
db_list_all_fields <- function() {
  tables <- dbListTables(con) # nolint
  ## The connection is passed positionally to avoid relying on
  ## partial matching of the `conn` argument name.
  fields <- lapply(tables, function(table_name) {
    dbListFields(con, table_name) # nolint
  })
  setNames(fields, tables)
}
## Search every field name in the database for `pattern`.
##
## `pattern` is a regular expression matched case-insensitively; the
## default "" matches every field. Returns the matching field names.
db_grep_fields <- function(pattern = "") {
  every_field <- unlist(db_list_all_fields())
  grep(pattern, every_field, ignore.case = TRUE, value = TRUE)
}
## Connect to the database: `db` is the dplyr SQLite source, opened
## on `file_db`, that the tbl() calls below read their tables from.
db <- src_sqlite(file_db)

## Load the per-object table and summarize. Keep only cells that have
## one or two ectopic centromeres; cells with any other count are
## assumed to be bad and are dropped.
##
## dbReadTable() form: read the whole table, lower-case its column
## names, then filter.
cells <- dbReadTable(con, "Per_Object")
names(cells) <- tolower(names(cells))
cells <- filter(cells, nuc_children_ect_count %in% c(1, 2))
## tbl() form: keep only the three columns used later, then filter.
cells <- select(tbl(db, "Per_Object"),
                ImageNumber, ect_Classify_coloc, nuc_Children_ect_Count) %>%
  filter(nuc_Children_ect_Count %in% c(1, 2))

## The per-image table carries the metadata (well numbers, plate
## numbers, etc.) for each image.
##
## dbReadTable() form: read the whole table and lower-case its column
## names.
images <- dbReadTable(con, "Per_Image")
names(images) <- tolower(names(images))
## tbl() form: keep only the image number plus its well and plate.
images <- select(tbl(db, "Per_Image"),
                 ImageNumber, Image_Metadata_Well, Image_Metadata_Plate)

## Create well summary from images and cells. `imagenumber` is a
## primary key.
## NOTE(review): this chain ends on a pipe, so it is not a complete
## statement by itself; it looks like the earlier merge()-based form
## of the summary, written against lower-cased column names --
## confirm before running this section as-is.
wells <- merge(cells, select(images, imagenumber, image_metadata_well,
image_metadata_plate)) %>%
replace_na(list(ect_classify_coloc = 0)) %>%
group_by(image_metadata_well, image_metadata_plate) %>%
summarise(n = n(), n_coloc = sum(ect_classify_coloc)) %>%
rename(well = image_metadata_well,
plate = image_metadata_plate) %>%
## Build the per-well summary. Cells are joined to their image
## metadata on `ImageNumber`, the only column the two selections have
## in common; naming it explicitly keeps the join from silently
## picking up other same-named columns and avoids the "Joining, by"
## message. collect(n = Inf) then pulls every joined row into R.
wells <- inner_join(cells, images, by = "ImageNumber") %>%
  collect(n = Inf) %>%
  ## NA classifications are counted as 0 in the sum below.
  replace_na(list(ect_Classify_coloc = 0)) %>%
  group_by(Image_Metadata_Well, Image_Metadata_Plate) %>%
  ## n = cells per well and plate, n_coloc = sum of their
  ## classification values.
  summarise(n = n(), n_coloc = sum(ect_Classify_coloc)) %>%
  rename(well = Image_Metadata_Well,
         plate = Image_Metadata_Plate) %>%
  ## Plate "160415_015529-V" is mapped to "504"; every other plate
  ## label is reduced to its first three characters.
  mutate(plate = ifelse(plate == "160415_015529-V", "504",
                        str_extract(plate, ".{3}"))) %>%
  ## NOTE(review): the result is presumably still grouped by `well`
  ## after summarise(); add ungroup() if later code does not expect
  ## a grouped table.
  mutate(plate = as.numeric(plate)) # For subsequent merge.
Expand Down

0 comments on commit d97d74d

Please sign in to comment.