From 76755f4a100584237c1a97a2b6d4be25ee4e0d61 Mon Sep 17 00:00:00 2001
From: Pariksheet Nanda
Date: Wed, 18 Jan 2017 14:33:39 -0500
Subject: [PATCH] ENH: Initial commit

Add an R script that summarises per-well colocalization counts from the
CellProfiler SQLite export, a Makefile target that launches CellProfiler
Analyst with a timestamped log, and the matching properties file.
---
 .gitignore                                    |   8 +
 analysis-only_overlap.R                       |  52 ++++++
 cellprofiler_all-plates/Makefile              |   4 +
 cellprofiler_all-plates/all-plates.properties | 176 ++++++++++++++++++
 4 files changed, 240 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 analysis-only_overlap.R
 create mode 100644 cellprofiler_all-plates/Makefile
 create mode 100644 cellprofiler_all-plates/all-plates.properties

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c82c184
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+# Data
+cellprofiler_all-plates.tar.xz
+cellprofiler_all-plates/all-plates.db
+z_stack.tar.xz
+z_stack/
+
+# Log files from CellProfiler Analyst
+*.log
diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R
new file mode 100644
index 0000000..4ba1d12
--- /dev/null
+++ b/analysis-only_overlap.R
@@ -0,0 +1,52 @@
+suppressPackageStartupMessages({
+  library(RSQLite)
+  library(tidyr)
+  library(dplyr)
+})
+
+## Configuration values.
+file_db <- "cellprofiler_all-plates/all-plates.db"
+wells_control <- c(
+  "C03", # Should be 25-30%
+  "E05", # Should be 25-30%
+  "E09", # Should be <= 5%
+  "H21", # Should be 25-30%
+  "I04", # Should be 25-30%
+  "L09"  # Should be 5-15%
+)
+
+## Database connection. Left open on purpose: the helpers below use `con`.
+driver <- SQLite()
+con <- dbConnect(driver, dbname = file_db)
+
+## Table inspection helpers. `db_list_all_fields()` returns a named list of
+## field names per table; `db_grep_fields()` greps them, ignoring case.
+db_list_all_fields <- function() {
+  tables <- dbListTables(con) # nolint
+  sapply(tables, dbListFields, conn = con, simplify = FALSE) # nolint
+}
+db_grep_fields <- function(pattern = "") {
+  db_list_all_fields() %>% unlist() %>%
+    grep(pattern = pattern, value = TRUE, ignore.case = TRUE)
+}
+
+## Read in objects table and summarize. Only consider cells with 1-2
+## ectopic centromeres; assume other cells are bad.
+cells <- dbReadTable(con, "Per_Object") %>% setNames(tolower(names(.))) %>%
+  filter(nuc_children_ect_count %in% c(1, 2))
+
+## The image table has metadata about well numbers, plate numbers,
+## etc.
+images <- dbReadTable(con, "Per_Image") %>% setNames(tolower(names(.)))
+
+## Create well summary from images and cells. `imagenumber` is a
+## primary key.
+wells <- merge(cells, select(images, imagenumber, image_metadata_well)) %>%
+  replace_na(list(ect_classify_coloc = 0)) %>%
+  group_by(image_metadata_well) %>%
+  summarise(n = n(), n_coloc = sum(ect_classify_coloc))
+
+# Check the controls.
+controls <- filter(wells, image_metadata_well %in% wells_control) %>%
+  mutate(coloc = n_coloc / n)
+print(controls)
diff --git a/cellprofiler_all-plates/Makefile b/cellprofiler_all-plates/Makefile
new file mode 100644
index 0000000..3af2fd6
--- /dev/null
+++ b/cellprofiler_all-plates/Makefile
@@ -0,0 +1,4 @@
+.PHONY : analyst
+# `$$` makes the shell, not make, expand the date command substitution.
+analyst :
+	~/bin/cellprofiler-analyst all-plates.properties 2>&1 | tee cellprofiler-analyst-$$(date -Isec).log
diff --git a/cellprofiler_all-plates/all-plates.properties b/cellprofiler_all-plates/all-plates.properties
new file mode 100644
index 0000000..e925cb6
--- /dev/null
+++ b/cellprofiler_all-plates/all-plates.properties
@@ -0,0 +1,176 @@
+#Wed Jan 4 22:42:40 2017
+# ==============================================
+#
+# CellProfiler Analyst 2.0 properties file
+#
+# ==============================================
+
+# ==== Database Info ====
+db_type = sqlite
+db_sqlite_file = /work/pan14001/cellprofiler_all-plates/all-plates.db
+
+# ==== Database Tables ====
+image_table = Per_Image
+object_table = Per_Object
+
+# ==== Database Columns ====
+# Specify the database column names that contain unique IDs for images and
+# objects (and optionally tables).
+#
+# table_id (OPTIONAL): This field lets Classifier handle multiple tables if
+# you merge them into one and add a table_number column as a foreign
+# key to your per-image and per-object tables.
+# image_id: must be a foreign key column between your per-image and per-object
+# tables
+# object_id: the object key column from your per-object table
+
+image_id = ImageNumber
+object_id = ObjectNumber
+plate_id = Image_Metadata_Plate
+well_id = Image_Metadata_Well
+series_id = Image_Group_Number
+group_id = Image_Group_Number
+timepoint_id = Image_Group_Index
+
+# Also specify the column names that contain X and Y coordinates for each
+# object within an image.
+cell_x_loc = nuc_Location_Center_X
+cell_y_loc = nuc_Location_Center_Y
+
+# ==== Image Path and File Name Columns ====
+# Classifier needs to know where to find the images from your experiment.
+# Specify the column names from your per-image table that contain the image
+# paths and file names here.
+#
+# Individual image files are expected to be monochromatic and represent a single
+# channel. However, any number of images may be combined by adding a new channel
+# path and filename column to the per-image table of your database and then
+# adding those column names here.
+#
+# NOTE: These lists must have equal length!
+image_path_cols = Image_PathName_nuc_raw,Image_PathName_ect_raw,Image_PathName_cen_raw
+image_file_cols = Image_FileName_nuc_raw,Image_FileName_ect_raw,Image_FileName_cen_raw
+
+# CPA will now read image thumbnails directly from the database, if chosen in ExportToDatabase.
+image_thumbnail_cols = Image_Thumbnail_cen,Image_Thumbnail_ect,Image_Thumbnail_im_outlines,Image_Thumbnail_nuc
+
+# Give short names for each of the channels (respectively)...
+image_names = nuc_raw,ect_raw,cen_raw
+
+# Specify a default color for each of the channels (respectively)
+# Valid colors are: [red, green, blue, magenta, cyan, yellow, gray, none]
+image_channel_colors = red, green, blue, cyan, magenta, yellow, gray
+
+# ==== Image Access Info ====
+image_url_prepend =
+
+# ==== Dynamic Groups ====
+# Here you can define groupings to choose from when classifier scores your experiment. (e.g. per-well)
+# This is OPTIONAL, you may leave "groups = ".
+# FORMAT:
+# group_SQL_XXX = MySQL select statement that returns image-keys and group-keys. This will be associated with the group name "XXX" from above.
+# EXAMPLE GROUPS:
+# groups = Well, Gene, Well+Gene,
+# group_SQL_Well = SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Per_Image_Table.well FROM Per_Image_Table
+# group_SQL_Gene = SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Well_ID_Table.gene FROM Per_Image_Table, Well_ID_Table WHERE Per_Image_Table.well=Well_ID_Table.well
+# group_SQL_Well+Gene = SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Well_ID_Table.well, Well_ID_Table.gene FROM Per_Image_Table, Well_ID_Table WHERE Per_Image_Table.well=Well_ID_Table.well
+
+
+
+# ==== Image Filters ====
+# Here you can define image filters to let you select objects from a subset of your experiment when training the classifier.
+# FORMAT:
+# filter_SQL_XXX = MySQL select statement that returns image keys you wish to filter out. This will be associated with the filter name "XXX" from above.
+# EXAMPLE FILTERS:
+# filters = EMPTY, CDKs,
+# filter_SQL_EMPTY = SELECT TableNumber, ImageNumber FROM CPA_per_image, Well_ID_Table WHERE CPA_per_image.well=Well_ID_Table.well AND Well_ID_Table.Gene="EMPTY"
+# filter_SQL_CDKs = SELECT TableNumber, ImageNumber FROM CPA_per_image, Well_ID_Table WHERE CPA_per_image.well=Well_ID_Table.well AND Well_ID_Table.Gene REGEXP 'CDK.*'
+
+
+
+# ==== Meta data ====
+# What are your objects called?
+# FORMAT:
+# object_name = singular object name, plural object name,
+object_name = cell, cells,
+
+# What size plates were used? 96, 384 or 5600? This is for use in the PlateViewer. Leave blank if none
+plate_type = 384
+
+# ==== Excluded Columns ====
+# OPTIONAL
+# Classifier uses columns in your per_object table to find rules. It will
+# automatically ignore ID columns defined in table_id, image_id, and object_id
+# as well as any columns that contain non-numeric data.
+#
+# Here you may list other columns in your per_object table that you wish the
+# classifier to ignore when finding rules.
+#
+# You may also use regular expressions here to match more general column names.
+#
+# Example: classifier_ignore_columns = WellID, Meta_.*, .*_Position
+# This will ignore any column named "WellID", any columns that start with
+# "Meta_", and any columns that end in "_Position".
+#
+# A more restrictive example:
+# classifier_ignore_columns = ImageNumber, ObjectNumber, .*Parent.*, .*Children.*, .*_Location_Center_.*,.*_Metadata_.*
+
+classifier_ignore_columns = table_number_key_column, image_number_key_column, object_number_key_column
+
+# ==== Other ====
+# Specify the approximate diameter of your objects in pixels here.
+image_tile_size = 50
+
+# Provides the image width and height. Used for per-image classification.
+# If not set, it will be obtained from the Image_Width and Image_Height
+# measurements in CellProfiler.
+
+# image_width = 1000
+# image_height = 1000
+
+# OPTIONAL
+# Image Gallery can use a different tile size (in pixels) to create thumbnails for images
+# If not set, it will be the same as image_tile_size
+
+image_size =
+
+# ======== Classification type ========
+# OPTIONAL
+# CPA 2.2.0 allows image classification instead of object classification.
+# If left blank or set to "object", then Classifier will fetch objects (default).
+# If set to "image", then Classifier will fetch whole images instead of objects.
+
+classification_type =
+
+# ======== Auto Load Training Set ========
+# OPTIONAL
+# You may enter the full path to a training set that you would like Classifier
+# to automatically load when started.
+
+training_set =
+
+# ======== Area Based Scoring ========
+# OPTIONAL
+# You may specify a column in your per-object table which will be summed and
+# reported in place of object-counts when scoring. The typical use for this
+# is to report the areas of objects on a per-image or per-group basis.
+
+area_scoring_column =
+
+# ======== Output Per-Object Classes ========
+# OPTIONAL
+# Here you can specify a MySQL table in your Database where you would like
+# Classifier to write out class information for each object in the
+# object_table
+
+class_table = colocalization
+
+# ======== Check Tables ========
+# OPTIONAL
+# [yes/no] You can ask classifier to check your tables for anomalies such
+# as orphaned objects or missing column indices. Default is on.
+# This check is run when Classifier starts and may take up to a minute if
+# your object_table is extremely large.
+
+check_tables = yes
+ 
\ No newline at end of file