From 76755f4a100584237c1a97a2b6d4be25ee4e0d61 Mon Sep 17 00:00:00 2001
From: Pariksheet Nanda <pariksheet.nanda@uconn.edu>
Date: Wed, 18 Jan 2017 14:33:39 -0500
Subject: [PATCH] ENH: Initial commit

---
 .gitignore                                    |   8 +
 analysis-only_overlap.R                       |  52 ++++++
 cellprofiler_all-plates/Makefile              |   3 +
 cellprofiler_all-plates/all-plates.properties | 176 ++++++++++++++++++
 4 files changed, 239 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 analysis-only_overlap.R
 create mode 100644 cellprofiler_all-plates/Makefile
 create mode 100644 cellprofiler_all-plates/all-plates.properties

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c82c184
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+# Data
+cellprofiler_all-plates.tar.xz
+cellprofiler_all-plates/all-plates.db
+z_stack.tar.xz
+z_stack/
+
+# Log files from CellProfiler Analyst
+*.log
diff --git a/analysis-only_overlap.R b/analysis-only_overlap.R
new file mode 100644
index 0000000..4ba1d12
--- /dev/null
+++ b/analysis-only_overlap.R
@@ -0,0 +1,52 @@
+suppressPackageStartupMessages({
+    library(RSQLite)
+    library(tidyr)
+    library(dplyr)
+})
+
+## Configuration values.
+file_db <- "cellprofiler_all-plates/all-plates.db"
+wells_control <- c(
+  "C03", # Should be 25-30%
+  "E05", # Should be 25-30%
+  "E09", # Should be <= 5%
+  "H21", # Should be 25-30%
+  "I04", # Should be 25-30%
+  "L09"  # Should be 5-15%
+)
+
+## Open the database.  Stop if the file is missing: SQLite would create it.
+stopifnot(file.exists(file_db))
+con <- dbConnect(SQLite(), dbname = file_db)
+
+## Table inspection functions; there are hundreds of fields, so grep helps.
+## Returns a list, named by table, of each table's field names (uses `con`).
+db_list_all_fields <- function() {
+    tables <- dbListTables(con)             # nolint
+    sapply(tables, dbListFields, conn = con, simplify = FALSE) # nolint
+}
+## Grep every table's field names for `pattern`, ignoring case.
+db_grep_fields <- function(pattern = "") {
+    grep(pattern, unlist(db_list_all_fields()), value = TRUE, ignore.case = TRUE)
+}
+
+## Read the objects table (column names lower-cased).  Only consider cells
+## with 1-2 ectopic centromeres; assume other cells are bad.
+cells <- dbReadTable(con, "Per_Object") %>% setNames(tolower(names(.))) %>%
+    filter(nuc_children_ect_count %in% c(1, 2))
+
+## The image table has metadata about well numbers, plate numbers, etc.
+images <- dbReadTable(con, "Per_Image") %>% setNames(tolower(names(.)))
+
+## Create well summary from images and cells.  Join explicitly on the
+## primary key `imagenumber` so no other shared column can become a key.
+wells <- merge(cells, select(images, imagenumber, image_metadata_well),
+               by = "imagenumber") %>%
+    replace_na(list(ect_classify_coloc = 0)) %>%
+    group_by(image_metadata_well) %>%
+    summarise(n = n(), n_coloc = sum(ect_classify_coloc))
+
+## Check the controls against the expected fractions in `wells_control`.
+controls <- filter(wells, image_metadata_well %in% wells_control) %>%
+    mutate(coloc = n_coloc / n)
+print(controls)
diff --git a/cellprofiler_all-plates/Makefile b/cellprofiler_all-plates/Makefile
new file mode 100644
index 0000000..3af2fd6
--- /dev/null
+++ b/cellprofiler_all-plates/Makefile
@@ -0,0 +1,3 @@
+.PHONY : analyst
+analyst : # Run CPA; log to a timestamped file ($$ escapes $ for the shell).
+	~/bin/cellprofiler-analyst all-plates.properties 2>&1 | tee cellprofiler-analyst-$$(date -Isec).log
diff --git a/cellprofiler_all-plates/all-plates.properties b/cellprofiler_all-plates/all-plates.properties
new file mode 100644
index 0000000..e925cb6
--- /dev/null
+++ b/cellprofiler_all-plates/all-plates.properties
@@ -0,0 +1,176 @@
+#Wed Jan  4 22:42:40 2017
+# ==============================================
+#
+# CellProfiler Analyst 2.0 properties file
+#
+# ==============================================
+
+# ==== Database Info ====
+db_type         = sqlite
+db_sqlite_file  = /work/pan14001/cellprofiler_all-plates/all-plates.db
+
+# ==== Database Tables ====
+image_table   = Per_Image
+object_table  = Per_Object
+
+# ==== Database Columns ====
+# Specify the database column names that contain unique IDs for images and
+# objects (and optionally tables).
+#
+# table_id (OPTIONAL): This field lets Classifier handle multiple tables if
+#          you merge them into one and add a table_number column as a foreign
+#          key to your per-image and per-object tables.
+# image_id: must be a foreign key column between your per-image and per-object
+#           tables
+# object_id: the object key column from your per-object table
+
+image_id      = ImageNumber
+object_id     = ObjectNumber
+plate_id      = Image_Metadata_Plate
+well_id       = Image_Metadata_Well
+series_id     = Image_Group_Number
+group_id      = Image_Group_Number
+timepoint_id  = Image_Group_Index
+
+# Also specify the column names that contain X and Y coordinates for each
+# object within an image.
+cell_x_loc    = nuc_Location_Center_X
+cell_y_loc    = nuc_Location_Center_Y
+
+# ==== Image Path and File Name Columns ====
+# Classifier needs to know where to find the images from your experiment.
+# Specify the column names from your per-image table that contain the image
+# paths and file names here.
+#
+# Individual image files are expected to be monochromatic and represent a single
+# channel. However, any number of images may be combined by adding a new channel
+# path and filename column to the per-image table of your database and then
+# adding those column names here.
+#
+# NOTE: These lists must have equal length!
+image_path_cols = Image_PathName_nuc_raw,Image_PathName_ect_raw,Image_PathName_cen_raw
+image_file_cols = Image_FileName_nuc_raw,Image_FileName_ect_raw,Image_FileName_cen_raw
+
+# CPA will now read image thumbnails directly from the database, if chosen in ExportToDatabase.
+image_thumbnail_cols = Image_Thumbnail_cen,Image_Thumbnail_ect,Image_Thumbnail_im_outlines,Image_Thumbnail_nuc
+
+# Give short names for each of the channels (respectively)...
+image_names = nuc_raw,ect_raw,cen_raw
+
+# Specify a default color for each of the channels (respectively)
+# Valid colors are: [red, green, blue, magenta, cyan, yellow, gray, none]
+image_channel_colors = red, green, blue, cyan, magenta, yellow, gray
+
+# ==== Image Access Info ====
+image_url_prepend = 
+
+# ==== Dynamic Groups ====
+# Here you can define groupings to choose from when classifier scores your experiment.  (eg: per-well)
+# This is OPTIONAL, you may leave "groups = ".
+# FORMAT:
+#   group_SQL_XXX  =  MySQL select statement that returns image-keys and group-keys.  This will be associated with the group name "XXX" from above.
+# EXAMPLE GROUPS:
+#   groups               =  Well, Gene, Well+Gene,
+#   group_SQL_Well       =  SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Per_Image_Table.well FROM Per_Image_Table
+#   group_SQL_Gene       =  SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Well_ID_Table.gene FROM Per_Image_Table, Well_ID_Table WHERE Per_Image_Table.well=Well_ID_Table.well
+#   group_SQL_Well+Gene  =  SELECT Per_Image_Table.TableNumber, Per_Image_Table.ImageNumber, Well_ID_Table.well, Well_ID_Table.gene FROM Per_Image_Table, Well_ID_Table WHERE Per_Image_Table.well=Well_ID_Table.well
+
+
+
+# ==== Image Filters ====
+# Here you can define image filters to let you select objects from a subset of your experiment when training the classifier.
+# FORMAT:
+#   filter_SQL_XXX  =  MySQL select statement that returns image keys you wish to filter out.  This will be associated with the filter name "XXX" from above.
+# EXAMPLE FILTERS:
+#   filters           =  EMPTY, CDKs,
+#   filter_SQL_EMPTY  =  SELECT TableNumber, ImageNumber FROM CPA_per_image, Well_ID_Table WHERE CPA_per_image.well=Well_ID_Table.well AND Well_ID_Table.Gene="EMPTY"
+#   filter_SQL_CDKs   =  SELECT TableNumber, ImageNumber FROM CPA_per_image, Well_ID_Table WHERE CPA_per_image.well=Well_ID_Table.well AND Well_ID_Table.Gene REGEXP 'CDK.*'
+
+
+
+# ==== Meta data ====
+# What are your objects called?
+# FORMAT:
+#   object_name  =  singular object name, plural object name,
+object_name  =  cell, cells,
+
+# What size plates were used?  96, 384 or 5600?  This is for use in the PlateViewer. Leave blank if none
+plate_type  = 384
+
+# ==== Excluded Columns ====
+# OPTIONAL
+# Classifier uses columns in your per_object table to find rules. It will
+# automatically ignore ID columns defined in table_id, image_id, and object_id
+# as well as any columns that contain non-numeric data.
+#
+# Here you may list other columns in your per_object table that you wish the
+# classifier to ignore when finding rules.
+#
+# You may also use regular expressions here to match more general column names.
+#
+# Example: classifier_ignore_columns = WellID, Meta_.*, .*_Position
+#   This will ignore any column named "WellID", any columns that start with
+#   "Meta_", and any columns that end in "_Position".
+#
+# A more restrictive example:
+# classifier_ignore_columns = ImageNumber, ObjectNumber, .*Parent.*, .*Children.*, .*_Location_Center_.*,.*_Metadata_.*
+
+classifier_ignore_columns  =  table_number_key_column, image_number_key_column, object_number_key_column
+
+# ==== Other ====
+# Specify the approximate diameter of your objects in pixels here.
+image_tile_size   =  50
+
+# Provides the image width and height. Used for per-image classification.
+# If not set, it will be obtained from the Image_Width and Image_Height
+# measurements in CellProfiler.
+
+# image_width  = 1000
+# image_height = 1000
+
+# OPTIONAL
+# Image Gallery can use a different tile size (in pixels) to create thumbnails for images
+# If not set, it will be the same as image_tile_size
+
+image_size =
+
+# ======== Classification type ========
+# OPTIONAL
+# CPA 2.2.0 allows image classification instead of object classification.
+# If left blank or set to "object", then Classifier will fetch objects (default).
+# If set to "image", then Classifier will fetch whole images instead of objects.
+
+classification_type  = 
+
+# ======== Auto Load Training Set ========
+# OPTIONAL
+# You may enter the full path to a training set that you would like Classifier
+# to automatically load when started.
+
+training_set  =
+
+# ======== Area Based Scoring ========
+# OPTIONAL
+# You may specify a column in your per-object table which will be summed and
+# reported in place of object-counts when scoring.  The typical use for this
+# is to report the areas of objects on a per-image or per-group basis.
+
+area_scoring_column =
+
+# ======== Output Per-Object Classes ========
+# OPTIONAL
+# Here you can specify a MySQL table in your Database where you would like
+# Classifier to write out class information for each object in the
+# object_table
+
+class_table  = colocalization
+
+# ======== Check Tables ========
+# OPTIONAL
+# [yes/no]  You can ask classifier to check your tables for anomalies such
+# as orphaned objects or missing column indices.  Default is on.
+# This check is run when Classifier starts and may take up to a minute if
+# your object_table is extremely large.
+
+check_tables = yes
+    
\ No newline at end of file