Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
NCC_RNAseq_45X/NCC_analysis_OPP_v01_pass.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
369 lines (332 sloc)
18.1 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NCC_analysis.R - Owen Chambers | |
require(tidyverse) | |
require(ggpubr) | |
require(ggbeeswarm) | |
# define color palette for clone level plots | |
col.pal <- c( | |
"deepskyblue", "blue", | |
"springgreen3", "darkgreen") | |
# input CSV from CellProfiler with IntegratedIntensity of DAPI and MeanIntensity of DAPI/SOX10/OPP | |
data <- read.csv(file.choose(), header = TRUE) | |
# mutate columns to label ploidy, round, and clone | |
# normalize OPP intensity by the dimmest OPP object in each image | |
total <- as_tibble(data) |> | |
mutate(slide = str_extract(FileName_Nuclei, pattern = "_[TB]")) |> | |
mutate(round = str_extract(FileName_Nuclei, pattern = "R[0-9]*")) |> | |
mutate(ploidy = str_extract(FileName_Nuclei, pattern = "X[XYO]*")) |> | |
mutate(image = str_extract(FileName_Nuclei, pattern = "f[0-9][0-9]")) |> | |
mutate(nM = str_replace(str_trim(str_extract(FileName_Nuclei, pattern = "[0-9]*nM"), side = "left"), "nM", "")) |> | |
mutate(nM = as.numeric(nM)) |> | |
mutate(nM = case_when(is.na(nM) ~ 0, T ~ nM)) |> | |
mutate(clone = case_when(round %in% c("R14","R16") ~ paste0(ploidy,"1"), | |
round == "R27" ~ paste0(ploidy, str_replace(str_extract(FileName_Nuclei, "c[0-9]*"), "c", "")), | |
round == "R29" ~ "XY1", | |
round == "R" ~ paste0(ploidy, str_extract(str_extract(FileName_Nuclei, "male[12]"), "[12]"), "_", str_extract(FileName_Nuclei, "[no][el][dw]")), | |
round %in% c("R34") ~ paste0(ploidy, str_extract(str_extract(FileName_Nuclei, "male[12]"), "[12]")), | |
T ~ paste0(ploidy, str_replace(str_extract(FileName_Nuclei, "m[0-9]*"), "m", "")))) |> | |
mutate(round = case_when(round == "R" ~ "R1", | |
round == "R29" ~ "R2", | |
T ~ round)) |> | |
mutate(clone = str_replace(clone, "XY2", "XY70")) |> | |
mutate(clone = str_replace(clone, "XO2", "XO7")) |> | |
mutate(clone = case_when(round == "R3" ~ paste0(clone, "_new"), T ~ clone)) |> | |
mutate(ploidy = case_when(round == "R2" ~ str_extract(clone, "X[YO]"), | |
T ~ ploidy)) |> | |
mutate(panel = case_when(str_detect(clone, "1") ~ "male1", T ~ "male2")) |> | |
filter(!(round %in% c("R1","R2"))) | |
outdir<-"all_final_pass" | |
dir.create(outdir) | |
quant <- 0.05 | |
Z<-6 | |
total_quantiles <- total |> group_by(ImageNumber) |> | |
dplyr::summarize(lower_nuclei_int = quantile(Intensity_IntegratedIntensity_AlignedNuclei, quant), | |
upper_nuclei_int = quantile(Intensity_IntegratedIntensity_AlignedNuclei, 1-quant)) | |
total_no_outliers <- total |> left_join(total_quantiles, by = c("ImageNumber")) |> | |
filter(Intensity_IntegratedIntensity_AlignedNuclei > lower_nuclei_int & | |
Intensity_IntegratedIntensity_AlignedNuclei < upper_nuclei_int) | |
# normalize SOX10 intensity by dividing each object's intensity by the ratio of | |
# the dimmest SOX10 object in its image to the dimmest SOX10 object across all images | |
normalized_sox10 <- total_no_outliers |> | |
group_by(ImageNumber) |> | |
dplyr::summarize( | |
min_sox10_object_int = quantile(Intensity_MeanIntensity_AlignedSOX10, quant), | |
max_sox10_object_int = quantile(Intensity_MeanIntensity_AlignedSOX10, 1-quant), | |
sum_sox10 = sum(Intensity_MeanIntensity_AlignedSOX10), | |
cells_per_image = n()) |> | |
right_join(total_no_outliers, by = "ImageNumber") |> | |
mutate(sox10_object_ratio = min_sox10_object_int/quantile(min_sox10_object_int, quant), .before = 2) |> | |
mutate(sox10_normalized_int = (Intensity_MeanIntensity_AlignedSOX10/sox10_object_ratio), .before = 2) | |
#thresholded | |
cutoff <- normalized_sox10 |> ungroup() |> filter(round %in% c("R3")) |> | |
dplyr::summarize(total_cells = n(), | |
background_mean = mean(sox10_normalized_int), | |
stdev = sd(sox10_normalized_int), | |
cutoff = background_mean + (stdev * Z)) |> pull(cutoff) | |
cutoff | |
# ggdensity(normalized_sox10 |> filter(nM == 0), x = "sox10_normalized_int", | |
# color = "clone", linetype = "ploidy", facet.by = c("round","panel"), scales = "free_y") + | |
# geom_vline(xintercept = cutoff) | |
#standardized | |
classified_sox10 <- normalized_sox10 |> filter(str_detect(round, "R[1-9][0-9]")) |> | |
group_by(round) |> | |
dplyr::summarize(cells_per_round = n(), | |
mean_sox10 = mean(sox10_normalized_int), | |
sd_sox10 = sd(sox10_normalized_int)) |> | |
dplyr::select(round, mean_sox10, sd_sox10) |> | |
right_join(normalized_sox10 |> filter(str_detect(round, "R[1-9][0-9]")), by = c("round")) |> group_by(round) |> | |
mutate(opp_ratio = Intensity_MeanIntensity_AlignedOPP/quantile(Intensity_MeanIntensity_AlignedOPP, quant), | |
sox10_ratio = (sox10_normalized_int-mean_sox10)/sd_sox10, .before = 1) |> | |
mutate(sox10_status2 = case_when(sox10_normalized_int > cutoff ~ T, | |
sox10_normalized_int <= cutoff ~ F)) |> | |
mutate(sox10_status = case_when(sox10_ratio > 0 ~ T, | |
sox10_ratio <= 0 ~ F)) |> | |
group_by(ImageNumber) |> | |
mutate(sox10_perc = sum(sox10_status == T)/n()) |> | |
mutate(sox10_perc2 = sum(sox10_status2 == T)/n()) | |
# ggdensity(classified_sox10 |> filter(nM == 0), | |
# x = "sox10_ratio", facet.by = c("round", "panel"), | |
# color = "clone", linetype = "ploidy", scales = "free_y") + | |
# geom_vline(xintercept = 0) | |
# ggscatter(classified_sox10 |> group_by(ImageNumber) |> | |
# dplyr::summarize(panel = unique(panel), | |
# ploidy = unique(ploidy), | |
# group = paste(unique(panel), "_", unique(ploidy)), | |
# clone = unique(clone), | |
# sox10 = unique(sox10_perc), | |
# sox102 = unique(sox10_perc2), | |
# mean_opp = mean(opp_ratio), | |
# cells = unique(cells_per_image)), | |
# x = "sox10", y = "sox102", color = "group", | |
# cor.coef = T, add = "reg.line", | |
# facet.by = c("panel","ploidy")) + geom_abline(slope = 1) + | |
# scale_color_manual(values = c(col.pal[1:4])) | |
# | |
# ggscatter(classified_sox10 |> group_by(ImageNumber) |> | |
# dplyr::summarize(panel = unique(panel), | |
# karyotype = unique(ploidy), | |
# group = paste(unique(panel), "_", unique(ploidy)), | |
# clone = unique(clone), | |
# sox10 = unique(sox10_perc), | |
# mean_opp = mean(opp_ratio), | |
# cells = unique(cells_per_image)), | |
# x = "cells", y = "sox10", color = "group", | |
# cor.coef = T, add = "reg.line")+ | |
# facet_grid(rows=vars(panel), cols = vars(karyotype), margins = F) + | |
# scale_color_manual(values = c(col.pal[1:4])) | |
# sort dataset into four groups based on SOX10 status and ploidy | |
sorted_all <- classified_sox10 |> ungroup() |> | |
mutate(group = case_when(ploidy == "XY" & sox10_status ~ "XY+", | |
ploidy == "XY" & !sox10_status ~ "XY-", | |
ploidy == "XO" & sox10_status ~ "XO+", | |
ploidy == "XO" & !sox10_status ~ "XO-")) | |
sorted <- sorted_all |> | |
filter(str_detect(round, "R[1-9][0-9]")) |> | |
mutate(group = fct_reorder(group, opp_ratio, .fun = median, na.rm = TRUE)) | |
# scale OPP intensity values in relation to the median euploid positive intensity of each round | |
scaled_opps <- sorted |> filter(group == "XY+" & nM == 0) |> | |
group_by(panel, round) |> | |
dplyr::summarize(median_per_group = median(opp_ratio)) |> | |
right_join(sorted, by = c("panel", "round")) |> | |
ungroup() |> | |
mutate(scaled_opp = opp_ratio/median_per_group, .before = "panel") | |
ls.comparisons <- combn(scaled_opps |> pull(group) |> levels(), 2, simplify = F) | |
names(ls.comparisons)<-c(1:length(ls.comparisons)) | |
my_comparisons <-ls.comparisons[c(5,4,2)] | |
#by_image: group & OPP, median of image_averages | |
median_n <- scaled_opps |> ungroup() |> filter(nM == 0) |> | |
group_by(ImageNumber, round, group, panel, nM) |> | |
dplyr::summarize(opp = mean(scaled_opp), cells = n(), SOX10 = unique(sox10_status), | |
sox10 = mean(sox10_perc)) |> | |
group_by(group, panel, nM) |> | |
dplyr::summarize(median_opp = signif(median(opp),2), cells = sum(cells), | |
rounds = length(unique(round)), SOX10 = unique(SOX10), images = n(), | |
median_sox10 = signif(median(sox10),2)) |> | |
mutate(nM2 = as.factor(nM)) |> | |
ungroup() | |
scaled_opps |> | |
group_by(ImageNumber, group) |> filter(nM == 0) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
SOX10 = unique(sox10_status), | |
ploidy = unique(ploidy), | |
rounds = unique(round), | |
group = unique(group), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_med = median(sox10_perc)) |> | |
ungroup() |> | |
mutate(group = fct_relevel(group, c("XY+","XY-","XO+","XO-"))) |> | |
ggplot(aes(x = group, y = opp_scaled, color = SOX10, shape = group)) + | |
geom_quasirandom(size = 1, stroke = 0.5) + scale_shape_manual(values = c(16,16,1,1)) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(data = median_n, aes(x = group, y = 0.1, fontface = "bold", label = round(median_opp, 2))) + | |
geom_text(data = median_n, colour = "black", aes(x = group, y = -0.1, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel), cols = vars(nM), drop = T) + #coord_cartesian(ylim=c(-0.1,1.6)) + | |
stat_compare_means(ref.group = "XY+", label.y = 1.4, show.legend = T, hide.ns = F, label = "p.signif") + | |
theme_bw() + theme(strip.text.x = element_blank()) | |
ggsave(paste0(outdir,"/OPP_bygroup_image.pdf"),width = 4, height =4) | |
#by_image: group & OPP, median of averages | |
median_n <- scaled_opps |> ungroup() |> | |
group_by(ImageNumber, round, group, panel, nM) |> | |
dplyr::summarize(opp = mean(scaled_opp), cells = n(), SOX10 = unique(sox10_status), ploidy = unique(ploidy), | |
sox10 = mean(sox10_perc)) |> | |
group_by(group, ploidy, panel, nM) |> | |
dplyr::summarize(median_opp = signif(median(opp),2), cells = sum(cells), | |
rounds = length(unique(round)), SOX10 = unique(SOX10), images = n(), | |
median_sox10 = signif(median(sox10),2)) |> | |
mutate(nM2 = as.factor(nM)) |> | |
ungroup() | |
scaled_opps |> | |
group_by(ImageNumber, group) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
SOX10 = unique(sox10_status), | |
ploidy = unique(ploidy), | |
rounds = unique(round), | |
group = unique(group), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_med = median(sox10_perc)) |> | |
ungroup() |> | |
mutate(group = fct_relevel(group, c("XY+","XY-","XO+","XO-"))) |> | |
ggplot(aes(x = group, y = opp_scaled, color = SOX10, shape = group)) + | |
geom_quasirandom(size = 1, stroke = 0.5) + scale_shape_manual(values = c(16,16,1,1)) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(data = median_n, aes(x = group, y = 0.1, fontface = "bold", label = round(median_opp, 2))) + | |
geom_text(data = median_n, colour = "black", aes(x = group, y = -0.1, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel), cols = vars(nM)) + coord_cartesian(ylim=c(-0.1,1.6)) + | |
stat_compare_means(ref.group = "XY+", label.y = 1.4, show.legend = T, hide.ns = F, label = "p.signif") + | |
theme_bw() #+ theme(legend.position = "top", legend.direction = "horizontal") | |
ggsave(paste0(outdir,"/OPP_bygroup_image_rap.pdf"),width = 7.5, height =4) | |
median_n <- scaled_opps |> | |
ungroup() |> | |
group_by(group, panel, nM) |> | |
dplyr::summarize(median_opp = median(scaled_opp), cells = n(), SOX10 = unique(sox10_status), ploidy = unique(ploidy), | |
sox10 = mean(sox10_perc), images=length(unique(ImageNumber))) | |
scaled_opps |> | |
rename(SOX10 = "sox10_status") |> | |
mutate(group = fct_relevel(group, c("XY+","XY-","XO+","XO-"))) |> | |
ggplot(aes(x = group, y = scaled_opp, fill = SOX10, linetype = ploidy)) + | |
geom_violin(scale = "count", draw_quantiles = c (0.5), show.legend = T) + | |
scale_linetype_manual(values = c("22","solid")) + | |
geom_text(size=3, data = median_n, aes(x = group, y = 0.1, fontface = "bold", label = round(median_opp, 2), color = SOX10)) + | |
geom_text(size=3, data = median_n, aes(x = group, y = -0.1, label = paste0("(",images,")"))) + | |
facet_grid(rows = vars(panel), cols = vars(nM)) + ylim(c(-0.1,3)) + | |
stat_compare_means(ref.group = "XY+", label.y = 2.6, label = "p.signif", method.args = list(alternative="less")) + | |
theme_bw() | |
ggsave(paste0(outdir,"/OPP_rap_bycell.pdf"),width = 7.5, height =4) | |
#by_image male1: ploidy & sox10, median of averages | |
median_n <- scaled_opps |> ungroup() |> | |
group_by(ImageNumber, round, ploidy, nM, panel) |> | |
dplyr::summarize(opp = mean(scaled_opp), cells = n(), | |
sox10 = mean(sox10_perc)) |> | |
group_by(ploidy, nM, panel) |> | |
dplyr::summarize(med_opp = median(opp), med_cells = median(cells), images = n(), rounds = length(unique(round)), | |
med_sox10 = median(sox10)) |> | |
mutate(nM2 = as.factor(nM)) |> | |
ungroup() | |
scaled_opps |> | |
filter(panel == "male1") |> | |
group_by(ImageNumber) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
nM2 = as.factor(unique(nM)), | |
cells = n(), | |
rounds = unique(round), | |
ploidy = unique(ploidy), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_perc = 100*unique(sox10_perc)) |> | |
ungroup() |> | |
mutate(ploidy = fct_relevel(ploidy, c("XY","XO"))) |> | |
mutate(nM2 = fct_reorder(nM2, nM, na.rm = TRUE)) |> | |
ggplot(aes(x = ploidy, y = sox10_perc, color = ploidy)) + | |
geom_quasirandom() + | |
scale_color_manual(values = c(col.pal[2],col.pal[1])) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(size = 3, data = median_n, aes(x = ploidy, y = -1, fontface = "bold", label = paste0(100*round(med_sox10, 2),"%"))) + | |
geom_text(size = 3, data = median_n, colour = "black", aes(x = ploidy, y = -5, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel) | |
, cols = vars(nM)) + coord_cartesian(ylim = c(-5,80)) + | |
stat_compare_means(ref.group = "XY", label.y.npc = 1, label = "p.format") + | |
theme_bw() | |
ggsave(paste0(outdir,"/SOX10_by_rap1.pdf"),width = 7.3, height =4) | |
scaled_opps |> | |
filter(panel == "male2") |> | |
group_by(ImageNumber) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
nM2 = as.factor(unique(nM)), | |
cells = n(), | |
rounds = unique(round), | |
ploidy = unique(ploidy), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_perc = 100*unique(sox10_perc)) |> | |
ungroup() |> | |
mutate(ploidy = fct_relevel(ploidy, c("XY","XO"))) |> | |
mutate(nM2 = fct_reorder(nM2, nM, na.rm = TRUE)) |> | |
ggplot(aes(x = ploidy, y = sox10_perc, color = ploidy)) + | |
geom_quasirandom() + | |
scale_color_manual(values = c(col.pal[4],col.pal[3])) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(size = 3, data = median_n, aes(x = ploidy, y = -1, fontface = "bold", label = paste0(100*round(med_sox10, 2),"%"))) + | |
geom_text(size = 3, data = median_n, colour = "black", aes(x = ploidy, y = -5, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel) | |
, cols = vars(nM)) + coord_cartesian(ylim = c(-5,80)) + | |
stat_compare_means(ref.group = "XY", label.y.npc = 1, label = "p.format") + | |
theme_bw() | |
ggsave(paste0(outdir,"/SOX10_by_rap2.pdf"),width = 7.3, height =4) | |
#by_image: ploidy & cells & panel | |
scaled_opps |> | |
filter(panel == "male1") |> | |
group_by(ImageNumber) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
nM2 = as.factor(unique(nM)), | |
cells = n(), | |
rounds = unique(round), | |
ploidy = unique(ploidy), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_perc = 100*unique(sox10_perc)) |> | |
ungroup() |> | |
mutate(ploidy = fct_relevel(ploidy, c("XY","XO"))) |> | |
mutate(nM2 = fct_reorder(nM2, nM, na.rm = TRUE)) |> | |
ggplot(aes(x = ploidy, y = cells, color = ploidy)) + | |
geom_quasirandom() + | |
scale_color_manual(values = c(col.pal[2],col.pal[1])) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(size = 3, data = median_n, aes(x = ploidy, y = 15, fontface = "bold", label = round(med_cells,0))) + | |
geom_text(size = 3, data = median_n, colour = "black", aes(x = ploidy, y = -15, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel) | |
, cols = vars(nM)) + coord_cartesian(ylim = c(-20,500)) + | |
stat_compare_means(ref.group = "XY", label.y.npc = 1, label = "p.format") + | |
theme_bw() | |
ggsave(paste0(outdir,"/Cells_rap_bypanel1.pdf"),width = 7.3, height =4) | |
scaled_opps |> | |
filter(panel == "male2") |> | |
group_by(ImageNumber) |> | |
dplyr::summarize( | |
nM = unique(nM), | |
nM2 = as.factor(unique(nM)), | |
cells = n(), | |
rounds = unique(round), | |
ploidy = unique(ploidy), | |
panel = unique(panel), | |
opp_scaled = median(scaled_opp), | |
sox10_perc = 100*unique(sox10_perc)) |> | |
ungroup() |> | |
mutate(ploidy = fct_relevel(ploidy, c("XY","XO"))) |> | |
mutate(nM2 = fct_reorder(nM2, nM, na.rm = TRUE)) |> | |
ggplot(aes(x = ploidy, y = cells, color = ploidy)) + | |
geom_quasirandom() + | |
scale_color_manual(values = c(col.pal[4],col.pal[3])) + | |
geom_boxplot(color = rgb(0,0,0,0.5), fill = NA, outliers = F) + | |
geom_text(size = 3, data = median_n, aes(x = ploidy, y = 15, fontface = "bold", label = round(med_cells,0))) + | |
geom_text(size = 3, data = median_n, colour = "black", aes(x = ploidy, y = -15, label = paste0("(",rounds,")"))) + | |
facet_grid(rows = vars(panel) | |
, cols = vars(nM)) + coord_cartesian(ylim = c(-20,500)) + | |
stat_compare_means(ref.group = "XY", label.y.npc = 1, label = "p.format") + | |
theme_bw() | |
ggsave(paste0(outdir,"/Cells_rap_bypanel2.pdf"),width = 7.3, height =4) | |