Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
NCC_RNAseq_45X/DESEQ2_v08_NCC_pass.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
929 lines (809 sloc)
45.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setwd(dirname(rstudioapi::getSourceEditorContext()$path)) | |
require(tidyverse) | |
### NCC | |
FeatureColumn = 1 | |
LastMetaColumn = 6 | |
PathDesign.csv = "./coldata_NCC_final.csv" | |
SampleColumn = 1 | |
df.design2<-read_csv(file = PathDesign.csv) %>% as.data.frame() %>% | |
mutate(files = sprintf("./NCC_salmon/%s_quant/quant.sf", RegistryID)) %>% | |
add_column(CellType = "NCC") %>% | |
mutate(line = paste0(karyotype,replace_na(parse_number(str_split_fixed(Sample, "_", 3)[,2]),replace = 1))) %>% | |
mutate(donor = str_split_fixed(Sample, "_", 3)[,1], | |
condition2 = condition) %>% | |
mutate(condition = clone, | |
condition3 = case_when(clone == "X1_femaleXO" ~ "femXO", | |
clone == "X1_femaleXX" ~ "femXX", | |
clone == "X3_maleXO" ~ "male1XO", | |
clone == "X3_maleXY" ~ "male1XY", | |
clone == "X4_maleXO" ~ "male2XO", | |
clone == "X4_maleXY" ~ "male2XY", | |
T ~ NA)) %>% | |
relocate(simple, .after = donor) | |
df.design<-df.design2|>mutate(names = RegistryID) | |
require(DESeq2) | |
require(tximeta) | |
sum_exp <- tximeta(df.design) | |
gene_sum_exp <- summarizeToGene(sum_exp) | |
designform="Diff + condition" | |
findSV <- T | |
removeSV <- findSV | |
minCounts <- 20 | |
minSamples <- 4 | |
minAve <- 10 | |
vc.design<-unlist(strsplit(designform, split = " ")) | |
vc.designLast<-unlist(strsplit(tail(vc.design,1), split = ":")) | |
outdir = paste0("./repout_ex_",designform,"_SV_",removeSV) | |
dir.create(outdir) | |
dds0<- DESeqDataSet(gene_sum_exp, | |
design = formula(paste0("~" ,designform))) | |
if(!all(c(vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))],vc.designLast) %in% colnames(df.design))) | |
{stop("Check your designform and sample_sheet")} | |
write_csv(as.data.frame(counts(dds0)) |> | |
rownames_to_column(var = "ENSEMBL_ID"), | |
file = paste0(outdir,"/GSE264742_counts.csv.gz")) | |
write_csv(as.data.frame(dds0@colData) |> dplyr::select(-c(names, clone, condition, condition2, Xa, karyotype, simple)) |> | |
rename(Sample_ID = "RegistryID", | |
Cell_Type = "CellType", | |
Cell_line = "line", | |
condition = "condition3") |> | |
arrange(Sample_ID), | |
file = paste0(outdir,"/Samples.csv")) | |
#Pre-filter based on https://support.bioconductor.org/p/65091/ | |
dds1 <- estimateSizeFactors(dds0) | |
mn.nc1 <- counts(dds1, normalized=TRUE) | |
filter1 <- rowSums(mn.nc1 >= minCounts) >= minSamples | |
dds1 <- dds1[filter1,] | |
mn.nc2 <- counts(dds1, normalized=TRUE) | |
filter2 <- rowMeans(mn.nc2) >= minAve | |
dds1 <- dds1[filter2,] | |
print("Sample names match in correct order. Returning filtered dds") | |
if(findSV){ | |
require("sva") | |
require("limma") | |
mod <- model.matrix (design(dds1), colData(dds1)) | |
mod0 <- model.matrix (~ 1, colData(dds1)) | |
n.sv = num.sv(mn.nc2,mod,method="leek") | |
n.sv | |
ls.svseq <- svaseq(mn.nc2, mod, mod0, method="irw") | |
dds1.sva <- dds1 | |
colnames(ls.svseq$sv) <- colnames(ls.svseq$sv, do.NULL = FALSE, prefix = "SV") | |
colData(dds1.sva)<-cbind(colData(dds1.sva), ls.svseq$sv) | |
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls.svseq$sv), collapse="+"), "+", designform)) | |
dds2 <- DESeq(dds1.sva) | |
dds2<-dds2[which(mcols(dds2)$betaConv),] | |
#dds2 is for DESEQ2 | |
vsd=vst(dds2, blind = FALSE) | |
vsd3<-vsd | |
} | |
if(!findSV){print("Skipping SVA...") | |
dds2 <- DESeq(dds1) | |
dds2<-dds2[which(mcols(dds2)$betaConv),] | |
vsd=vst(dds2, blind = FALSE); vsd3 = vsd | |
} | |
if(removeSV) | |
{assay(vsd3) <- limma::removeBatchEffect(assay(vsd3) | |
, covariates = colData(vsd3)[,c(grep("SV", colnames(colData(vsd3)), value=T),vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))])] | |
, design = model.matrix( ~ colData(dds2)[,tail(vc.design,1)]))} | |
require(ggplot2) | |
fx.gg_col<-function(n) { | |
hues = seq(15, 375, length = n + 1) | |
hcl(h = hues, l = 65, c = 100)[1:n] | |
} | |
col.pal<- c("violet", "darkmagenta", | |
"deepskyblue", "blue", | |
"springgreen", "darkgreen") | |
names(col.pal)<-c("X1_femaleXO", "X1_femaleXX", | |
"X3_maleXO","X3_maleXY","X4_maleXO","X4_maleXY") | |
#names(col.pal)<-sort(unique(dds2$clone)) | |
df.ggPCA<-plotPCA(vsd, intgroup=c(colnames(colData(dds2))), returnData=T) | |
df.ggPCA3<-plotPCA(vsd3, intgroup=c(colnames(colData(dds2))), returnData=T) | |
percentVar <- round(100 * attr(df.ggPCA3, "percentVar")) | |
gg.PCA3<-ggplot(df.ggPCA3, aes(x = PC1, y = PC2, color = factor(condition), shape = factor(karyotype), label=RegistryID)) + | |
geom_point(size =3) + | |
xlab(paste0("PC1: ", percentVar[1], "% variance")) + | |
ylab(paste0("PC2: ", percentVar[2], "% variance")) + | |
ggtitle(paste0("SV =",findSV,", design =",designform)) + | |
scale_color_manual(values = col.pal) + | |
theme(text = element_text(size = 8)) | |
gg.PCA3 | |
ggsave(paste0(outdir,"/PCA3.pdf"), gg.PCA3, device = "pdf", width=4, height =2) | |
require(ggdendro) | |
sampleTree <- hclust(dist(t(assay(vsd3))), method = "average") | |
ggdendrogram(sampleTree) + labs(title="WGCNA Sample Dendrogram") | |
sample_dend_data <- dendro_data(sampleTree, type = "rectangle") | |
sample_dend_data$labels <- sample_dend_data$labels %>% | |
left_join(df.design |> | |
dplyr::select(RegistryID, clone, karyotype, donor), by=c("label"="RegistryID")) | |
col.pal2<-c(col.pal, fx.gg_col(length(unique(dds2$donor))), "grey30", "grey60", "grey90") | |
names(col.pal2)<-c(names(col.pal), sort(unique(dds2$donor)), sort(unique(dds2$karyotype))) | |
ggplot() + | |
geom_segment(data=sample_dend_data$segments, | |
mapping=aes(x = x, y = y, xend = xend, yend = yend))+ | |
geom_text(data = sample_dend_data$labels, | |
aes(x=x, y=y, label = label), | |
angle = 90, size = 3) + | |
geom_tile(data = sample_dend_data$labels, | |
aes(x=x,y=-6, fill=clone), height=2) + | |
annotate(geom = "text", x=52, y=-6, label="clone") + | |
scale_x_continuous(limits=c(0,60)) + | |
geom_tile(data = sample_dend_data$labels, | |
aes(x=x,y=-9, fill=karyotype), height=2) + | |
annotate(geom = "text", x=90, y=-9, label="karyotype") + | |
geom_tile(data = sample_dend_data$labels, | |
aes(x=x,y=-12, fill=donor), height=2) + | |
annotate(geom = "text", x=90, y=-12, label="donor") + | |
scale_fill_manual(values = col.pal2) + theme_bw() + ggtitle(paste0("SV =",findSV,", design =",designform)) | |
ggsave(paste0(outdir,"/Dendrogram.pdf")) | |
rm(dds0) | |
rm(dds1) | |
rm(dds1.sva) | |
dds<-dds2 | |
rm(dds2) | |
saveRDS(dds, file = paste0(outdir,"/dds.rds")) | |
saveRDS(vsd, file = paste0(outdir,"/vsd.rds")) | |
saveRDS(vsd3, file = paste0(outdir,"/vsd3.rds")) | |
dds<-readRDS(file = paste0(outdir,"/dds.rds")) | |
ap <- 0.1 | |
l2fc<-0 | |
require(org.Hs.eg.db) | |
require(tidyverse) | |
require(clusterProfiler) | |
require(BioVenn) | |
if(vc.designLast == "condition") | |
{ | |
DF.resF<-results(dds, contrast=c("condition","X1_femaleXO","X1_femaleXX"), alpha = ap) | |
DF.resM1<-results(dds, contrast=c("condition","X3_maleXO","X3_maleXY"), alpha = ap) | |
DF.resM2<-results(dds, contrast=c("condition","X4_maleXO","X4_maleXY"), alpha = ap) | |
DF.resO1<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXO"), alpha = ap) | |
DF.resO2<-results(dds, contrast=c("condition","X1_femaleXO","X4_maleXO"), alpha = ap) | |
DF.resO3 <-results(dds, contrast=c("condition","X3_maleXO","X4_maleXO"), alpha = ap) | |
DF.resE1<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXY"), alpha = ap) | |
DF.resE2 <- results(dds, contrast=c("condition","X1_femaleXO","X4_maleXY"), alpha = ap) | |
DF.resE3 <- results(dds, contrast=c("condition","X3_maleXY","X4_maleXY"), alpha = ap) | |
df.resF<-as.data.frame(DF.resF) | |
df.resM1<-as.data.frame(DF.resM1) | |
df.resM2<-as.data.frame(DF.resM2) | |
df.resO1<-as.data.frame(DF.resO1) | |
df.resO2<-as.data.frame(DF.resO2) | |
df.resO3 <- as.data.frame(DF.resO3) | |
df.resE1<-as.data.frame(DF.resE1) | |
df.resE2<-as.data.frame(DF.resE2) | |
df.resE3 <- as.data.frame(DF.resE3) | |
colnames(df.resF)<-paste("fXO", colnames(df.resF), sep = "_") | |
colnames(df.resM1)<-paste("mXO1", colnames(df.resM1), sep = "_") | |
colnames(df.resM2)<-paste("mXO2", colnames(df.resM2), sep = "_") | |
colnames(df.resO1)<-paste("fmO1", colnames(df.resO1), sep = "_") | |
colnames(df.resO2)<-paste("fmO2", colnames(df.resO2), sep = "_") | |
colnames(df.resO3) <- paste("m1m2O", colnames(df.resO3), sep = "_") | |
colnames(df.resE1)<-paste("fmE1", colnames(df.resE1), sep = "_") | |
colnames(df.resE2)<-paste("fmE2", colnames(df.resE2), sep = "_") | |
colnames(df.resE3)<-paste("m1m2E", colnames(df.resE3), sep = "_") | |
df.all<-cbind(assay(vsd3), df.resF, | |
df.resM1, | |
df.resM2, | |
df.resO1, | |
df.resO2, | |
df.resO3, df.resE1, | |
df.resE2, | |
df.resE3) | |
} | |
#metadata(dds)<-df.meta | |
df.meta<-rowRanges(dds) |> as.data.frame() | |
df.meta$ENSEMBL <- row.names(df.meta) | |
df.meta$ENSEMBL <- gsub("\\..*","", df.meta$ENSEMBL) | |
All.df <- bitr(df.meta$ENSEMBL, fromType = "ENSEMBL", | |
toType = c("ENTREZID", "SYMBOL"), | |
OrgDb = org.Hs.eg.db) | |
All.df.ENSG<-All.df[!duplicated(All.df$ENSEMBL),] | |
rownames(All.df.ENSG)<-All.df.ENSG$ENSEMBL | |
df.all2<-merge(df.meta[,c(1:LastMetaColumn,length(df.meta))], df.all, by = "row.names") | |
rownames(df.all2)<-df.all2$Row.names | |
colnames(df.all2)[1]<-"gene_name" | |
df<-df.all2[order(match(rownames(df.all2), rownames(assay(vsd3)))),] |> | |
mutate(gene_name = All.df.ENSG[ENSEMBL,"SYMBOL"]) | |
rm(df.all) | |
rm(df.all2) | |
# # all vtf | |
if(vc.designLast == "condition") | |
{ | |
vtf.F <- (df$fXO_padj <= ap & abs(df$fXO_log2FoldChange) >= l2fc) | |
vtf.M1 <- (df$mXO1_padj <= ap & abs(df$mXO1_log2FoldChange) >= l2fc) | |
vtf.M2 <- (df$mXO2_padj <= ap & abs(df$mXO2_log2FoldChange) >= l2fc) | |
vtf.O1 <- (df$fmO1_padj <= ap & abs(df$fmO1_log2FoldChange) >= l2fc) | |
vtf.O2 <- (df$fmO2_padj <= ap & abs(df$fmO2_log2FoldChange) >= l2fc) | |
vtf.O3 <- (df$m1m2O_padj <= ap & abs(df$m1m2O_log2FoldChange) >= l2fc) | |
vtf.E1 <- (df$fmE1_padj <= ap & abs(df$fmE1_log2FoldChange) >= l2fc) | |
vtf.E2 <- (df$fmE2_padj <= ap & abs(df$fmE2_log2FoldChange) >= l2fc) | |
vtf.E3 <- (df$m1m2E_padj <= ap & abs(df$m1m2E_log2FoldChange) >= l2fc) | |
names(vtf.F)<-rownames(df) | |
names(vtf.M1)<-rownames(df) | |
names(vtf.M2)<-rownames(df) | |
names(vtf.O1)<-rownames(df) | |
names(vtf.O2)<-rownames(df) | |
names(vtf.O3)<-rownames(df) | |
names(vtf.E1)<-rownames(df) | |
names(vtf.E2)<-rownames(df) | |
names(vtf.E3)<-rownames(df) | |
} | |
if(!("seqnames" %in% colnames(df))) | |
{ | |
vtf.X <- (df$Chr == "chrX" & as.numeric(df$Start) > 2750000 & as.numeric(df$End)<155701383) | |
vtf.Y <- (df$Chr == "chrY" & as.numeric(df$Start) > 2750000 & as.numeric(df$End)<56887903) | |
vtf.PAR <- ((df$Chr == "chrX" | df$Chr == "chrY") & !(vtf.X) & !(vtf.Y)) | |
} | |
if("seqnames" %in% colnames(df)) | |
{ | |
vtf.X <- (df$seqnames == "chrX" & as.numeric(df$start) > 2750000 & as.numeric(df$end)> 2750000 & as.numeric(df$start)<155701383 & as.numeric(df$end)<155701383) | |
vtf.Y <- (df$seqnames == "chrY" & as.numeric(df$start) > 2750000 & as.numeric(df$end)> 2750000 & as.numeric(df$start) < 56887903 & as.numeric(df$end)<56887903) | |
vtf.PAR <- ((df$seqnames == "chrX" | df$seqnames == "chrY") & !(vtf.X) & !(vtf.Y)) | |
} | |
names(vtf.X)<-rownames(df) | |
names(vtf.Y)<-rownames(df) | |
names(vtf.PAR)<-rownames(df) | |
vn.oFM1<-length(which(vtf.F & vtf.M1)) | |
vn.oFM2<-length(which(vtf.F & vtf.M2)) | |
vn.oM1M2<-length(which(vtf.M1 & vtf.M2)) | |
p.oFM1<-phyper(vn.oFM1-1, length(which(vtf.M1)), | |
length(dds)-length(which(vtf.M1)), | |
length(which(vtf.F)), lower.tail = F) | |
p.oFM2<-phyper(vn.oFM2-1, length(which(vtf.M2)), | |
length(dds)-length(which(vtf.M2)), | |
length(which(vtf.F)), lower.tail = F) | |
p.oM1M2<-phyper(vn.oM1M2-1, length(which(vtf.M2)), | |
length(dds)-length(which(vtf.M2)), | |
length(which(vtf.M1)), lower.tail = F) | |
biovenn<-draw.venn( | |
rownames(df[which(df$fXO_padj<=ap & (abs(df$fXO_log2FoldChange) >= l2fc)),]), | |
rownames(df[which(df$mXO1_padj<=ap & (abs(df$mXO1_log2FoldChange) >= l2fc)),]), | |
rownames(df[which(df$mXO2_padj<=ap & (abs(df$mXO2_log2FoldChange) >= l2fc)),]), | |
subtitle = signif(p.oM1M2,2), title = "b", | |
xtitle = "", ytitle = signif(p.oFM2,2), ztitle = signif(p.oFM1,2), | |
x_c = "violet", y_c ="deepskyblue", z_c = "springgreen", | |
width = 500, height = 500, | |
output = "pdf", nrtype = "abs", filename = paste0(outdir,"/venn.pdf") | |
) | |
require(preprocessCore) | |
require(limma) | |
if(vc.designLast == "condition") | |
{ | |
vc.ENSG<-df$ENSEMBL | |
mn.stat<-matrix(cbind(df$fXO_stat, | |
df$mXO1_stat, | |
df$mXO2_stat, | |
df$fmO1_stat, | |
df$fmO2_stat, | |
df$m1m2O_stat, | |
df$fmE1_stat, | |
df$fmE2_stat, | |
df$m1m2E_stat | |
), ncol = 9) | |
rownames(mn.stat)<-vc.ENSG | |
colnames(mn.stat)<-c("fXO", | |
"mXO1", | |
"mXO2", | |
"fmO1", | |
"fmO2", | |
"m1m2O", | |
"fmE1", | |
"fmE2", | |
"m1m2E" | |
) | |
rownames(mn.stat)<-vc.ENSG | |
df.gsea1<-na.omit(merge(All.df.ENSG, mn.stat, by="row.names")) | |
mn.stat_qn<-normalize.quantiles(matrix(unlist(df.gsea1[,-c(1:4)]), ncol = length(colnames(df.gsea1[,-c(1:4)])))) | |
colnames(mn.stat_qn)<-colnames(mn.stat) | |
df.gsea2<-cbind(df.gsea1[c(1:4)], | |
mn.stat_qn, | |
"avemXO" = rowMeans(mn.stat_qn[,grep("^mXO",colnames(mn.stat_qn))]), | |
"aveXO" = rowMeans(mn.stat_qn[,grep("XO",colnames(mn.stat_qn))]) | |
) | |
df.gsea2<-df.gsea2[!duplicated(df.gsea2$ENTREZID),] | |
ls.gsea <- list( | |
.fXO = (df.gsea2 %>% | |
arrange(desc(fXO)) %>% | |
dplyr::select(ENTREZID, fXO) %>% | |
deframe() | |
), | |
.mXO1 = (df.gsea2 %>% | |
arrange(desc(mXO1)) %>% | |
dplyr::select(ENTREZID, mXO1) %>% | |
deframe() | |
), | |
.mXO2 = (df.gsea2 %>% | |
arrange(desc(mXO2)) %>% | |
dplyr::select(ENTREZID, mXO2) %>% | |
deframe() | |
), | |
fmO1 = (df.gsea2 %>% | |
arrange(desc(fmO1)) %>% | |
dplyr::select(ENTREZID, fmO1) %>% | |
deframe() | |
), | |
fmE1 = (df.gsea2 %>% | |
arrange(desc(fmE1)) %>% | |
dplyr::select(ENTREZID, fmE1) %>% | |
deframe() | |
), | |
fmO2 = (df.gsea2 %>% | |
arrange(desc(fmO2)) %>% | |
dplyr::select(ENTREZID, fmO2) %>% | |
deframe() | |
), | |
fmE2 = (df.gsea2 %>% | |
arrange(desc(fmE2)) %>% | |
dplyr::select(ENTREZID, fmE2) %>% | |
deframe() | |
), | |
m1m2O = (df.gsea2 %>% | |
arrange(desc(m1m2O)) %>% | |
dplyr::select(ENTREZID, m1m2O) %>% | |
deframe() | |
), | |
m1m2E = (df.gsea2 %>% | |
arrange(desc(m1m2E)) %>% | |
dplyr::select(ENTREZID, m1m2E) %>% | |
deframe() | |
), | |
ave.XO = (df.gsea2 %>% | |
arrange(desc(aveXO)) %>% | |
dplyr::select(ENTREZID, aveXO) %>% | |
deframe() | |
), | |
ave.mXO = (df.gsea2 %>% | |
arrange(desc(avemXO)) %>% | |
dplyr::select(ENTREZID, avemXO) %>% | |
deframe() | |
) | |
) | |
} | |
rm(dds) | |
saveRDS(df.gsea2, file = paste0(outdir,"/df.gsea2.rds")) | |
saveRDS(ls.gsea, file = paste0(outdir,"/ls.gsea.rds")) | |
df.DEGtable<-df %>% | |
dplyr::select(-(starts_with(c("d", "Start", "End", "Strand", "Length", "fmO", "m1m2E")) | |
| ends_with(c("basemean", "lfcSE", "pvalue")))) | |
df.DEGtable$ENSEMBL <- row.names(df) | |
write.csv(df.DEGtable, file = paste0(outdir,"/DEG.csv"), row.names = F,quote = F) | |
require(msigdbr) | |
require(tidyverse) | |
msig_df <- msigdbr(species = "Homo sapiens") | |
require(clusterProfiler) | |
require(org.Hs.eg.db) | |
require(GOSemSim) | |
require(enrichplot) | |
require(DOSE) | |
pcut1 <-0.5 | |
qcut1 <- 0.5 | |
pcut2 <- 0.05 | |
qcut2 <- 0.05 | |
msig_Hall_C1_CP_TFT_MOD_HP_C8 <- msig_df %>% | |
filter(gs_cat == "H" | | |
gs_cat == "C1" | | |
(gs_cat == "C2" & str_detect(gs_subcat, "^CP:")) | | |
(gs_cat == "C3" & str_detect(gs_subcat, "^TFT:")) | | |
gs_cat == "C4" | | |
(gs_cat == "C5" & str_detect(gs_subcat, "^HPO")) | | |
gs_cat == "C8") %>% | |
dplyr::select(gs_name, entrez_gene) | |
rm(msig_df) | |
#run once | |
fx.GSEA<-function(genelist, terms2genes){ | |
set.seed(42) | |
gsea.result<-GSEA(genelist, TERM2GENE = terms2genes, pvalueCutoff = 0.1, | |
pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0) | |
return(gsea.result) | |
} | |
fx.gseGO<-function(genelist, ontology){ | |
set.seed(42) | |
gsea.result<-gseGO(genelist, ont = ontology, OrgDb = org.Hs.eg.db, keyType = "ENTREZID", | |
pvalueCutoff = 0.1, pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0) | |
return(gsea.result) | |
} | |
if(file.exists(paste0(outdir,"/msig_all.rds"))){msig_all<- readRDS(file = paste0(outdir,"/msig_all.rds"))} | |
if(!exists("msig_all")) | |
{ | |
ls.msig<-lapply(ls.gsea, function(X){fx.GSEA(X, msig_Hall_C1_CP_TFT_MOD_HP_C8)}) | |
ls.msig<-lapply(ls.msig, function(X){setReadable(X, org.Hs.eg.db, keyType = "ENTREZID")}) | |
ls.msig2<-Map(mutate, ls.msig, Category = names(ls.msig)) | |
msig_all<-ls.msig2[[1]] | |
logplim<-15 | |
msig_all@result<-do.call("rbind", lapply(ls.msig2, slot, 'result')) %>% | |
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1) | |
} | |
if(file.exists(paste0(outdir,"/GO_all.rds"))){go_all<- readRDS(file = paste0(outdir,"/GO_all.rds"))} | |
if(!exists("GO_all")) | |
{ | |
ls.go<-lapply(ls.gsea, function(X){fx.gseGO(X, "ALL")}) | |
ls.go<-lapply(ls.go, function(X){setReadable(X, org.Hs.eg.db, keyType = "ENTREZID")}) | |
ls.go2<-Map(mutate, ls.go, Category = names(ls.go)) | |
GO_all<-ls.go2[[1]] | |
logplim<-15 | |
GO_all@result<-do.call("rbind", lapply(ls.go2, function(X) {as.data.frame(X@result)})) %>% | |
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1) | |
} | |
fx.labels1<- Vectorize(function(X) { | |
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 )) | |
abbreviate(s, minlength = 30, dot = T) | |
}) | |
fx.labels2<- Vectorize(function(X) { | |
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2)) | |
s<-str_replace_all(s, "_", " ") | |
abbreviate(s, minlength = 30, dot = T,) | |
}) | |
gpfx.gsea<-function(prefix, labelfx, padj, lab.filter, X){ | |
X1 <- X@result %>% filter(str_detect(Description, prefix) & | |
(Description %in% (X@result %>% filter(p.adjust <= padj & str_detect(Category, lab.filter)) %>% dplyr::pull(Description)))) | |
X2 <-X1 %>% filter(Category == lab.filter) | |
ggplot(X1 %>% mutate(Description = fct_relevel(Description, X2 %>% arrange(Score) %>% dplyr::pull(Description))), | |
aes(x=Score, y=Description)) + facet_wrap(vars(Category), nrow=1, drop = F) + | |
geom_segment(aes(xend=0, yend = Description)) + | |
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) + | |
scale_color_gradient2(low="blue",mid = "white", high="red") + | |
scale_y_discrete(labels = labelfx) + | |
theme_minimal(base_size = 7) + | |
xlab("signed log10-scaled p.adjust") + | |
ylab(prefix) + | |
labs(size="core genes") | |
} | |
vc.ORPHA881<-c("HP_Abnormality_of_the_ovary","HP_Short_neck","HP_Delayed_puberty","HP_Increased_circulating_gonadotropin_level","HP_Short_sternum","HP_Osteopenia","HP_Osteoporosis","HP_Growth_delay","HP_Intrauterine_growth_retardation","HP_Delayed_skeletal_maturation","HP_Cubitus_valgus","HP_High_urinary_gonadotropin_level","HP_Short_stature","HP_Wide_intermamillary_distance","HP_Aplasia/Hypoplasia_of_the_nipples","HP_Premature_ovarian_insufficiency","HP_Female_infertility","HP_Postnatal_growth_retardation","HP_Increased_upper_to_lower_segment_ratio","HP_Abnormal_forearm_bone_morphology","HP_Enlarged_thorax","HP_High_palate","HP_Retrognathia","HP_Micrognathia","HP_Hearing_impairment","HP_Low-set_ears","HP_Recurrent_otitis_media","HP_Webbed_neck","HP_Thickened_nuchal_skin_fold","HP_Broad_neck","HP_Behavioral_abnormality","HP_Anxiety","HP_Abnormal_nonverbal_communicative_behavior","HP_Primary_amenorrhea","HP_Hypertension","HP_Glucose_intolerance","HP_Secondary_amenorrhea","HP_Hashimoto_thyroiditis","HP_Shield_chest","HP_Specific_learning_disability","HP_Hepatic_steatosis","HP_Obesity","HP_Failure_to_thrive_in_infancy","HP_Hypoplastic_toenails","HP_Low_posterior_hairline","HP_High,_narrow_palate","HP_Kyphosis","HP_Genu_valgum","HP_Elevated_hepatic_transaminase","HP_Aortic_arch_aneurysm","HP_Dermatoglyphic_ridges_abnormal","HP_Enlargement_of_the_distal_femoral_epiphysis","HP_Irregular_proximal_tibial_epiphyses","HP_Abnormal_dermatoglyphics","HP_Neck_pterygia","HP_Short_4th_metacarpal","HP_Short_5th_metacarpal","HP_Hypermobility_of_toe_joints","HP_Horseshoe_kidney","HP_Ectopic_kidney","HP_Abnormality_of_the_dentition","HP_Epicanthus","HP_Cystic_hygroma","HP_Strabismus","HP_Ptosis","HP_Myopia","HP_Depression","HP_Pectus_excavatum","HP_Hyperinsulinemia","HP_Atypical_scarring_of_skin","HP_Melanocytic_nevus","HP_Lymphedema","HP_Vitiligo","HP_Abnormal_fingernail_morphology","HP_Hip_dysplasia","HP_Hepatic_fibrosis","HP_Alopecia","HP_Atrial_septal_defect","HP_Bicuspid_aortic_valve","HP_Prolonged_QT_interval","HP_Myocardial_infarction","HP_Coarctation_of_aorta","HP_Pes_planus","HP_Hyperconvex_fingernails","HP_Short_toe","HP_Celiac_disease","HP_Cholestatic_liver_disease","HP_Scoliosis","HP_Autoimmunity","HP_Madelung_deformity","HP_Inverted_nipples","HP_Nevus","HP_Reduced_bone_mineral_density","HP_Numerous_congenital_melanocytic_nevi","HP_Type_II_diabetes_mellitus","HP_Attention_deficit_hyperactivity_disorder","HP_Hyperlipidemia","HP_External_ear_malformation","HP_Aplasia/Hypoplasia_of_the_mandible","HP_Splayed_toes","HP_Delayed_social_development","HP_Thyroiditis","HP_Gonadoblastoma","HP_Gastrointestinal_angiodysplasia","HP_Cirrhosis","HP_Inflammation_of_the_large_intestine","HP_Biliary_cirrhosis","HP_Aortic_dissection","HP_Melanoma","HP_Hypoplastic_left_heart","HP_Gastrointestinal_inflammation","HP_Arterial_dissection","HP_Renal_hypoplasia/aplasia","HP_Neurodevelopmental_delay") | |
lab.filter <- "ave.mXO" | |
lab.filter2<- names(table(msig_all@result$Category))[c(1:7,11)] | |
pcut1<-0.05 | |
pcut2<-0.05 | |
glol.chr<-gpfx.gsea("chr", fx.labels1, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
glol.tft<-gpfx.gsea("TARGET_GENES",fx.labels1, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
glol.hallmark<-gpfx.gsea("^HALLMARK", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
glol.wp<-gpfx.gsea("^WP", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
glol.kegg<-gpfx.gsea("^KEGG", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
glol.desc<-gpfx.gsea("^DESCARTES", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2)) | |
lab.filter <- "ave.mXO" | |
pcut1<-0.1 | |
pcut2<-0.1 | |
glol.hpTS<-gpfx.gsea("^HP", fx.labels2, pcut1, lab.filter, | |
msig_all %>%filter(Category %in% lab.filter2 & | |
Description %in% toupper(vc.ORPHA881))) | |
glol.hpTS + theme(legend.position = "top", legend.direction = "horizontal") + | |
scale_x_continuous(breaks=c(-2,0,2,4,6)) | |
ggsave(paste0(outdir,"/glol.hpTS.",lab.filter,".pdf"), device = "pdf", width = 4.5, height = 4.5, units = "in") | |
glol.wp + theme(legend.position = "top", legend.direction = "horizontal") | |
ggsave(paste0(outdir,"/glol.wp.",lab.filter,".pdf"), device = "pdf", width = 8, height = 6, units = "in") | |
saveRDS(msig_all, file = paste0(outdir,"/msig_all.rds")) | |
saveRDS(GO_all, file = paste0(outdir,"/GO_all.rds")) | |
lab.filter <- "ave.XO" | |
pcut1<-0.0001 | |
pcut2<-0.0001 | |
glol.bp<-gpfx.gsea(".", fx.labels1, pcut1, lab.filter, GO_all %>% filter(Category %in% lab.filter2)) | |
glol.cc<-gpfx.gsea(".", fx.labels1, pcut1, lab.filter, GO_all %>% filter(Category %in% lab.filter2 & ONTOLOGY == "CC")) | |
require(grid) | |
require(gridExtra) | |
ls.lol.plots<-list( | |
glol.hallmark + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size = 4), | |
glol.kegg + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size = 4), | |
glol.desc + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size =4) | |
) | |
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))}) | |
ggsave(paste0(outdir,"/glols_",lab.filter,".pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots), | |
heights = c(10*vn.lol.plots/sum(vn.lol.plots))), | |
width = 6.5, height = 9, | |
device = "pdf", units = "in") | |
#ensembl_version<-"102" | |
unique_categories <- c("ave.XO", "ave.mXO", ".fXO", ".mXO1", ".mXO2", "fmE1", "fmE2", "m1m2O") | |
GO_all |> | |
as.data.frame() |> | |
mutate( | |
object_file = "GO_all", | |
ID = sprintf("GO%s_%s", ONTOLOGY, ID) | |
) |> | |
dplyr::select(-ONTOLOGY) |> | |
rbind( | |
msig_all |> | |
as.data.frame() |> | |
mutate(object_file = "msig_all") | |
) |> | |
filter(Category %in% unique_categories) |> | |
rename(comparison = "Category") |> | |
dplyr::select(-c(Score, object_file, enrichmentScore, leading_edge, rank)) |> | |
arrange(qvalue) |> | |
write_csv(paste0(outdir,"/Table_S2.csv")) | |
require(biomaRt) | |
require(tidyverse) | |
################################### | |
# RUN ANNOTATION FILE HERE # | |
################################### | |
source("./AhernNCC_annotation_SP.R") | |
df.VarEsc<-df.Tuk2017 %>% mutate(Reported = case_when(!(SYMBOL %in% df.Tuk2017$SYMBOL[df.Tuk2017$Reported_XCI_status %in% c("Escape","Variable")]) & | |
(SYMBOL %in% c("IRAK1","MBTPS2", "SMS") | | |
SYMBOL %in% rownames(df.Katsir2019) | | |
SYMBOL %in% rownames(df.Sauterad2021 %>% filter(str_detect(XCI.status,"E")))) ~ "Variable", | |
TRUE ~ Reported_XCI_status)) %>% | |
mutate_at(vars("Reported"), ~replace_na(.,"Unknown")) | |
df_ae_counts <- read_tsv("./Xgenes_NCC.txt", | |
col_types = cols(contig=col_character(), | |
variants=col_character())) |> | |
filter(contig == "X" & | |
totalCount > 0) |> rename(ensembl_id = name) |> | |
mutate(sample_id=str_extract(bam, "d[0-9]*")) |> mutate(RegistryID = sample_id) |> | |
left_join(df.design, by = "RegistryID") |> | |
mutate(ENSEMBL = t(matrix(unlist(strsplit(ensembl_id, "[.]")),nrow=2))[,1]) |> | |
left_join(df.VarEsc |> rownames_to_column("ENSEMBL"), by = "ENSEMBL") |> | |
dplyr::select(ensembl_id, aCount, bCount, totalCount, sample_id, Sample, condition, Sex, donor, | |
simple, karyotype, Xa, CellType, line, | |
ENSEMBL, SYMBOL, Region, Reported) | |
pc<-0 | |
sPcut<-0.1 | |
POW<-0.8 | |
P0<-0.2 #double of 0.1 cutoff used below, ie. estimating 90% power of identifying all escapees with an allelic ratio of 0.2 | |
P1<-0.01 # read error estimate | |
N<-c(1:1000) | |
K<-qbinom(sPcut, N, P0)-1 | |
#ALPHA<-pbinom(K, N, P0) | |
POWER<-pbinom(q = K, size = N,prob =P1) | |
minREAD<-min(which(POWER>=POW)) | |
df_ae_binom <- df_ae_counts |> filter(totalCount >= minREAD) |> | |
filter(str_detect(donor, "AG05278")) |> | |
mutate(aCount = aCount+pc, | |
bCount = bCount+pc) |> | |
rowwise() |> | |
mutate(LAF = min(aCount,bCount)/(aCount+bCount)) #|> | |
df_ae_binom2<- df_ae_binom |> filter(karyotype == "XO") |> group_by(ensembl_id) |> | |
summarise(maxLAFxo = max(LAF)) |> | |
left_join(df_ae_binom, by="ensembl_id") |> rowwise() |> | |
mutate(aCount = max(aCount - round(maxLAFxo * totalCount,digits = 0)-pc ,0)) |> | |
mutate(bCount = max(bCount - round(maxLAFxo * totalCount,digits = 0)-pc ,0)) |> | |
mutate(totalCount = aCount+bCount) |> | |
mutate(LAF = min(aCount,bCount)/(aCount+bCount)) |> | |
mutate(pBi = pbinom(q = min(aCount,bCount), size = (aCount+bCount), prob = (P0/2),lower.tail = F)) |> | |
mutate(tf.esc = ((pBi <= sPcut) & (LAF >= P0/2))) |> ungroup() |> | |
group_by(ensembl_id, line) |> mutate(aSum = sum(aCount), bSum = sum(bCount)) |> | |
mutate(gLAF = min(aSum,bSum)/(aSum+bSum)) |> | |
mutate(gpBi = pbinom(q = min(aSum,bSum), size = (aSum+bSum), prob = (P0/2),lower.tail = F)) |> | |
mutate(gtf.esc = ((gpBi <= sPcut) & (gLAF >= P0/2))) |> ungroup() | |
vc.PAIR<-c("ZFX","TXLNG","MXRA5","TBL1X", "USP9X", "KDM5C", "NLGN4X", "KDM6A" , "EIF1AX", "PRKX", "RPS4X", "TMSB4X", "DDX3X") #"TSPYL2") | |
vc.PAIRY<-c("ZFY","TXLNGY","MXRA5Y","TBL1Y", "USP9Y", "KDM5D", "NLGN4Y", "UTY" , "EIF1AY", "PRKY", "RPS4Y1", "TMSB4Y", "DDX3Y") #"TSPY1") | |
vc.PAR<-df_ae_binom2 %>% filter(Region == "PAR") %>% pull(SYMBOL) %>% unique() | |
vc.oESC<-df_ae_binom2 %>% filter(tf.esc & Reported %in% c("Escape","Variable","VarOther") & !(SYMBOL %in% c(vc.PAR, vc.PAIR))) %>% pull(SYMBOL) %>% unique() | |
vc.uESC<-df_ae_binom2 %>% filter(tf.esc & !(SYMBOL %in% c(vc.PAR, vc.PAIR, vc.oESC, "IDS")) & !is.na(SYMBOL)) %>% pull(SYMBOL) %>% unique() | |
vc.OFF<-df_ae_binom2 %>% filter(!gtf.esc & totalCount >= minREAD & !(SYMBOL %in% c(vc.PAR, vc.PAIR, vc.oESC, "IDS")) & !is.na(SYMBOL)) %>% pull(SYMBOL) %>% unique() | |
vtf.PAIR<-(df$gene_name %in% vc.PAIR) | |
vtf.PAIRY<-(df$gene_name %in% vc.PAIRY) | |
vtf.PAIR2<-(df$gene_name %in% c(vc.PAIR,vc.PAIRY)) | |
vtf.ESC<-(rownames(df) %in% (df_ae_binom2 %>% filter(tf.esc) %>% pull(ensembl_id) %>% unique())) | |
vtf.oESC<-(df$gene_name %in% vc.oESC) | |
vtf.uESC<-(df$gene_name %in% vc.uESC) | |
vtf.OFF<-(df$gene_name %in% vc.OFF) | |
require(tidyHeatmap) | |
library(circlize) | |
df4<-cbind(df, vtf.F, vtf.M1, vtf.M2, | |
fXO = vtf.F * df$fXO_log2FoldChange, | |
mXO1 = vtf.M1 * df$mXO1_log2FoldChange, | |
mXO2 = vtf.M2 * df$mXO2_log2FoldChange, | |
vst.med = rowMedians(assay(vsd3)), | |
vtf.X, vtf.Y, vtf.PAR, vtf.PAIR, vtf.PAIRY, vtf.ESC, vtf.oESC, vtf.uESC, vtf.OFF) | |
df.ae<-df4 |> mutate(ensembl_id = gene_id) |> | |
left_join(df_ae_binom2 |> dplyr::select(ensembl_id, sample_id, line, Sample, SYMBOL, #Region, Reported, | |
gLAF, gpBi, gtf.esc) %>% filter(!is.na(SYMBOL)) |> | |
pivot_wider(id_cols = c(ensembl_id), | |
names_from = c(line), values_from = c(gLAF, gpBi, gtf.esc), values_fn = unique) , by = "ensembl_id") %>% left_join(df.VarEsc[,c("SYMBOL","Region","Reported")] |> rownames_to_column("ENSEMBL"), by="ENSEMBL") | |
rownames(df.ae)<-df.ae$ensembl_id | |
df.ae<-df.ae[order(match(rownames(df.ae), rownames(df))),] | |
df.ae[is.na(df.ae$Reported),"Reported"]<-"LowAlleleC." | |
#vsd_abs contains SV-corrected vst counts | |
vsd_abs_ae <- as.data.frame(assay(vsd3)) %>% | |
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_count") %>% | |
left_join(colData(vsd3) %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>% | |
left_join(df.ae, by = "ensembl_id") | |
vsd_abs_markers <- left_join(vsd_abs_ae, df.Frith %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Frith)) %>% | |
left_join(df.Lee %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Lee)) %>% | |
left_join(df.KarzbrunDEGs %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, KarzbrunDEGs)) %>% | |
left_join(df.SimoesCosta %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, SimoesCosta)) %>% | |
left_join(df.Hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) %>% | |
left_join(df.Soldatov %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Soldatov)) | |
vsd_med_ae <- as.data.frame(assay(vsd3)-rowMedians(assay(vsd3))) %>% | |
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_diff") %>% | |
left_join(colData(vsd3) %>% as.data.frame() %>% rownames_to_column(var="sample_name")) %>% | |
left_join(df.ae, by = "ensembl_id") | |
vsd_med_markers<-left_join(vsd_med_ae, df.Frith %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Frith)) %>% | |
left_join(df.Lee %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Lee)) %>% | |
left_join(df.KarzbrunDEGs %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, KarzbrunDEGs)) %>% | |
left_join(df.SimoesCosta %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, SimoesCosta)) %>% | |
left_join(df.Hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) %>% | |
left_join(df.Soldatov %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Soldatov)) | |
#Escape vs. DESEQ2 stats and med_vsd | |
fx.col.med = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")) | |
tab.sign.All<-table(sign(df[which(vtf.F & vtf.M1 & vtf.M2),c("fXO_stat", "mXO1_stat", "mXO2_stat")])) | |
tab.sign.notM1<-table(sign(df[which(vtf.F & !vtf.M1 & vtf.M2),c("fXO_stat", "mXO2_stat")])) | |
tab.sign.notM2<-table(sign(df[which(vtf.F & vtf.M1 & !vtf.M2),c("fXO_stat", "mXO1_stat")])) | |
tab.sign.notF<-table(sign(df[which(!vtf.F & vtf.M1 & vtf.M2),c("mXO1_stat", "mXO2_stat")])) | |
p.sign.All<-signif(pbinom(sum(tab.sign.All[c(1,length(tab.sign.All))]), size = sum(tab.sign.All),prob = 0.25,lower.tail = F),2) | |
p.sign.notM1<-signif(pbinom(sum(tab.sign.notM1[c(1,length(tab.sign.notM1))]), size = sum(tab.sign.notM1),prob = 0.5,lower.tail = F),2) | |
p.sign.notM2<-signif(pbinom(sum(tab.sign.notM2[c(1,length(tab.sign.notM2))]), size = sum(tab.sign.notM2),prob = 0.5,lower.tail = F),2) | |
p.sign.notF<-signif(pbinom(sum(tab.sign.notF[c(1,length(tab.sign.notF))]), size = sum(tab.sign.notF),prob = 0.5,lower.tail = F),2) | |
label.All<-paste0("All (", sum(tab.sign.All[c(1,length(tab.sign.All))]),"/",sum(tab.sign.All), ") ",p.sign.All) | |
label.notM1<-paste0("notM1 (", sum(tab.sign.notM1[c(1,length(tab.sign.notM1))]),"/",sum(tab.sign.notM1), ") ",p.sign.notM1) | |
label.notM2<-paste0("notM2 (", sum(tab.sign.notM2[c(1,length(tab.sign.notM2))]),"/",sum(tab.sign.notM2), ") ",p.sign.notM2) | |
label.notF<-paste0("notF (", sum(tab.sign.notF[c(1,length(tab.sign.notF))]),"/",sum(tab.sign.notF), ") ",p.sign.notF) | |
tb1<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO1_stat)) & (sign(fXO_stat) == sign(mXO2_stat)) & vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.All) %>% add_column(Dir = "conc.") | |
tb2<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO1_stat)) & vtf.F & vtf.M1 & !vtf.M2) %>% add_column(DEGo = label.notM2) %>% add_column(Dir = "conc.") | |
tb3<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO2_stat)) & vtf.F & !vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notM1) %>% add_column(Dir = "conc.") | |
tb4<-vsd_med_ae %>% filter((sign(mXO1_stat) == sign(mXO2_stat)) & !vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "conc.") | |
tb5<-vsd_med_ae %>% filter(((sign(fXO_stat) != sign(mXO1_stat)) | (sign(fXO_stat) != sign(mXO2_stat))) & vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.All) %>% add_column(Dir = "disc.") | |
tb6<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO1_stat)) & vtf.F & vtf.M1 & !vtf.M2) %>% add_column(DEGo = label.notM2) %>% add_column(Dir = "disc.") | |
tb7<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO2_stat)) & vtf.F & !vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notM1) %>% add_column(Dir = "disc.") | |
tb8<-vsd_med_ae %>% filter((sign(mXO1_stat) != sign(mXO2_stat)) & !vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "disc.") | |
tb<- bind_rows(tb1, tb2, tb3, tb4, | |
tb5, tb6, tb7, tb8 | |
) | |
tbhm<- tb %>% dplyr::select(ENSEMBL, sample_name, vst_diff, fXO_stat, mXO1_stat, mXO2_stat, Dir, DEGo, condition, karyotype) %>% | |
group_by(DEGo) %>% | |
heatmap(ENSEMBL, sample_name, vst_diff, scale="none", palette_value = fx.col.med, | |
palette_grouping = list(rep("white",length(table(tb$DEGo))) )) %>% | |
add_bar(fXO_stat) %>% add_bar(mXO1_stat) %>% add_bar(mXO2_stat) %>% add_tile(Dir, palette=c("saddlebrown","tan")) %>% | |
add_tile(condition, palette = col.pal[tb$condition |> as.character() |> unique()]) %>% | |
add_tile(karyotype, palette = c("grey50", "grey75", "grey25")) | |
#tbhm | |
save_pdf(tbhm,filename = paste0(outdir, "/GlobalDEGs2.pdf"),width = 7.5, height=10, units = "in") | |
#Escape vs. DESEQ2 stats and med_vsd | |
l2fc<-0 | |
require(RColorBrewer) | |
col.pal3<-brewer.pal(n = 9, name = "Set1")[c(5:9)] | |
names(col.pal3)<-c("Escape","Inactive","LowAlleleC.","Unknown", "Variable") | |
fx.col.med = colorRamp2(c(-1, 0, 1), c("blue", "white", "red")) | |
tb1<-vsd_med_ae %>% filter(vtf.PAR) %>% add_column(Strata = "..PAR") | |
tb2<-vsd_med_ae %>% filter(vtf.PAIR) %>% add_column(Strata = ".PairX") | |
#tb3<-vsd_med_ae %>% filter(vtf.PAIRY) %>% add_column(Strata = ".PairY") | |
tb4<-vsd_med_ae %>% filter(vtf.oESC & | |
(gtf.esc_XX19 | gtf.esc_XX23) & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc))) %>% | |
add_column(Strata = "known escapees") | |
tb5<-vsd_med_ae %>% filter( | |
vtf.uESC & | |
(gtf.esc_XX19 | gtf.esc_XX23) & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = "novel/reactivated") | |
tb_Hm_allelicX<- bind_rows(tb1, tb2, #tb3, | |
tb4, tb5) | |
tbhm<- tb_Hm_allelicX %>% drop_na(gene_name) %>% filter(!(gene_name == "PRKY")) %>% | |
arrange(condition) %>% group_by(Strata) %>% | |
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med, row_names_gp = gpar(fontsize = 6), cluster_rows = F) %>% | |
add_bar("gLAF_XX19") %>% add_bar("gLAF_XX23") %>% | |
add_bar(fXO) %>% add_bar(mXO1) %>% add_bar(mXO2) %>% | |
add_tile(condition, palette = col.pal[tb_Hm_allelicX$condition |> as.character() |> unique()]) %>% | |
add_tile(Reported, palette = col.pal3[tb_Hm_allelicX$Reported |> as.character() |> unique()]) | |
save_pdf(tbhm,filename = paste0(outdir,"/Hm_allelicX002.pdf"),width = 8, height=8, units = "in") | |
df.Markers_tb <- df.Markers.abs %>% | |
rownames_to_column("sample_name") %>% pivot_longer(-sample_name, names_to="Marker", values_to="vst_med") %>% | |
left_join(colData(vsd3) %>% as.data.frame %>% rownames_to_column(var="sample_name")) | |
require(pals) | |
hm.markers<-heatmap(df.Markers_tb %>% filter(!str_detect(Marker, "Hsiao")) %>% | |
group_by(donor), .row = Marker,.column = Sample, | |
.value = vst_med, scale = "none", cluster_rows=T, | |
palette_value = colorRamp2(c(7:13), turbo(7)) | |
) %>% | |
add_tile(karyotype, palette = c("grey90", "grey30", "grey60")) | |
save_pdf(hm.markers,filename = paste0(outdir,"/Hm.markers.pdf"),width = 8, height=5, units = "in") | |
require(ggprism) | |
require(rstatix) | |
#look at HOXexpression | |
fx.markers<-function(vsd_tibble, anno_label, vst_unit, miny, maxy) { | |
tbF<-vsd_tibble |> | |
mutate(unilabel = vsd_tibble |> select(all_of(anno_label)) |> unlist()) |> | |
filter(!is.na(unilabel) & (unilabel != ".F") & (vtf.F | vtf.M1 | vtf.M2)) |> | |
dplyr::select(unilabel, vst_unit, condition, donor,simple, line, gene_name) |> | |
rename(out_vst = all_of(vst_unit)) | |
gg.tbF<-ggplot(tbF, aes(x= simple, y=out_vst)) + | |
facet_grid(rows = vars(donor), cols = vars(unilabel)) + ylim(miny,maxy) + | |
geom_boxplot(aes(fill=condition), outlier.shape = NA, notch = T) + scale_fill_manual(values = col.pal) | |
tb_med<-tbF %>% rstatix::group_by(donor, unilabel, simple) %>% | |
rstatix::get_summary_stats(out_vst) %>% add_column(group1 = "EU", .before = 1) %>% | |
mutate(group2 = simple) |> | |
pivot_wider(id_cols = c(donor, unilabel, group1), names_from=group2, values_from = median) |> | |
mutate(diff = XO-EU) |> mutate(group2 = "XO") | |
tb_mwup<- tbF %>% rstatix::group_by(donor, unilabel) %>% | |
rstatix::wilcox_test(out_vst ~ simple, p.adjust.method = "none", ref.group = "EU", conf.level = 0.9, paired = F) | |
tb_mwup2<-tb_mwup %>% rstatix::add_xy_position() %>% | |
mutate(y.position = maxy-0.5) %>% mutate(p.adj = signif(p,digits = 2)) | |
gg.tbF + add_pvalue(tb_mwup2, label = "p.adj", remove.bracket = T, tip.length = 0, bracket.size = 0, label.size = 3) + | |
theme(text = element_text(size = 8)) + theme(legend.position = "none") | |
ggsave(filename = paste0(outdir,"/Hm.markers_",anno_label,"_",vst_unit,".pdf"),width = 2.5, height=3, units = "in") | |
} | |
fx.markers(vsd_med_markers, "Frith", "vst_diff", -1.5, 1.5) | |
fx.markers(vsd_med_markers, "KarzbrunDEGs", "vst_diff", -1.5, 1.5) | |
fx.markers(vsd_med_markers, "Lee", "vst_diff", -1.5, 1.5) | |
fx.markers(vsd_med_markers, "SimoesCosta", "vst_diff", -1.5, 1.5) | |
saveRDS(vsd_abs_ae, file = paste0(outdir,"/vsd_abs_ae.rds")) | |
saveRDS(vsd_med_ae, file = paste0(outdir,"/vsd_med_ae.rds")) | |
saveRDS(vsd_abs_markers, file = paste0(outdir,"/vsd_abs_markers.rds")) | |
saveRDS(vsd_med_markers, file = paste0(outdir,"/vsd_med_markers.rds")) | |
#vsd_abs_ae<-readRDS(file = paste0(outdir,"/vsd_abs_ae.rds")) | |
#vsd_med_ae<-readRDS(file = paste0(outdir,"/vsd_med_ae.rds")) | |
#vsd_med_markers<-readRDS(file = paste0(outdir,"/vsd_med_markers.rds")) | |
vcounts<-ggplot(vsd_abs_ae %>% filter((vtf.F | vtf.M1 | vtf.M2) & gene_name %in% c(vc.PAIR[!(vc.PAIR == "MXRA5")],vc.PAIRY)) %>% | |
mutate(gene_name = case_when(gene_name == "UTY" ~ "KDM6AL", | |
T ~ gene_name)), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), ncol =6, scales = "free") + scale_colour_manual(values=col.pal) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + | |
geom_jitter(aes(color = clone), shape = 20, size = 1) + | |
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) | |
vcounts | |
ggsave(paste0(outdir,"/PAIR_NCCs.pdf"), | |
plot = vcounts + # theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen")) | |
, width = 7, height =7, units = "in") | |
vcounts<-ggplot(vsd_abs_ae |>filter(gene_name %in% c("XIST", "XACT", "FTX", "JPX", "FIRRE") | | |
ENSEMBL %in% c("ENSG00000241743" | |
)), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), ncol =6) + scale_colour_manual(values=col.pal) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + | |
geom_jitter(aes(color = clone), shape = 20, size = 1) + | |
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) | |
vcounts | |
ggsave(paste0(outdir,"/XlncRNA_NCCs.pdf"), | |
plot = vcounts, width = 5, height =2, units = "in") | |
vcounts<-ggplot(vsd_abs_ae %>% filter(vtf.PAR) %>% | |
mutate(gene_name = case_when(gene_name %in% c("SPRY3","VAMP7","WASH6P","TMLHE","NA") ~ paste0(".",gene_name), | |
T ~ gene_name)), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), ncol =4) + scale_colour_manual(values=col.pal) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + #ylim(c(9,13)) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) | |
vcounts | |
ggsave(paste0(outdir,"/PAR_NCCs.pdf"), | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen")) | |
, width = 7, height =7, units = "in") | |
vcounts<-ggplot(vsd_abs_ae %>% filter(str_detect(gene_name, "^HOX") & !str_detect(gene_name, "-AS")), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), ncol =6) + scale_colour_manual(values=col.pal) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(7.5,15) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) | |
vcounts | |
ggsave(paste0(outdir,"/HOX_NCCs.pdf"), | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen")) | |
, width = 7, height =7, units = "in") | |
vc.top<-c("RPL10","RPL10A","RPL11","RPL12","RPL13","RPL13A","RPL14","RPL15","RPL17", | |
"RPL18","RPL18A","RPL19","RPL21","RPL22","RPL23","RPL23A","RPL24","RPL26", | |
"RPL27","RPL27A","RPL28","RPL29","RPL3","RPL30","RPL31","RPL32","RPL34", | |
"RPL35","RPL35A","RPL36","RPL36A","RPL37","RPL37A","RPL38","RPL39","RPL3L", | |
"RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL8","RPL9","RPLP0","RPLP1", | |
"RPLP2","RPSA","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A", | |
"RPS16","RPS17","RPS18","RPS19","RPS2","RPS20","RPS21","RPS23","RPS24", | |
"RPS25","RPS26","RPS27","RPS27A","RPS28","RPS29","RPS3","RPS3A","RPS4X", | |
"RPS4Y1","RPS4Y2","RPS5","RPS6","RPS7","RPS8","RPS9") | |
df.top <- df |> filter((gene_name %in% c(vc.top)) & #(mXO2_padj <= 0.1) & | |
!(str_detect(gene_name, "[XY]")) | |
) |> | |
dplyr::select(gene_name, mXO1_log2FoldChange, mXO2_log2FoldChange, fXO_log2FoldChange, | |
mXO1_padj, mXO2_padj, fXO_padj) |> | |
pivot_longer(cols = !(c(gene_name)), names_sep = "_", names_to= c("set",".value")) |> | |
mutate(subunit = case_when(str_detect(gene_name, "RPS") ~ "RPS", | |
str_detect(gene_name, "RPL") ~ "RPL", | |
T ~ "other"), | |
significant = case_when(padj <= 0.1 ~ T, T ~ F)) | |
gghistogram(df.top, x="log2FoldChange", y = "count", color = "significant", | |
fill = "significant", add = "mean", facet.by = c("set","subunit"), | |
binwidth=0.02) + | |
scale_y_continuous(breaks=seq(0,12,3)) + | |
theme(text= element_text(size = 7)) | |
ggsave(paste0(outdir,"/rpls_l2fc.pdf"), device = "pdf", width=3, height =4) | |
vc.top2<-c("FAU", "UBF", | |
"EIF3A","EIF3E","EIF3F","EIF3H","EIF4B","PABPC1","EEF1A1","EEF1B2","EEF1D", | |
"EEF1G","EEF2","TCTP","HNRNPA1","NAP1L1","VIM", "NPM1", "RACK1") | |
vc.test<-df[which(df$gene_name %in% vc.top2 | |
& (vtf.M1 & vtf.M2)) | |
,"gene_name"] | |
vcounts<-ggplot(vsd_abs_ae %>% filter(gene_name %in% vc.test), | |
aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), ncol =6, scales = "free") + scale_colour_manual(values=col.pal) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
theme(axis.text.x=element_blank(), | |
axis.ticks.x=element_blank(), | |
legend.position = "top") | |
vcounts | |
ggsave(paste0(outdir,"/top2.pdf"), device = "pdf", width=6, height =3) | |
rm(vsd_abs_ae) | |
rm(vsd_med_ae) | |
rm(vsd_abs_markers) | |
rm(vsd_med_markers) |