Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
BAP_trophoblast_45X/DESEQ2_analysis_v04_TBLs_pass.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
421 lines (363 sloc)
30.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
outdir<-dirname(rstudioapi::getSourceEditorContext()$path) | |
require(DESeq2) | |
require(biomaRt) | |
require(tidyverse) | |
require(ggplot2) | |
# can pull in gene names with your favorite method, I'm using my master "df" which lists all gene_names from count table, except those filtered out | |
vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1] | |
require(org.Hs.eg.db) | |
require(clusterProfiler) | |
All.df <- bitr(vc.ENSG, fromType = "ENSEMBL", | |
toType = c("ENTREZID", "SYMBOL"), | |
OrgDb = org.Hs.eg.db) | |
All.df.ENSG<-All.df[!duplicated(All.df$ENSEMBL),] | |
rownames(All.df.ENSG)<-All.df.ENSG$ENSEMBL | |
fx.gg_col<-function(n) { | |
hues = seq(15, 375, length = n + 1) | |
hcl(h = hues, l = 65, c = 100)[1:n] | |
} | |
################################### | |
# RUN ANNOTATION FILE HERE # | |
################################### | |
source("./AhernTBL_annotation_v01_pass.R") | |
################################### | |
# RUN AE SCRIPT FILE HERE # | |
################################### | |
source("./AE_analysis_v01_pass.R") | |
pdf(paste0(outdir,"/Escapee_DESEQ.pdf"), width = 6, height= 6, onefile=T, paper="letter") | |
par(mfrow=c(2,2), mai = rep(0.6, 4), cex=0.6, cex.lab = 0.7, cex.axis = 0.7, omi = rep(0, 4)) | |
pch.FAIL<-5 | |
pch.PAR<-18 | |
pch.PAIR<-18 | |
pch.oESC<-18 | |
pch.Y<-18 | |
col.PAR=adjustcolor( "black", alpha.f = 0.7) | |
col.PAIR=adjustcolor( "coral", alpha.f = 0.7) | |
col.oESC=adjustcolor( "cyan3", alpha.f = 0.7) | |
col.FAIL=adjustcolor( "red", alpha.f = 0.7) | |
col.Y=adjustcolor( "blue", alpha.f = 0.7) | |
fcex=0.5 | |
pcex=0.7 | |
plot(df$fXO_log2FoldChange[which(vtf.F & vtf.PAIR)], -log(df$fXO_padj[which(vtf.F & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR, cex=pcex, | |
xlim=c(-3,1),ylim=c(0,80),xlab = "log2FC XO-XX19/23", ylab = "-log10(p.adj) XO-XX19/23", main = "fXO vs. XX19/23") | |
points(df$fXO_log2FoldChange[which(vtf.F & vtf.PAR)], -log(df$fXO_padj[which(vtf.F & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex) | |
points(df$fXO_log2FoldChange[which(vtf.F & vtf.oESC)], -log(df$fXO_padj[which(vtf.F & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex) | |
points(df$fXO_log2FoldChange[which(vtf.F & vtf.FAIL)], -log(df$fXO_padj[which(vtf.F & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex) | |
text(df$fXO_log2FoldChange[which(vtf.F & vtf.PAIR)], | |
-log(df$fXO_padj[which(vtf.F & vtf.PAIR)],10), | |
labels = df$gene_name[which(vtf.F & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3) | |
text(df$fXO_log2FoldChange[which(vtf.F & vtf.PAR)], | |
-log(df$fXO_padj[which(vtf.F & vtf.PAR)],10), | |
labels = df$gene_name[which(vtf.F & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3) | |
text(df$fXO_log2FoldChange[which(vtf.F & vtf.oESC)], | |
-log(df$fXO_padj[which(vtf.F & vtf.oESC)],10), | |
labels = df$gene_name[which(vtf.F & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3) | |
abline(v=c(-0.3,0.3),h=1.3, lty=2) | |
plot(df$FE_log2FoldChange[which(vtf.FE & vtf.PAIR)], -log(df$FE_padj[which(vtf.FE & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR, cex=pcex, | |
xlab = "log2FC XX6-XX19/23", ylab = "-log10(p.adj) XX6-XX19/23", main = "XX6 vs. XX19/23", xlim=c(-3,1),ylim=c(0,40)) | |
points(df$FE_log2FoldChange[which(vtf.FE & vtf.PAR)], -log(df$FE_padj[which(vtf.FE & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex) | |
points(df$FE_log2FoldChange[which(vtf.FE & vtf.oESC)], -log(df$FE_padj[which(vtf.FE & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex) | |
points(df$FE_log2FoldChange[which(vtf.FE & vtf.FAIL)], -log(df$FE_padj[which(vtf.FE & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex) | |
text(df$FE_log2FoldChange[which(vtf.FE & vtf.PAIR)], | |
-log(df$FE_padj[which(vtf.FE & vtf.PAIR)],10), | |
labels = df$gene_name[which(vtf.FE & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3) | |
text(df$FE_log2FoldChange[which(vtf.FE & vtf.PAR)], | |
-log(df$FE_padj[which(vtf.FE & vtf.PAR)],10), | |
labels = df$gene_name[which(vtf.FE & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3) | |
text(df$FE_log2FoldChange[which(vtf.FE & vtf.oESC)], | |
-log(df$FE_padj[which(vtf.FE & vtf.oESC)],10), | |
labels = df$gene_name[which(vtf.FE & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3) | |
abline(v=c(-0.3,0.3),h=1.3, lty=2) | |
plot(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAIR)], -log(df$fXO2_padj[which(vtf.F2 & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR,cex=pcex, | |
xlab = "log2FC XO-XX6", ylab = "-log10(p.adj) XO-XX6", main = "fXO vs. XX6", xlim=c(-3,1),ylim=c(0,30)) | |
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAR)], -log(df$fXO2_padj[which(vtf.F2 & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex) | |
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.oESC)], -log(df$fXO2_padj[which(vtf.F2 & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex) | |
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.FAIL)], -log(df$fXO2_padj[which(vtf.F2 & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex) | |
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAIR)], | |
-log(df$fXO2_padj[which(vtf.F2 & vtf.PAIR)],10), | |
labels = df$gene_name[which(vtf.F2 & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3) | |
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAR)], | |
-log(df$fXO2_padj[which(vtf.F2 & vtf.PAR)],10), | |
labels = df$gene_name[which(vtf.F2 & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3) | |
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.oESC)], | |
-log(df$fXO2_padj[which(vtf.F2 & vtf.oESC)],10), | |
labels = df$gene_name[which(vtf.F2 & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3) | |
abline(v=c(-0.3,0.3),h=1.3,lty=2) | |
plot(df$mXO_log2FoldChange[which(vtf.M & vtf.Y)], -log(df$mXO_padj[which(vtf.M & vtf.Y)],10),pch=pch.PAIR, col=col.Y,cex=pcex, | |
xlab = "log2FC XO-XY", ylab = "-log10(p.adj) XO-XY", main = "mXO vs. XY", xlim=c(-14,1), ylim=c(0,60)) | |
points(df$mXO_log2FoldChange[which(vtf.M& vtf.PAR)], -log(df$mXO_padj[which(vtf.M & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex) | |
points(df$mXO_log2FoldChange[which(vtf.M& vtf.oESC)], -log(df$mXO_padj[which(vtf.M & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex) | |
text(df$mXO_log2FoldChange[which(vtf.M & vtf.Y)], | |
-log(df$mXO_padj[which(vtf.M & vtf.Y)],10), | |
labels = df$gene_name[which(vtf.M & vtf.Y)], col=col.Y,pos = 1, cex=fcex, offset=0.3) | |
text(df$mXO_log2FoldChange[which(vtf.M& vtf.PAR)], | |
-log(df$mXO_padj[which(vtf.M& vtf.PAR)],10), | |
labels = df$gene_name[which(vtf.M& vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3) | |
text(df$mXO_log2FoldChange[which(vtf.M& vtf.oESC)], | |
-log(df$mXO_padj[which(vtf.M& vtf.oESC)],10), | |
labels = df$gene_name[which(vtf.M& vtf.oESC)], col=col.oESC,pos = 2, cex=fcex, offset=0.3) | |
abline(v=c(-0.3,0.3),h=1.3, lty=2) | |
dev.off() | |
require(tidyHeatmap) | |
library(circlize) | |
vtf.ae<-(!is.na(mtf.esc[,grep("C6XX",colnames(mtf.esc))]) & !is.na(mtf.esc[,grep("C19XX",colnames(mtf.esc))]) & !is.na(mtf.esc[,grep("C23XX",colnames(mtf.esc))])) | |
vtf.OFF<-(vtf.X & !vtf.oESC & !vtf.PAR & !vtf.PAIR & (rownames(df) %in% rownames(mtf.esc[vtf.ae,]))) | |
df2<-cbind(df, vtf.F, vtf.F2, vtf.FE, vtf.M, vtf.O, vtf.FM, vtf.MF, vtf.E, | |
fXO = vtf.F * df$fXO_log2FoldChange, fXO2 = vtf.F2 * df$fXO2_log2FoldChange, FE = vtf.FE * df$FE_log2FoldChange, | |
mXO = vtf.M * df$mXO_log2FoldChange, fmO = vtf.O * df$fmO_log2FoldChange, FM = vtf.FM * df$FM_log2FoldChange, | |
MF = vtf.MF * df$MF_log2FoldChange, fmE = vtf.E * df$fmE_log2FoldChange, vst.med = rowMedians(assay(vsd3)), | |
vtf.X, vtf.Y, vtf.PAR, vtf.PAIR, vtf.PAIRY, vtf.ESC, vtf.oESC, vtf.OFF) | |
stopifnot( all( rownames(ratio.fem.AB) == rownames(pBi.fem.AB) & | |
rownames(ratio.fem.AB) == rownames(mtf.esc))) | |
df.ae<-merge(df2, cbind(ratio.fem.AB[,-c(1:3)], pBi.fem.AB[,-c(1:3)], mtf.esc[,-c(1:3)], vtf.ae), by="row.names",all.x=T) | |
rownames(df.ae)<-df.ae[,1] | |
df.ae<-df.ae[order(match(rownames(df.ae), rownames(df))),-1] | |
#vsd_abs contains SV-corrected vst counts | |
vsd_abs_ae <- as.data.frame(assay(vsd3)) %>% | |
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_count") %>% | |
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>% | |
left_join(df.ae %>% rownames_to_column(var="ensembl_id")) | |
vsd_abs_markers <- left_join(vsd_abs_ae, df.chhabra %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Chhabra)) %>% | |
left_join(df.xiang %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Xiang)) %>% | |
left_join(df.west %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, West)) %>% | |
left_join(df.khan %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Khan)) %>% | |
left_join(df.zhou %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Zhou)) %>% | |
left_join(df.hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) | |
#vsd_med contains SV-corrected & row-median normalized vst counts | |
vsd_med_ae <- as.data.frame(assay(vsd3)-rowMedians(assay(vsd3))) %>% | |
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_diff") %>% | |
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>% | |
left_join(df.ae %>% rownames_to_column(var="ensembl_id")) %>% left_join(as_tibble(df.Tuk2017[,c(5:6,10)] |> rownames_to_column("ENSEMBL")), by = "ENSEMBL") %>% | |
mutate(Reported = case_when(!(gene_name %in% df.Tuk2017$SYMBOL[df.Tuk2017$Reported_XCI_status %in% c("Escape","Variable")]) & | |
(gene_name %in% c("IRAK1","MBTPS2", "SMS") | | |
gene_name %in% rownames(df.Katsir2019) | | |
gene_name %in% rownames(df.Sauterad2021 %>% filter(str_detect(XCI.status,"E")))) ~ "VarOther", | |
TRUE ~ Reported_XCI_status)) %>% | |
mutate_at(vars("Reported"), ~replace_na(.,"Unknown")) | |
vsd_med_markers<-left_join(vsd_med_ae, df.chhabra %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Chhabra)) %>% | |
left_join(df.xiang %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Xiang)) %>% | |
left_join(df.west %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, West)) %>% | |
left_join(df.khan %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Khan)) %>% | |
left_join(df.zhou %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Zhou)) %>% | |
left_join(df.hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) | |
require(grid) | |
require(gridExtra) | |
#Escape vs. DESEQ2 stats and med_vsd | |
fx.col.med = colorRamp2(c(-2, 0, 2), c("blue", "white", "red")) | |
tab.sign.All<-table(sign(df[which(vtf.F & vtf.M & vtf.FM),c("fXO_stat", "mXO_stat", "FM_stat")])) | |
tab.sign.notM<-table(sign(df[which(vtf.F & !vtf.M & vtf.FM),c("fXO_stat", "FM_stat")])) | |
tab.sign.notF<-table(sign(df[which(!vtf.F & vtf.M & vtf.FM),c("mXO_stat", "FM_stat")])) | |
tab.sign.notFM<-table(sign(df[which(vtf.F & vtf.M & !vtf.FM),c("fXO_stat", "mXO_stat")])) | |
p.sign.All<-signif(pbinom(sum(tab.sign.All[c(1,length(tab.sign.All))]), size = sum(tab.sign.All),prob = 0.25,lower.tail = F),2) | |
p.sign.notM<-signif(pbinom(sum(tab.sign.notM[c(1,length(tab.sign.notM))]), size = sum(tab.sign.notM),prob = 0.5,lower.tail = F),2) | |
p.sign.notF<-signif(pbinom(sum(tab.sign.notF[c(1,length(tab.sign.notF))]), size = sum(tab.sign.notF),prob = 0.5,lower.tail = F),2) | |
p.sign.notFM<-signif(pbinom(sum(tab.sign.notFM[c(1,length(tab.sign.notFM))]), size = sum(tab.sign.notFM),prob = 0.5,lower.tail = F),2) | |
label.All<-paste0("All (", sum(tab.sign.All[c(1,length(tab.sign.All))]),"/",sum(tab.sign.All), ") ",p.sign.All) | |
label.notM<-paste0("notM (", sum(tab.sign.notM[c(1,length(tab.sign.notM))]),"/",sum(tab.sign.notM), ") ",p.sign.notM) | |
label.notFM<-paste0("notFM (", sum(tab.sign.notFM[c(1,length(tab.sign.notFM))]),"/",sum(tab.sign.notFM), ") ",p.sign.notFM) | |
label.notF<-paste0("notF (", sum(tab.sign.notF[c(1,length(tab.sign.notF))]),"/",sum(tab.sign.notF), ") ",p.sign.notF) | |
tb1<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO_stat)) & (sign(fXO_stat) == sign(FM_stat)) & vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.All) %>% add_column(Dir = "conc.") | |
tb2<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(FM_stat)) & vtf.F & vtf.FM & !vtf.M) %>% add_column(DEGo = label.notM) %>% add_column(Dir = "conc.") | |
tb3<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO_stat)) & vtf.F & !vtf.FM & vtf.M) %>% add_column(DEGo = label.notFM) %>% add_column(Dir = "conc.") | |
tb4<-vsd_med_ae %>% filter((sign(FM_stat) == sign(mXO_stat)) & !vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "conc.") | |
tb5<-vsd_med_ae %>% filter(((sign(fXO_stat) != sign(mXO_stat)) | (sign(fXO_stat) != sign(FM_stat))) & vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.All) %>% add_column(Dir = "disc.") | |
tb6<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(FM_stat)) & vtf.F & vtf.FM & !vtf.M) %>% add_column(DEGo = label.notM) %>% add_column(Dir = "disc.") | |
tb7<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO_stat)) & vtf.F & !vtf.FM & vtf.M) %>% add_column(DEGo = label.notFM) %>% add_column(Dir = "disc.") | |
tb8<-vsd_med_ae %>% filter((sign(FM_stat) != sign(mXO_stat)) & !vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "disc.") | |
tb<- bind_rows(tb1, tb2, tb3, tb4, tb5, tb6, tb7, tb8) | |
tbhm<- tb %>% | |
group_by(DEGo) %>% | |
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med, | |
palette_grouping = list(rep("white",length(table(tb$DEGo))) )) %>% | |
add_bar(fXO_stat) %>% add_bar(mXO_stat) %>% add_bar(FM_stat) %>% add_tile(Dir, palette=c("grey30","grey")) %>% | |
add_tile(condition, palette = c("blue", "deepskyblue", "mediumpurple", "violet", "darkmagenta")) | |
save_pdf(tbhm,filename = paste0(outdir,"/Global_DEGs_TBLs.pdf"),width = 7.5, height=5, units = "in") | |
#Escape vs. DESEQ2 stats and med_vsd | |
#can lower l2fc slightlyfrom (-)0.3 to (-)0.25 for inclusion of EIF2S3 and IRAK1 | |
fx.col.med = colorRamp2(c(-2, 0, 2), c("blue", "white", "red")) | |
tb1<-vsd_med_ae %>% filter(vtf.PAR & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) | | |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc) | | |
(mXO_padj <= ap & mXO_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = ".PAR") | |
tb2<-vsd_med_ae %>% filter(vtf.PAIR & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) | | |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = "Pair") | |
tb3<-vsd_med_ae %>% filter(vtf.oESC & | |
(ae.esc_C6XX & ae.esc_C19XX & ae.esc_C23XX) & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) | | |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = "X_specific_all") | |
tb4<-vsd_med_ae %>% filter((!(ensembl_id %in% tb3$ensembl_id)) & | |
vtf.oESC & | |
((ae.esc_C6XX & ae.esc_C19XX) | (ae.esc_C19XX & ae.esc_C23XX) | (ae.esc_C6XX & ae.esc_C23XX)) & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) | | |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = "X-specific2") | |
tb5<-vsd_med_ae %>% filter(!(ensembl_id %in% tb3$ensembl_id) & | |
!(ensembl_id %in% tb4$ensembl_id) & | |
vtf.oESC & | |
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) | | |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc)) | |
) %>% add_column(Strata = "X-specific3") | |
tb_HmXvstDiff<- bind_rows(tb1, tb2, tb3, tb4, tb5) | |
tbhm<- tb_HmXvstDiff %>% | |
group_by(Strata) %>% | |
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med, row_names_gp = gpar(fontsize = 6), cluster_rows = F, | |
palette_grouping = list(rep("white",length(table(tb$Strata))) )) %>% | |
add_bar("ae.ratio_C19XX") %>% add_bar("ae.ratio_C23XX") %>% add_bar("ae.ratio_C6XX") %>% | |
add_bar(fXO) %>% add_bar(fXO2) %>% add_bar(mXO) %>% add_tile(condition, palette = c("blue", "deepskyblue", "mediumpurple", "violet", "darkmagenta")) %>% | |
add_tile(Reported, palette = list("Escape" = "yellow1", "Inactive" = "grey27", "Variable" = "lightgreen", "Unknown" = "grey", "VarOther" = "lightseagreen")) | |
save_pdf(tbhm,filename = paste0(outdir,"/HmXvstDiff_allelic_TBLs2e.pdf"),width = 8, height=8, units = "in") | |
# choose annotation suffix in grep, as well as annotation first author in is.na() and group_by() calls, and customize add_bar args | |
#choose either vst medians of df.Markers.abs or df.Markers.med (median-normalized) | |
# when "med" is used, supply fx.col.med for color. In addition, vst_counts changes to vsd_diff and the grep from df can be commented out | |
vc1<-rownames(df[df$gene_name %in% c("GATA2", "MSX2", "GATA3", "TFAP2A", "TFAP2C"),]) | |
vc2<-rownames(df[df$gene_name %in% c("TBX3", "HAND1", "ANKRD1", "ISL1", "CDX2", "NR2F2", "DLX3", "ARID5B"),]) | |
vc3<-rownames(df[df$gene_name %in% c("EPAS1", "PPARG", "VGLL1", "MEIS1", "HOXB2", "BARX2", "TP63", "GCM1", "LCP1", "CEBPA", "BNC1", "GRHL1", "MEIS2", "TFAP2B", "SMARCA2"),]) | |
tb1<-vsd_med_markers %>% filter(ensembl_id %in% vc1) %>% add_column(Krendl = "early") | |
tb2<-vsd_med_markers %>% filter(ensembl_id %in% vc2) %>% add_column(Krendl = "intermediate") | |
tb3<-vsd_med_markers %>% filter(ensembl_id %in% vc3) %>% add_column(Krendl = "late") | |
tb<- bind_rows(tb1, tb2, tb3) | |
tbhm<- tb %>% dplyr::select(Krendl, condition, gene_name, sample_name, vst_diff, fXO_log2FoldChange, mXO_log2FoldChange, vst.med) %>% group_by(Krendl, condition) %>% | |
heatmap(gene_name, sample_name, vst_diff, .scale="none", row_names_gp = gpar(fontsize = 7),palette_value = fx.col.med,#cluster_rows=F, | |
palette_grouping = list(rep("white",3), c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue" ))) %>% | |
add_bar(fXO_log2FoldChange) %>% add_bar(mXO_log2FoldChange) %>% | |
add_tile(vst.med) %>% add_tile(condition) | |
save_pdf(tbhm,filename = paste0(outdir,"/Krendl.pdf"),width = 7.5, height=4, units = "in") | |
df.Markers.tb<-df.Markers.abs[,grep(".C$", colnames(df.Markers.abs))]-mean(df.Markers.abs[,grep("E-AM.C$", colnames(df.Markers.abs))]) | |
df.Markers_tb <- df.Markers.abs %>% | |
rownames_to_column("sample_name") %>% pivot_longer(-sample_name, names_to="Marker", values_to="vst_abs") %>% | |
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) | |
gb2.chhabra<-ggplot(vsd_abs_markers %>% filter(!is.na(Chhabra)), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(Chhabra), ncol=length(grep(".C$", colnames(df.Markers.abs)))) + | |
geom_boxplot(notch = T, aes(col = condition), fill = NA, outlier.alpha = 0.1) + theme(axis.text.x=element_blank()) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
gb2.zhou<-ggplot(vsd_abs_markers %>% filter(!is.na(Zhou)), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(Zhou), ncol=length(grep(".Z$", colnames(df.Markers.abs)))) + | |
geom_boxplot(notch = T, aes(col = condition), fill = NA, outlier.alpha = 0.1) + theme(axis.text.x=element_blank()) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
ls.gb2.plots<-list(gb2.zhou + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) + theme(legend.position = "none") , | |
gb2.chhabra + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
) | |
ggsave(paste0(outdir,"/notch.med.Marker.pdf"), grid.arrange(grobs = ls.gb2.plots, nrow=1), | |
device = "pdf", width=7.5, height = 2.5, units = "in") | |
require(rstatix) | |
require(ggprism) | |
require(patchwork) | |
require(magrittr) | |
require(coin) | |
require(viridis) | |
tb.CZ<-vsd_abs_markers %>% filter(!is.na(Chhabra) & is.na(Zhou)) %>% mutate(unilabel = paste0(".",as.character(Chhabra))) %>% dplyr::select(unilabel, vst_count, condition) | |
tb.ZC<-vsd_abs_markers %>% filter(!is.na(Zhou) & is.na(Chhabra)) %>% mutate(unilabel = as.character(Zhou)) %>% dplyr::select(unilabel, vst_count, condition) | |
tb.CX<-vsd_abs_markers %>% filter(!is.na(Chhabra) & is.na(Xiang)) %>% mutate(unilabel = paste0(".",as.character(Chhabra))) %>% dplyr::select(unilabel, vst_count, condition) | |
tb.XC<-vsd_abs_markers %>% filter(!is.na(Xiang) & is.na(Chhabra)) %>% mutate(unilabel = as.character(Xiang)) %>% dplyr::select(unilabel, vst_count, condition) | |
gb.CZ<-ggplot(bind_rows(tb.CZ, tb.ZC), aes(y = vst_count, x = unilabel)) + | |
facet_wrap( vars(condition), nrow = 1) + | |
geom_boxplot(aes(fill=unilabel), notch = T, outlier.shape = NA) | |
gb.CX<-ggplot(bind_rows(tb.CX, tb.XC), aes(y = vst_count, x = unilabel)) + | |
facet_wrap( vars(condition), nrow = 1) + | |
geom_boxplot(aes(fill=unilabel), notch = T, outlier.shape = NA) | |
tb_mwup_CZ<-bind_rows(tb.CZ, tb.ZC) %>% rstatix::group_by(condition) %>% | |
rstatix::wilcox_test(vst_count ~ unilabel, p.adjust.method = "BH", ref.group = ".TE.C", conf.level = 0.95) | |
tb_vsum_CZ<-bind_rows(tb.CZ, tb.ZC) %>% group_by(condition, unilabel) %>% rstatix::get_summary_stats("vst_count") %>% | |
add_column(group1 = ".TE.C", .before = 1) %>% rename("group2" = unilabel) | |
tb_mwup_CX<-bind_rows(tb.CX, tb.XC) %>% rstatix::group_by(condition) %>% | |
rstatix::wilcox_test(vst_count ~ unilabel, p.adjust.method = "BH", ref.group = ".TE.C", conf.level = 0.95) | |
tb_vsum_CX<-bind_rows(tb.CX, tb.XC) %>% group_by(condition, unilabel) %>% rstatix::get_summary_stats("vst_count") %>% | |
add_column(group1 = ".TE.C", .before = 1) %>% rename("group2" = unilabel) | |
ls.gb_marker.plots<-list( | |
CZ = gb.CZ + coord_flip() + theme(legend.position = "none") + scale_fill_brewer(palette = "Set3") + theme(text=element_text(size=7)) + | |
add_pvalue(tb_mwup_CZ %>% rstatix::add_xy_position() %>% mutate(y.position = 15, p.adj = signif(p.adj,digits = 0)), label = "p.adj", label.size = 2, remove.bracket = TRUE) + | |
add_pvalue(tb_vsum_CZ %>% mutate(y.position = 5.5) %>% mutate(median = signif(median,3)), label = "median", label.size = 2, remove.bracket = TRUE), | |
CX = gb.CX + coord_flip() + theme(legend.position = "none") + scale_fill_brewer(palette = "Set3") + theme(text=element_text(size=7)) + | |
add_pvalue(tb_mwup_CX %>% rstatix::add_xy_position() %>% mutate(y.position = 15, p.adj = signif(p.adj,digits = 0)), label = "p.adj",label.size = 2, remove.bracket = TRUE) + | |
add_pvalue(tb_vsum_CX %>% mutate(y.position = 5.5) %>% mutate(median = signif(median,3)), label = "median",label.size = 2, remove.bracket = TRUE)) | |
ggsave("Marker.cond.vst.pdf", grid.arrange(grobs = ls.gb_marker.plots, nrow=2), | |
device = "pdf", width=7.5, height=7.5, units = "in") | |
#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed | |
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR) | |
dds<-dds2 | |
mcols(dds)<-cbind(mcols(dds), basepairs= as.numeric(metadata(dds)[rownames(mcols(dds)),"Length"])) | |
fpkm_abs_ae <- as.data.frame(fpkm(dds,robust = T)) %>% | |
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="FPKM") %>% | |
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>% | |
left_join(df.ae %>% rownames_to_column(var="ensembl_id")) | |
fpkm_abs_ae2<- fpkm_abs_ae %>% add_column( | |
auto.all = deframe(fpkm_abs_ae %>% filter(!(Chr %in% c("chrX","chrY","chrM")) & FPKM >= 1) %>% | |
summarise(auto.all = median(`FPKM`, na.rm = TRUE)))) | |
tb0.fpkm<-fpkm_abs_ae2 %>% filter(!(Chr %in% c("chrX", "chrY", "chrM")) & FPKM >=1) %>% add_column(Strata = "Autosomal") %>% mutate(fpkm_ratio = (FPKM/auto.all)) | |
tb1.fpkm<-fpkm_abs_ae2 %>% filter(vtf.PAR & FPKM >=1) %>% add_column(Strata = "PAR") %>% mutate(fpkm_ratio = (FPKM/auto.all)) | |
tb2.fpkm<-fpkm_abs_ae2 %>% filter(vtf.PAIR & FPKM >=1) %>% add_column(Strata = "PAIR") %>% mutate(fpkm_ratio = (FPKM/auto.all)) | |
tb3.fpkm<-fpkm_abs_ae2 %>% filter(vtf.oESC & FPKM >=1) %>% add_column(Strata = "oESC") %>% mutate(fpkm_ratio = (FPKM/auto.all)) | |
tb4.fpkm<-fpkm_abs_ae2 %>% filter(vtf.X & !vtf.ESC2 & FPKM >=1) %>% add_column(Strata = ".Silenced")%>% mutate(fpkm_ratio = (FPKM/auto.all)) | |
tb.fpkm<-bind_rows(tb0.fpkm, tb1.fpkm, tb2.fpkm, tb3.fpkm, tb4.fpkm) %>% dplyr::select(Strata, condition, FPKM, auto.all, fpkm_ratio) | |
tb_mwu_fp1<-tb.fpkm %>% rstatix::group_by(Strata) %>% | |
rstatix::wilcox_test(fpkm_ratio ~ condition, p.adjust.method = "none", ref.group = "X1_femaleXO", conf.level = 0.95) | |
tb_fsum<-tb.fpkm %>% group_by(condition, Strata) %>% rstatix::get_summary_stats("fpkm_ratio") %>% #filter(Strata != "Autosomal") %>% | |
add_column(group1 = "Autosomal", .before = 1) %>% dplyr::rename("group2" = Strata) | |
gb.counts.f2<-ggplot(tb.fpkm, aes(y = fpkm_ratio, x = reorder(Strata, desc(Strata)))) + | |
facet_wrap( vars(condition), nrow = 5) + #, scales = "free_y") + | |
geom_boxplot(aes(col = condition), fill = NA, notch=T, outlier.shape = NA) + scale_y_continuous(trans='log10') + coord_flip() + | |
scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
vsd_abs_ae2<-vsd_abs_ae %>% add_column( | |
auto.all = deframe(vsd_abs_ae %>% filter(!(Chr %in% c("chrX","chrY","chrM"))) %>% summarise(auto.all = median(`vst_count`, na.rm = TRUE)))) | |
tb0.vsd<-vsd_abs_ae2 %>% filter(!(Chr %in% c("chrX", "chrY", "chrM"))) %>% add_column(Strata = "Autosomal") %>% mutate(vst_diff = (vst_count-auto.all)) | |
tb1.vsd<-vsd_abs_ae2 %>% filter(vtf.PAR) %>% add_column(Strata = "PAR") %>% mutate(vst_diff = (vst_count-auto.all)) | |
tb2.vsd<-vsd_abs_ae2 %>% filter(vtf.PAIR) %>% add_column(Strata = "PAIR") %>% mutate(vst_diff = (vst_count-auto.all)) | |
tb3.vsd<-vsd_abs_ae2 %>% filter(vtf.oESC) %>% add_column(Strata = "oESC") %>% mutate(vst_diff = (vst_count-auto.all)) | |
tb4.vsd<-vsd_abs_ae2 %>% filter(vtf.X & !vtf.ESC2) %>% add_column(Strata = ".Silenced")%>% mutate(vst_diff = (vst_count-auto.all)) | |
tb.vsd<-bind_rows(tb0.vsd, tb1.vsd, tb2.vsd, tb3.vsd, tb4.vsd) %>% dplyr::select(Strata, condition, vst_count, auto.all, vst_diff) | |
tb_mwu_vp1<-tb.vsd %>% rstatix::group_by(Strata) %>% | |
rstatix::wilcox_test(vst_diff ~ condition, p.adjust.method = "none", ref.group = "X1_femaleXO", conf.level = 0.95) | |
tb_vsum<-tb.vsd %>% group_by(condition, Strata) %>% rstatix::get_summary_stats("vst_diff") %>% | |
add_column(group1 = "Autosomal", .before = 1) %>% rename("group2" = Strata) | |
gb.counts.v1<-ggplot(tb.vsd, aes(y = vst_diff, x = reorder(condition, desc(condition)))) + | |
facet_wrap( vars(Strata), nrow = 6) + #, scales = "free_y") + | |
geom_boxplot(aes(col = condition), fill = NA, notch=T, outlier.shape = NA) | |
ls.gbvf.plots<-list(gb.counts.v1 + coord_flip() + theme(legend.position = "none", text=element_text(size=7)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) + | |
add_pvalue(tb_mwu_vp1 %>% rstatix::add_xy_position(fun = "max") %>% filter(p.adj <= 0.1) %>% mutate(y.position = 10) | |
, label = "p.adj", remove.bracket = TRUE), | |
gb.counts.f2 + coord_flip() + theme(legend.position = "none", text=element_text(size=7)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) + | |
add_pvalue(tb_fsum %>% mutate(y.position = 200) %>% mutate(median = signif(median,2)) | |
, label = "median", remove.bracket = TRUE) | |
) | |
ggsave(paste0(outdir,"/XA.vstfpkm.pdf"), grid.arrange(grobs = ls.gbvf.plots, ncol=length(ls.gbvf.plots)), | |
device = "pdf", width=3.5, height=4, units = "in") | |
vsd_abs_ae2$condition <- factor(vsd_abs_ae2$condition, levels = c("X1_femaleXX", "X2_femaleXX", "X1_femaleXO", "X3_maleXY", "X3_maleXO")) | |
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% c("FTX","JPX","XIST","XACT")), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), scales = "free_y", nrow =1) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue")) + | |
geom_boxplot(outlier.shape = NA, fill = NA) | |
ggsave(paste0(outdir,"/XIST.pdf"), | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue")) | |
, width = 4, height =2, units = "in") | |
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% c("TFAP2A", "TFAP2C", "GATA2", "GATA3", "HLA-G", "PGF")), aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), scales = "free_y", nrow =2) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
geom_boxplot(outlier.shape = NA, fill = NA) | |
vcounts | |
ggsave(paste0(outdir,"/YFGs.pdf"), | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue")) | |
, width = 3, height =2.5, units = "in") | |
vc.PAIR<-c("ZFX","TXLNG", "TBL1X", "USP9X", "KDM5C", "NLGN4X", "KDM6A" , "EIF1AX", "PRKX", "RPS4X", "TMSB4X", "DDX3X") #"MXRA5", "TSPYL2") | |
vc.PAIRY<-c("ZFY","TXLNGY", "TBL1Y", "USP9Y", "KDM5D", "NLGN4Y", "UTY" , "EIF1AY", "PRKY", "RPS4Y1", "TMSB4Y", "DDX3Y") #"MXRA5Y","TSPY1") | |
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% vc.PAIRY) #c("RPS4Y1", "DDX3Y", "KDM5D", "USP9Y", "EIF1AY", "UTY", "NLGN4Y", "TXLNGY", "TMSB4Y", "ZFY", "PRKY", "TBL1Y")) | |
, aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), nrow =4) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(c(5, 16)) | |
ggsave("Ypair_TBLs.pdf", | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
, width = 7, height =7, units = "in") | |
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% vc.PAIR) | |
, aes(y = vst_count, x = condition)) + | |
facet_wrap( vars(gene_name), nrow =4) + | |
geom_jitter(aes(color = condition), shape = 20, size = 1) + | |
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(c(5,16)) | |
ggsave("Xpair_TBLs.pdf", | |
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) + | |
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) | |
, width = 7, height =7, units = "in") |