Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
outdir<-dirname(rstudioapi::getSourceEditorContext()$path)
require(DESeq2)
require(biomaRt)
require(tidyverse)
require(ggplot2)
# can pull in gene names with your favorite method, I'm using my master "df" which lists all gene_names from count table, except those filtered out
vc.ENSG<-t(matrix(unlist(strsplit(rownames(df), "[.]")),nrow=2))[,1]
require(org.Hs.eg.db)
require(clusterProfiler)
All.df <- bitr(vc.ENSG, fromType = "ENSEMBL",
toType = c("ENTREZID", "SYMBOL"),
OrgDb = org.Hs.eg.db)
All.df.ENSG<-All.df[!duplicated(All.df$ENSEMBL),]
rownames(All.df.ENSG)<-All.df.ENSG$ENSEMBL
fx.gg_col<-function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
###################################
# RUN ANNOTATION FILE HERE #
###################################
source("./AhernTBL_annotation_v01_pass.R")
###################################
# RUN AE SCRIPT FILE HERE #
###################################
source("./AE_analysis_v01_pass.R")
pdf(paste0(outdir,"/Escapee_DESEQ.pdf"), width = 6, height= 6, onefile=T, paper="letter")
par(mfrow=c(2,2), mai = rep(0.6, 4), cex=0.6, cex.lab = 0.7, cex.axis = 0.7, omi = rep(0, 4))
pch.FAIL<-5
pch.PAR<-18
pch.PAIR<-18
pch.oESC<-18
pch.Y<-18
col.PAR=adjustcolor( "black", alpha.f = 0.7)
col.PAIR=adjustcolor( "coral", alpha.f = 0.7)
col.oESC=adjustcolor( "cyan3", alpha.f = 0.7)
col.FAIL=adjustcolor( "red", alpha.f = 0.7)
col.Y=adjustcolor( "blue", alpha.f = 0.7)
fcex=0.5
pcex=0.7
plot(df$fXO_log2FoldChange[which(vtf.F & vtf.PAIR)], -log(df$fXO_padj[which(vtf.F & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR, cex=pcex,
xlim=c(-3,1),ylim=c(0,80),xlab = "log2FC XO-XX19/23", ylab = "-log10(p.adj) XO-XX19/23", main = "fXO vs. XX19/23")
points(df$fXO_log2FoldChange[which(vtf.F & vtf.PAR)], -log(df$fXO_padj[which(vtf.F & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex)
points(df$fXO_log2FoldChange[which(vtf.F & vtf.oESC)], -log(df$fXO_padj[which(vtf.F & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex)
points(df$fXO_log2FoldChange[which(vtf.F & vtf.FAIL)], -log(df$fXO_padj[which(vtf.F & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex)
text(df$fXO_log2FoldChange[which(vtf.F & vtf.PAIR)],
-log(df$fXO_padj[which(vtf.F & vtf.PAIR)],10),
labels = df$gene_name[which(vtf.F & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3)
text(df$fXO_log2FoldChange[which(vtf.F & vtf.PAR)],
-log(df$fXO_padj[which(vtf.F & vtf.PAR)],10),
labels = df$gene_name[which(vtf.F & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3)
text(df$fXO_log2FoldChange[which(vtf.F & vtf.oESC)],
-log(df$fXO_padj[which(vtf.F & vtf.oESC)],10),
labels = df$gene_name[which(vtf.F & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3)
abline(v=c(-0.3,0.3),h=1.3, lty=2)
plot(df$FE_log2FoldChange[which(vtf.FE & vtf.PAIR)], -log(df$FE_padj[which(vtf.FE & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR, cex=pcex,
xlab = "log2FC XX6-XX19/23", ylab = "-log10(p.adj) XX6-XX19/23", main = "XX6 vs. XX19/23", xlim=c(-3,1),ylim=c(0,40))
points(df$FE_log2FoldChange[which(vtf.FE & vtf.PAR)], -log(df$FE_padj[which(vtf.FE & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex)
points(df$FE_log2FoldChange[which(vtf.FE & vtf.oESC)], -log(df$FE_padj[which(vtf.FE & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex)
points(df$FE_log2FoldChange[which(vtf.FE & vtf.FAIL)], -log(df$FE_padj[which(vtf.FE & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex)
text(df$FE_log2FoldChange[which(vtf.FE & vtf.PAIR)],
-log(df$FE_padj[which(vtf.FE & vtf.PAIR)],10),
labels = df$gene_name[which(vtf.FE & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3)
text(df$FE_log2FoldChange[which(vtf.FE & vtf.PAR)],
-log(df$FE_padj[which(vtf.FE & vtf.PAR)],10),
labels = df$gene_name[which(vtf.FE & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3)
text(df$FE_log2FoldChange[which(vtf.FE & vtf.oESC)],
-log(df$FE_padj[which(vtf.FE & vtf.oESC)],10),
labels = df$gene_name[which(vtf.FE & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3)
abline(v=c(-0.3,0.3),h=1.3, lty=2)
plot(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAIR)], -log(df$fXO2_padj[which(vtf.F2 & vtf.PAIR)],10),pch=pch.PAIR, col=col.PAIR,cex=pcex,
xlab = "log2FC XO-XX6", ylab = "-log10(p.adj) XO-XX6", main = "fXO vs. XX6", xlim=c(-3,1),ylim=c(0,30))
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAR)], -log(df$fXO2_padj[which(vtf.F2 & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex)
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.oESC)], -log(df$fXO2_padj[which(vtf.F2 & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex)
points(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.FAIL)], -log(df$fXO2_padj[which(vtf.F2 & vtf.FAIL)],10), pch=pch.FAIL, col=col.FAIL, cex=pcex)
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAIR)],
-log(df$fXO2_padj[which(vtf.F2 & vtf.PAIR)],10),
labels = df$gene_name[which(vtf.F2 & vtf.PAIR)], col=col.PAIR,pos = 3, cex=fcex, offset=0.3)
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.PAR)],
-log(df$fXO2_padj[which(vtf.F2 & vtf.PAR)],10),
labels = df$gene_name[which(vtf.F2 & vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3)
text(df$fXO2_log2FoldChange[which(vtf.F2 & vtf.oESC)],
-log(df$fXO2_padj[which(vtf.F2 & vtf.oESC)],10),
labels = df$gene_name[which(vtf.F2 & vtf.oESC)], col=col.oESC,pos = 4, cex=fcex, offset=0.3)
abline(v=c(-0.3,0.3),h=1.3,lty=2)
plot(df$mXO_log2FoldChange[which(vtf.M & vtf.Y)], -log(df$mXO_padj[which(vtf.M & vtf.Y)],10),pch=pch.PAIR, col=col.Y,cex=pcex,
xlab = "log2FC XO-XY", ylab = "-log10(p.adj) XO-XY", main = "mXO vs. XY", xlim=c(-14,1), ylim=c(0,60))
points(df$mXO_log2FoldChange[which(vtf.M& vtf.PAR)], -log(df$mXO_padj[which(vtf.M & vtf.PAR)],10), pch=pch.PAR, col=col.PAR, cex=pcex)
points(df$mXO_log2FoldChange[which(vtf.M& vtf.oESC)], -log(df$mXO_padj[which(vtf.M & vtf.oESC)],10), pch=pch.oESC, col=col.oESC, cex=pcex)
text(df$mXO_log2FoldChange[which(vtf.M & vtf.Y)],
-log(df$mXO_padj[which(vtf.M & vtf.Y)],10),
labels = df$gene_name[which(vtf.M & vtf.Y)], col=col.Y,pos = 1, cex=fcex, offset=0.3)
text(df$mXO_log2FoldChange[which(vtf.M& vtf.PAR)],
-log(df$mXO_padj[which(vtf.M& vtf.PAR)],10),
labels = df$gene_name[which(vtf.M& vtf.PAR)], col=col.PAR,pos = 2, cex=fcex, offset=0.3)
text(df$mXO_log2FoldChange[which(vtf.M& vtf.oESC)],
-log(df$mXO_padj[which(vtf.M& vtf.oESC)],10),
labels = df$gene_name[which(vtf.M& vtf.oESC)], col=col.oESC,pos = 2, cex=fcex, offset=0.3)
abline(v=c(-0.3,0.3),h=1.3, lty=2)
dev.off()
require(tidyHeatmap)
library(circlize)
vtf.ae<-(!is.na(mtf.esc[,grep("C6XX",colnames(mtf.esc))]) & !is.na(mtf.esc[,grep("C19XX",colnames(mtf.esc))]) & !is.na(mtf.esc[,grep("C23XX",colnames(mtf.esc))]))
vtf.OFF<-(vtf.X & !vtf.oESC & !vtf.PAR & !vtf.PAIR & (rownames(df) %in% rownames(mtf.esc[vtf.ae,])))
df2<-cbind(df, vtf.F, vtf.F2, vtf.FE, vtf.M, vtf.O, vtf.FM, vtf.MF, vtf.E,
fXO = vtf.F * df$fXO_log2FoldChange, fXO2 = vtf.F2 * df$fXO2_log2FoldChange, FE = vtf.FE * df$FE_log2FoldChange,
mXO = vtf.M * df$mXO_log2FoldChange, fmO = vtf.O * df$fmO_log2FoldChange, FM = vtf.FM * df$FM_log2FoldChange,
MF = vtf.MF * df$MF_log2FoldChange, fmE = vtf.E * df$fmE_log2FoldChange, vst.med = rowMedians(assay(vsd3)),
vtf.X, vtf.Y, vtf.PAR, vtf.PAIR, vtf.PAIRY, vtf.ESC, vtf.oESC, vtf.OFF)
stopifnot( all( rownames(ratio.fem.AB) == rownames(pBi.fem.AB) &
rownames(ratio.fem.AB) == rownames(mtf.esc)))
df.ae<-merge(df2, cbind(ratio.fem.AB[,-c(1:3)], pBi.fem.AB[,-c(1:3)], mtf.esc[,-c(1:3)], vtf.ae), by="row.names",all.x=T)
rownames(df.ae)<-df.ae[,1]
df.ae<-df.ae[order(match(rownames(df.ae), rownames(df))),-1]
#vsd_abs contains SV-corrected vst counts
vsd_abs_ae <- as.data.frame(assay(vsd3)) %>%
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_count") %>%
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>%
left_join(df.ae %>% rownames_to_column(var="ensembl_id"))
vsd_abs_markers <- left_join(vsd_abs_ae, df.chhabra %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Chhabra)) %>%
left_join(df.xiang %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Xiang)) %>%
left_join(df.west %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, West)) %>%
left_join(df.khan %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Khan)) %>%
left_join(df.zhou %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Zhou)) %>%
left_join(df.hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao))
#vsd_med contains SV-corrected & row-median normalized vst counts
vsd_med_ae <- as.data.frame(assay(vsd3)-rowMedians(assay(vsd3))) %>%
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_diff") %>%
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>%
left_join(df.ae %>% rownames_to_column(var="ensembl_id")) %>% left_join(as_tibble(df.Tuk2017[,c(5:6,10)] |> rownames_to_column("ENSEMBL")), by = "ENSEMBL") %>%
mutate(Reported = case_when(!(gene_name %in% df.Tuk2017$SYMBOL[df.Tuk2017$Reported_XCI_status %in% c("Escape","Variable")]) &
(gene_name %in% c("IRAK1","MBTPS2", "SMS") |
gene_name %in% rownames(df.Katsir2019) |
gene_name %in% rownames(df.Sauterad2021 %>% filter(str_detect(XCI.status,"E")))) ~ "VarOther",
TRUE ~ Reported_XCI_status)) %>%
mutate_at(vars("Reported"), ~replace_na(.,"Unknown"))
vsd_med_markers<-left_join(vsd_med_ae, df.chhabra %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Chhabra)) %>%
left_join(df.xiang %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Xiang)) %>%
left_join(df.west %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, West)) %>%
left_join(df.khan %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Khan)) %>%
left_join(df.zhou %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Zhou)) %>%
left_join(df.hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao))
require(grid)
require(gridExtra)
#Escape vs. DESEQ2 stats and med_vsd
fx.col.med = colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
tab.sign.All<-table(sign(df[which(vtf.F & vtf.M & vtf.FM),c("fXO_stat", "mXO_stat", "FM_stat")]))
tab.sign.notM<-table(sign(df[which(vtf.F & !vtf.M & vtf.FM),c("fXO_stat", "FM_stat")]))
tab.sign.notF<-table(sign(df[which(!vtf.F & vtf.M & vtf.FM),c("mXO_stat", "FM_stat")]))
tab.sign.notFM<-table(sign(df[which(vtf.F & vtf.M & !vtf.FM),c("fXO_stat", "mXO_stat")]))
p.sign.All<-signif(pbinom(sum(tab.sign.All[c(1,length(tab.sign.All))]), size = sum(tab.sign.All),prob = 0.25,lower.tail = F),2)
p.sign.notM<-signif(pbinom(sum(tab.sign.notM[c(1,length(tab.sign.notM))]), size = sum(tab.sign.notM),prob = 0.5,lower.tail = F),2)
p.sign.notF<-signif(pbinom(sum(tab.sign.notF[c(1,length(tab.sign.notF))]), size = sum(tab.sign.notF),prob = 0.5,lower.tail = F),2)
p.sign.notFM<-signif(pbinom(sum(tab.sign.notFM[c(1,length(tab.sign.notFM))]), size = sum(tab.sign.notFM),prob = 0.5,lower.tail = F),2)
label.All<-paste0("All (", sum(tab.sign.All[c(1,length(tab.sign.All))]),"/",sum(tab.sign.All), ") ",p.sign.All)
label.notM<-paste0("notM (", sum(tab.sign.notM[c(1,length(tab.sign.notM))]),"/",sum(tab.sign.notM), ") ",p.sign.notM)
label.notFM<-paste0("notFM (", sum(tab.sign.notFM[c(1,length(tab.sign.notFM))]),"/",sum(tab.sign.notFM), ") ",p.sign.notFM)
label.notF<-paste0("notF (", sum(tab.sign.notF[c(1,length(tab.sign.notF))]),"/",sum(tab.sign.notF), ") ",p.sign.notF)
tb1<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO_stat)) & (sign(fXO_stat) == sign(FM_stat)) & vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.All) %>% add_column(Dir = "conc.")
tb2<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(FM_stat)) & vtf.F & vtf.FM & !vtf.M) %>% add_column(DEGo = label.notM) %>% add_column(Dir = "conc.")
tb3<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO_stat)) & vtf.F & !vtf.FM & vtf.M) %>% add_column(DEGo = label.notFM) %>% add_column(Dir = "conc.")
tb4<-vsd_med_ae %>% filter((sign(FM_stat) == sign(mXO_stat)) & !vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "conc.")
tb5<-vsd_med_ae %>% filter(((sign(fXO_stat) != sign(mXO_stat)) | (sign(fXO_stat) != sign(FM_stat))) & vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.All) %>% add_column(Dir = "disc.")
tb6<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(FM_stat)) & vtf.F & vtf.FM & !vtf.M) %>% add_column(DEGo = label.notM) %>% add_column(Dir = "disc.")
tb7<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO_stat)) & vtf.F & !vtf.FM & vtf.M) %>% add_column(DEGo = label.notFM) %>% add_column(Dir = "disc.")
tb8<-vsd_med_ae %>% filter((sign(FM_stat) != sign(mXO_stat)) & !vtf.F & vtf.FM & vtf.M) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "disc.")
tb<- bind_rows(tb1, tb2, tb3, tb4, tb5, tb6, tb7, tb8)
tbhm<- tb %>%
group_by(DEGo) %>%
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med,
palette_grouping = list(rep("white",length(table(tb$DEGo))) )) %>%
add_bar(fXO_stat) %>% add_bar(mXO_stat) %>% add_bar(FM_stat) %>% add_tile(Dir, palette=c("grey30","grey")) %>%
add_tile(condition, palette = c("blue", "deepskyblue", "mediumpurple", "violet", "darkmagenta"))
save_pdf(tbhm,filename = paste0(outdir,"/Global_DEGs_TBLs.pdf"),width = 7.5, height=5, units = "in")
#Escape vs. DESEQ2 stats and med_vsd
#can lower l2fc slightlyfrom (-)0.3 to (-)0.25 for inclusion of EIF2S3 and IRAK1
fx.col.med = colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
tb1<-vsd_med_ae %>% filter(vtf.PAR &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc) |
(mXO_padj <= ap & mXO_log2FoldChange <= -l2fc))
) %>% add_column(Strata = ".PAR")
tb2<-vsd_med_ae %>% filter(vtf.PAIR &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc))
) %>% add_column(Strata = "Pair")
tb3<-vsd_med_ae %>% filter(vtf.oESC &
(ae.esc_C6XX & ae.esc_C19XX & ae.esc_C23XX) &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc))
) %>% add_column(Strata = "X_specific_all")
tb4<-vsd_med_ae %>% filter((!(ensembl_id %in% tb3$ensembl_id)) &
vtf.oESC &
((ae.esc_C6XX & ae.esc_C19XX) | (ae.esc_C19XX & ae.esc_C23XX) | (ae.esc_C6XX & ae.esc_C23XX)) &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc))
) %>% add_column(Strata = "X-specific2")
tb5<-vsd_med_ae %>% filter(!(ensembl_id %in% tb3$ensembl_id) &
!(ensembl_id %in% tb4$ensembl_id) &
vtf.oESC &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc) |
(fXO2_padj <= ap & fXO2_log2FoldChange <= -l2fc))
) %>% add_column(Strata = "X-specific3")
tb_HmXvstDiff<- bind_rows(tb1, tb2, tb3, tb4, tb5)
tbhm<- tb_HmXvstDiff %>%
group_by(Strata) %>%
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med, row_names_gp = gpar(fontsize = 6), cluster_rows = F,
palette_grouping = list(rep("white",length(table(tb$Strata))) )) %>%
add_bar("ae.ratio_C19XX") %>% add_bar("ae.ratio_C23XX") %>% add_bar("ae.ratio_C6XX") %>%
add_bar(fXO) %>% add_bar(fXO2) %>% add_bar(mXO) %>% add_tile(condition, palette = c("blue", "deepskyblue", "mediumpurple", "violet", "darkmagenta")) %>%
add_tile(Reported, palette = list("Escape" = "yellow1", "Inactive" = "grey27", "Variable" = "lightgreen", "Unknown" = "grey", "VarOther" = "lightseagreen"))
save_pdf(tbhm,filename = paste0(outdir,"/HmXvstDiff_allelic_TBLs2e.pdf"),width = 8, height=8, units = "in")
# choose annotation suffix in grep, as well as annotation first author in is.na() and group_by() calls, and customize add_bar args
#choose either vst medians of df.Markers.abs or df.Markers.med (median-normalized)
# when "med" is used, supply fx.col.med for color. In addition, vst_counts changes to vsd_diff and the grep from df can be commented out
vc1<-rownames(df[df$gene_name %in% c("GATA2", "MSX2", "GATA3", "TFAP2A", "TFAP2C"),])
vc2<-rownames(df[df$gene_name %in% c("TBX3", "HAND1", "ANKRD1", "ISL1", "CDX2", "NR2F2", "DLX3", "ARID5B"),])
vc3<-rownames(df[df$gene_name %in% c("EPAS1", "PPARG", "VGLL1", "MEIS1", "HOXB2", "BARX2", "TP63", "GCM1", "LCP1", "CEBPA", "BNC1", "GRHL1", "MEIS2", "TFAP2B", "SMARCA2"),])
tb1<-vsd_med_markers %>% filter(ensembl_id %in% vc1) %>% add_column(Krendl = "early")
tb2<-vsd_med_markers %>% filter(ensembl_id %in% vc2) %>% add_column(Krendl = "intermediate")
tb3<-vsd_med_markers %>% filter(ensembl_id %in% vc3) %>% add_column(Krendl = "late")
tb<- bind_rows(tb1, tb2, tb3)
tbhm<- tb %>% dplyr::select(Krendl, condition, gene_name, sample_name, vst_diff, fXO_log2FoldChange, mXO_log2FoldChange, vst.med) %>% group_by(Krendl, condition) %>%
heatmap(gene_name, sample_name, vst_diff, .scale="none", row_names_gp = gpar(fontsize = 7),palette_value = fx.col.med,#cluster_rows=F,
palette_grouping = list(rep("white",3), c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue" ))) %>%
add_bar(fXO_log2FoldChange) %>% add_bar(mXO_log2FoldChange) %>%
add_tile(vst.med) %>% add_tile(condition)
save_pdf(tbhm,filename = paste0(outdir,"/Krendl.pdf"),width = 7.5, height=4, units = "in")
df.Markers.tb<-df.Markers.abs[,grep(".C$", colnames(df.Markers.abs))]-mean(df.Markers.abs[,grep("E-AM.C$", colnames(df.Markers.abs))])
df.Markers_tb <- df.Markers.abs %>%
rownames_to_column("sample_name") %>% pivot_longer(-sample_name, names_to="Marker", values_to="vst_abs") %>%
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name"))
gb2.chhabra<-ggplot(vsd_abs_markers %>% filter(!is.na(Chhabra)), aes(y = vst_count, x = condition)) +
facet_wrap( vars(Chhabra), ncol=length(grep(".C$", colnames(df.Markers.abs)))) +
geom_boxplot(notch = T, aes(col = condition), fill = NA, outlier.alpha = 0.1) + theme(axis.text.x=element_blank()) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
gb2.zhou<-ggplot(vsd_abs_markers %>% filter(!is.na(Zhou)), aes(y = vst_count, x = condition)) +
facet_wrap( vars(Zhou), ncol=length(grep(".Z$", colnames(df.Markers.abs)))) +
geom_boxplot(notch = T, aes(col = condition), fill = NA, outlier.alpha = 0.1) + theme(axis.text.x=element_blank()) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
ls.gb2.plots<-list(gb2.zhou + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) + theme(legend.position = "none") ,
gb2.chhabra + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
)
ggsave(paste0(outdir,"/notch.med.Marker.pdf"), grid.arrange(grobs = ls.gb2.plots, nrow=1),
device = "pdf", width=7.5, height = 2.5, units = "in")
require(rstatix)
require(ggprism)
require(patchwork)
require(magrittr)
require(coin)
require(viridis)
tb.CZ<-vsd_abs_markers %>% filter(!is.na(Chhabra) & is.na(Zhou)) %>% mutate(unilabel = paste0(".",as.character(Chhabra))) %>% dplyr::select(unilabel, vst_count, condition)
tb.ZC<-vsd_abs_markers %>% filter(!is.na(Zhou) & is.na(Chhabra)) %>% mutate(unilabel = as.character(Zhou)) %>% dplyr::select(unilabel, vst_count, condition)
tb.CX<-vsd_abs_markers %>% filter(!is.na(Chhabra) & is.na(Xiang)) %>% mutate(unilabel = paste0(".",as.character(Chhabra))) %>% dplyr::select(unilabel, vst_count, condition)
tb.XC<-vsd_abs_markers %>% filter(!is.na(Xiang) & is.na(Chhabra)) %>% mutate(unilabel = as.character(Xiang)) %>% dplyr::select(unilabel, vst_count, condition)
gb.CZ<-ggplot(bind_rows(tb.CZ, tb.ZC), aes(y = vst_count, x = unilabel)) +
facet_wrap( vars(condition), nrow = 1) +
geom_boxplot(aes(fill=unilabel), notch = T, outlier.shape = NA)
gb.CX<-ggplot(bind_rows(tb.CX, tb.XC), aes(y = vst_count, x = unilabel)) +
facet_wrap( vars(condition), nrow = 1) +
geom_boxplot(aes(fill=unilabel), notch = T, outlier.shape = NA)
tb_mwup_CZ<-bind_rows(tb.CZ, tb.ZC) %>% rstatix::group_by(condition) %>%
rstatix::wilcox_test(vst_count ~ unilabel, p.adjust.method = "BH", ref.group = ".TE.C", conf.level = 0.95)
tb_vsum_CZ<-bind_rows(tb.CZ, tb.ZC) %>% group_by(condition, unilabel) %>% rstatix::get_summary_stats("vst_count") %>%
add_column(group1 = ".TE.C", .before = 1) %>% rename("group2" = unilabel)
tb_mwup_CX<-bind_rows(tb.CX, tb.XC) %>% rstatix::group_by(condition) %>%
rstatix::wilcox_test(vst_count ~ unilabel, p.adjust.method = "BH", ref.group = ".TE.C", conf.level = 0.95)
tb_vsum_CX<-bind_rows(tb.CX, tb.XC) %>% group_by(condition, unilabel) %>% rstatix::get_summary_stats("vst_count") %>%
add_column(group1 = ".TE.C", .before = 1) %>% rename("group2" = unilabel)
ls.gb_marker.plots<-list(
CZ = gb.CZ + coord_flip() + theme(legend.position = "none") + scale_fill_brewer(palette = "Set3") + theme(text=element_text(size=7)) +
add_pvalue(tb_mwup_CZ %>% rstatix::add_xy_position() %>% mutate(y.position = 15, p.adj = signif(p.adj,digits = 0)), label = "p.adj", label.size = 2, remove.bracket = TRUE) +
add_pvalue(tb_vsum_CZ %>% mutate(y.position = 5.5) %>% mutate(median = signif(median,3)), label = "median", label.size = 2, remove.bracket = TRUE),
CX = gb.CX + coord_flip() + theme(legend.position = "none") + scale_fill_brewer(palette = "Set3") + theme(text=element_text(size=7)) +
add_pvalue(tb_mwup_CX %>% rstatix::add_xy_position() %>% mutate(y.position = 15, p.adj = signif(p.adj,digits = 0)), label = "p.adj",label.size = 2, remove.bracket = TRUE) +
add_pvalue(tb_vsum_CX %>% mutate(y.position = 5.5) %>% mutate(median = signif(median,3)), label = "median",label.size = 2, remove.bracket = TRUE))
ggsave("Marker.cond.vst.pdf", grid.arrange(grobs = ls.gb_marker.plots, nrow=2),
device = "pdf", width=7.5, height=7.5, units = "in")
#vtf.ESC2 is identical to vtf.ESC, except that it includes all PAR and PAIR genes, irrespective of whether they escape/are expressed
vtf.ESC2<-(vtf.oESC | vtf.PAR | vtf.PAIR)
dds<-dds2
mcols(dds)<-cbind(mcols(dds), basepairs= as.numeric(metadata(dds)[rownames(mcols(dds)),"Length"]))
fpkm_abs_ae <- as.data.frame(fpkm(dds,robust = T)) %>%
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="FPKM") %>%
left_join(dds@colData %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>%
left_join(df.ae %>% rownames_to_column(var="ensembl_id"))
fpkm_abs_ae2<- fpkm_abs_ae %>% add_column(
auto.all = deframe(fpkm_abs_ae %>% filter(!(Chr %in% c("chrX","chrY","chrM")) & FPKM >= 1) %>%
summarise(auto.all = median(`FPKM`, na.rm = TRUE))))
tb0.fpkm<-fpkm_abs_ae2 %>% filter(!(Chr %in% c("chrX", "chrY", "chrM")) & FPKM >=1) %>% add_column(Strata = "Autosomal") %>% mutate(fpkm_ratio = (FPKM/auto.all))
tb1.fpkm<-fpkm_abs_ae2 %>% filter(vtf.PAR & FPKM >=1) %>% add_column(Strata = "PAR") %>% mutate(fpkm_ratio = (FPKM/auto.all))
tb2.fpkm<-fpkm_abs_ae2 %>% filter(vtf.PAIR & FPKM >=1) %>% add_column(Strata = "PAIR") %>% mutate(fpkm_ratio = (FPKM/auto.all))
tb3.fpkm<-fpkm_abs_ae2 %>% filter(vtf.oESC & FPKM >=1) %>% add_column(Strata = "oESC") %>% mutate(fpkm_ratio = (FPKM/auto.all))
tb4.fpkm<-fpkm_abs_ae2 %>% filter(vtf.X & !vtf.ESC2 & FPKM >=1) %>% add_column(Strata = ".Silenced")%>% mutate(fpkm_ratio = (FPKM/auto.all))
tb.fpkm<-bind_rows(tb0.fpkm, tb1.fpkm, tb2.fpkm, tb3.fpkm, tb4.fpkm) %>% dplyr::select(Strata, condition, FPKM, auto.all, fpkm_ratio)
tb_mwu_fp1<-tb.fpkm %>% rstatix::group_by(Strata) %>%
rstatix::wilcox_test(fpkm_ratio ~ condition, p.adjust.method = "none", ref.group = "X1_femaleXO", conf.level = 0.95)
tb_fsum<-tb.fpkm %>% group_by(condition, Strata) %>% rstatix::get_summary_stats("fpkm_ratio") %>% #filter(Strata != "Autosomal") %>%
add_column(group1 = "Autosomal", .before = 1) %>% dplyr::rename("group2" = Strata)
gb.counts.f2<-ggplot(tb.fpkm, aes(y = fpkm_ratio, x = reorder(Strata, desc(Strata)))) +
facet_wrap( vars(condition), nrow = 5) + #, scales = "free_y") +
geom_boxplot(aes(col = condition), fill = NA, notch=T, outlier.shape = NA) + scale_y_continuous(trans='log10') + coord_flip() +
scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
vsd_abs_ae2<-vsd_abs_ae %>% add_column(
auto.all = deframe(vsd_abs_ae %>% filter(!(Chr %in% c("chrX","chrY","chrM"))) %>% summarise(auto.all = median(`vst_count`, na.rm = TRUE))))
tb0.vsd<-vsd_abs_ae2 %>% filter(!(Chr %in% c("chrX", "chrY", "chrM"))) %>% add_column(Strata = "Autosomal") %>% mutate(vst_diff = (vst_count-auto.all))
tb1.vsd<-vsd_abs_ae2 %>% filter(vtf.PAR) %>% add_column(Strata = "PAR") %>% mutate(vst_diff = (vst_count-auto.all))
tb2.vsd<-vsd_abs_ae2 %>% filter(vtf.PAIR) %>% add_column(Strata = "PAIR") %>% mutate(vst_diff = (vst_count-auto.all))
tb3.vsd<-vsd_abs_ae2 %>% filter(vtf.oESC) %>% add_column(Strata = "oESC") %>% mutate(vst_diff = (vst_count-auto.all))
tb4.vsd<-vsd_abs_ae2 %>% filter(vtf.X & !vtf.ESC2) %>% add_column(Strata = ".Silenced")%>% mutate(vst_diff = (vst_count-auto.all))
tb.vsd<-bind_rows(tb0.vsd, tb1.vsd, tb2.vsd, tb3.vsd, tb4.vsd) %>% dplyr::select(Strata, condition, vst_count, auto.all, vst_diff)
tb_mwu_vp1<-tb.vsd %>% rstatix::group_by(Strata) %>%
rstatix::wilcox_test(vst_diff ~ condition, p.adjust.method = "none", ref.group = "X1_femaleXO", conf.level = 0.95)
tb_vsum<-tb.vsd %>% group_by(condition, Strata) %>% rstatix::get_summary_stats("vst_diff") %>%
add_column(group1 = "Autosomal", .before = 1) %>% rename("group2" = Strata)
gb.counts.v1<-ggplot(tb.vsd, aes(y = vst_diff, x = reorder(condition, desc(condition)))) +
facet_wrap( vars(Strata), nrow = 6) + #, scales = "free_y") +
geom_boxplot(aes(col = condition), fill = NA, notch=T, outlier.shape = NA)
ls.gbvf.plots<-list(gb.counts.v1 + coord_flip() + theme(legend.position = "none", text=element_text(size=7)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) +
add_pvalue(tb_mwu_vp1 %>% rstatix::add_xy_position(fun = "max") %>% filter(p.adj <= 0.1) %>% mutate(y.position = 10)
, label = "p.adj", remove.bracket = TRUE),
gb.counts.f2 + coord_flip() + theme(legend.position = "none", text=element_text(size=7)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) +
add_pvalue(tb_fsum %>% mutate(y.position = 200) %>% mutate(median = signif(median,2))
, label = "median", remove.bracket = TRUE)
)
ggsave(paste0(outdir,"/XA.vstfpkm.pdf"), grid.arrange(grobs = ls.gbvf.plots, ncol=length(ls.gbvf.plots)),
device = "pdf", width=3.5, height=4, units = "in")
vsd_abs_ae2$condition <- factor(vsd_abs_ae2$condition, levels = c("X1_femaleXX", "X2_femaleXX", "X1_femaleXO", "X3_maleXY", "X3_maleXO"))
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% c("FTX","JPX","XIST","XACT")), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), scales = "free_y", nrow =1) +
geom_jitter(aes(color = condition), shape = 20, size = 1) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue")) +
geom_boxplot(outlier.shape = NA, fill = NA)
ggsave(paste0(outdir,"/XIST.pdf"),
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue"))
, width = 4, height =2, units = "in")
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% c("TFAP2A", "TFAP2C", "GATA2", "GATA3", "HLA-G", "PGF")), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), scales = "free_y", nrow =2) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
geom_boxplot(outlier.shape = NA, fill = NA)
vcounts
ggsave(paste0(outdir,"/YFGs.pdf"),
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("darkmagenta", "mediumpurple", "violet", "blue", "deepskyblue"))
, width = 3, height =2.5, units = "in")
vc.PAIR<-c("ZFX","TXLNG", "TBL1X", "USP9X", "KDM5C", "NLGN4X", "KDM6A" , "EIF1AX", "PRKX", "RPS4X", "TMSB4X", "DDX3X") #"MXRA5", "TSPYL2")
vc.PAIRY<-c("ZFY","TXLNGY", "TBL1Y", "USP9Y", "KDM5D", "NLGN4Y", "UTY" , "EIF1AY", "PRKY", "RPS4Y1", "TMSB4Y", "DDX3Y") #"MXRA5Y","TSPY1")
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% vc.PAIRY) #c("RPS4Y1", "DDX3Y", "KDM5D", "USP9Y", "EIF1AY", "UTY", "NLGN4Y", "TXLNGY", "TMSB4Y", "ZFY", "PRKY", "TBL1Y"))
, aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), nrow =4) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(c(5, 16))
ggsave("Ypair_TBLs.pdf",
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
, width = 7, height =7, units = "in")
vcounts<-ggplot(vsd_abs_ae2 %>% filter(gene_name %in% vc.PAIR)
, aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), nrow =4) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(c(5,16))
ggsave("Xpair_TBLs.pdf",
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue"))
, width = 7, height =7, units = "in")