Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
setwd(dirname(rstudioapi::getSourceEditorContext()$path))
require(tidyverse)
### NCC
FeatureColumn = 1
LastMetaColumn = 6
PathDesign.csv = "./coldata_NCC_final.csv"
SampleColumn = 1
df.design2<-read_csv(file = PathDesign.csv) %>% as.data.frame() %>%
mutate(files = sprintf("./NCC_salmon/%s_quant/quant.sf", RegistryID)) %>%
add_column(CellType = "NCC") %>%
mutate(line = paste0(karyotype,replace_na(parse_number(str_split_fixed(Sample, "_", 3)[,2]),replace = 1))) %>%
mutate(donor = str_split_fixed(Sample, "_", 3)[,1],
condition2 = condition) %>%
mutate(condition = clone,
condition3 = case_when(clone == "X1_femaleXO" ~ "femXO",
clone == "X1_femaleXX" ~ "femXX",
clone == "X3_maleXO" ~ "male1XO",
clone == "X3_maleXY" ~ "male1XY",
clone == "X4_maleXO" ~ "male2XO",
clone == "X4_maleXY" ~ "male2XY",
T ~ NA)) %>%
relocate(simple, .after = donor)
df.design<-df.design2|>mutate(names = RegistryID)
require(DESeq2)
require(tximeta)
sum_exp <- tximeta(df.design)
gene_sum_exp <- summarizeToGene(sum_exp)
designform="Diff + condition"
findSV <- T
removeSV <- findSV
minCounts <- 20
minSamples <- 4
minAve <- 10
vc.design<-unlist(strsplit(designform, split = " "))
vc.designLast<-unlist(strsplit(tail(vc.design,1), split = ":"))
outdir = paste0("./repout_ex_",designform,"_SV_",removeSV)
dir.create(outdir)
dds0<- DESeqDataSet(gene_sum_exp,
design = formula(paste0("~" ,designform)))
if(!all(c(vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))],vc.designLast) %in% colnames(df.design)))
{stop("Check your designform and sample_sheet")}
write_csv(as.data.frame(counts(dds0)) |>
rownames_to_column(var = "ENSEMBL_ID"),
file = paste0(outdir,"/GSE264742_counts.csv.gz"))
write_csv(as.data.frame(dds0@colData) |> dplyr::select(-c(names, clone, condition, condition2, Xa, karyotype, simple)) |>
rename(Sample_ID = "RegistryID",
Cell_Type = "CellType",
Cell_line = "line",
condition = "condition3") |>
arrange(Sample_ID),
file = paste0(outdir,"/Samples.csv"))
#Pre-filter based on https://support.bioconductor.org/p/65091/
dds1 <- estimateSizeFactors(dds0)
mn.nc1 <- counts(dds1, normalized=TRUE)
filter1 <- rowSums(mn.nc1 >= minCounts) >= minSamples
dds1 <- dds1[filter1,]
mn.nc2 <- counts(dds1, normalized=TRUE)
filter2 <- rowMeans(mn.nc2) >= minAve
dds1 <- dds1[filter2,]
print("Sample names match in correct order. Returning filtered dds")
if(findSV){
require("sva")
require("limma")
mod <- model.matrix (design(dds1), colData(dds1))
mod0 <- model.matrix (~ 1, colData(dds1))
n.sv = num.sv(mn.nc2,mod,method="leek")
n.sv
ls.svseq <- svaseq(mn.nc2, mod, mod0, method="irw")
dds1.sva <- dds1
colnames(ls.svseq$sv) <- colnames(ls.svseq$sv, do.NULL = FALSE, prefix = "SV")
colData(dds1.sva)<-cbind(colData(dds1.sva), ls.svseq$sv)
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls.svseq$sv), collapse="+"), "+", designform))
dds2 <- DESeq(dds1.sva)
dds2<-dds2[which(mcols(dds2)$betaConv),]
#dds2 is for DESEQ2
vsd=vst(dds2, blind = FALSE)
vsd3<-vsd
}
if(!findSV){print("Skipping SVA...")
dds2 <- DESeq(dds1)
dds2<-dds2[which(mcols(dds2)$betaConv),]
vsd=vst(dds2, blind = FALSE); vsd3 = vsd
}
if(removeSV)
{assay(vsd3) <- limma::removeBatchEffect(assay(vsd3)
, covariates = colData(vsd3)[,c(grep("SV", colnames(colData(vsd3)), value=T),vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))])]
, design = model.matrix( ~ colData(dds2)[,tail(vc.design,1)]))}
require(ggplot2)
fx.gg_col<-function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
col.pal<- c("violet", "darkmagenta",
"deepskyblue", "blue",
"springgreen", "darkgreen")
names(col.pal)<-c("X1_femaleXO", "X1_femaleXX",
"X3_maleXO","X3_maleXY","X4_maleXO","X4_maleXY")
#names(col.pal)<-sort(unique(dds2$clone))
df.ggPCA<-plotPCA(vsd, intgroup=c(colnames(colData(dds2))), returnData=T)
df.ggPCA3<-plotPCA(vsd3, intgroup=c(colnames(colData(dds2))), returnData=T)
percentVar <- round(100 * attr(df.ggPCA3, "percentVar"))
gg.PCA3<-ggplot(df.ggPCA3, aes(x = PC1, y = PC2, color = factor(condition), shape = factor(karyotype), label=RegistryID)) +
geom_point(size =3) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
ggtitle(paste0("SV =",findSV,", design =",designform)) +
scale_color_manual(values = col.pal) +
theme(text = element_text(size = 8))
gg.PCA3
ggsave(paste0(outdir,"/PCA3.pdf"), gg.PCA3, device = "pdf", width=4, height =2)
require(ggdendro)
sampleTree <- hclust(dist(t(assay(vsd3))), method = "average")
ggdendrogram(sampleTree) + labs(title="WGCNA Sample Dendrogram")
sample_dend_data <- dendro_data(sampleTree, type = "rectangle")
sample_dend_data$labels <- sample_dend_data$labels %>%
left_join(df.design |>
dplyr::select(RegistryID, clone, karyotype, donor), by=c("label"="RegistryID"))
col.pal2<-c(col.pal, fx.gg_col(length(unique(dds2$donor))), "grey30", "grey60", "grey90")
names(col.pal2)<-c(names(col.pal), sort(unique(dds2$donor)), sort(unique(dds2$karyotype)))
ggplot() +
geom_segment(data=sample_dend_data$segments,
mapping=aes(x = x, y = y, xend = xend, yend = yend))+
geom_text(data = sample_dend_data$labels,
aes(x=x, y=y, label = label),
angle = 90, size = 3) +
geom_tile(data = sample_dend_data$labels,
aes(x=x,y=-6, fill=clone), height=2) +
annotate(geom = "text", x=52, y=-6, label="clone") +
scale_x_continuous(limits=c(0,60)) +
geom_tile(data = sample_dend_data$labels,
aes(x=x,y=-9, fill=karyotype), height=2) +
annotate(geom = "text", x=90, y=-9, label="karyotype") +
geom_tile(data = sample_dend_data$labels,
aes(x=x,y=-12, fill=donor), height=2) +
annotate(geom = "text", x=90, y=-12, label="donor") +
scale_fill_manual(values = col.pal2) + theme_bw() + ggtitle(paste0("SV =",findSV,", design =",designform))
ggsave(paste0(outdir,"/Dendrogram.pdf"))
rm(dds0)
rm(dds1)
rm(dds1.sva)
dds<-dds2
rm(dds2)
saveRDS(dds, file = paste0(outdir,"/dds.rds"))
saveRDS(vsd, file = paste0(outdir,"/vsd.rds"))
saveRDS(vsd3, file = paste0(outdir,"/vsd3.rds"))
dds<-readRDS(file = paste0(outdir,"/dds.rds"))
ap <- 0.1
l2fc<-0
require(org.Hs.eg.db)
require(tidyverse)
require(clusterProfiler)
require(BioVenn)
if(vc.designLast == "condition")
{
DF.resF<-results(dds, contrast=c("condition","X1_femaleXO","X1_femaleXX"), alpha = ap)
DF.resM1<-results(dds, contrast=c("condition","X3_maleXO","X3_maleXY"), alpha = ap)
DF.resM2<-results(dds, contrast=c("condition","X4_maleXO","X4_maleXY"), alpha = ap)
DF.resO1<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXO"), alpha = ap)
DF.resO2<-results(dds, contrast=c("condition","X1_femaleXO","X4_maleXO"), alpha = ap)
DF.resO3 <-results(dds, contrast=c("condition","X3_maleXO","X4_maleXO"), alpha = ap)
DF.resE1<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXY"), alpha = ap)
DF.resE2 <- results(dds, contrast=c("condition","X1_femaleXO","X4_maleXY"), alpha = ap)
DF.resE3 <- results(dds, contrast=c("condition","X3_maleXY","X4_maleXY"), alpha = ap)
df.resF<-as.data.frame(DF.resF)
df.resM1<-as.data.frame(DF.resM1)
df.resM2<-as.data.frame(DF.resM2)
df.resO1<-as.data.frame(DF.resO1)
df.resO2<-as.data.frame(DF.resO2)
df.resO3 <- as.data.frame(DF.resO3)
df.resE1<-as.data.frame(DF.resE1)
df.resE2<-as.data.frame(DF.resE2)
df.resE3 <- as.data.frame(DF.resE3)
colnames(df.resF)<-paste("fXO", colnames(df.resF), sep = "_")
colnames(df.resM1)<-paste("mXO1", colnames(df.resM1), sep = "_")
colnames(df.resM2)<-paste("mXO2", colnames(df.resM2), sep = "_")
colnames(df.resO1)<-paste("fmO1", colnames(df.resO1), sep = "_")
colnames(df.resO2)<-paste("fmO2", colnames(df.resO2), sep = "_")
colnames(df.resO3) <- paste("m1m2O", colnames(df.resO3), sep = "_")
colnames(df.resE1)<-paste("fmE1", colnames(df.resE1), sep = "_")
colnames(df.resE2)<-paste("fmE2", colnames(df.resE2), sep = "_")
colnames(df.resE3)<-paste("m1m2E", colnames(df.resE3), sep = "_")
df.all<-cbind(assay(vsd3), df.resF,
df.resM1,
df.resM2,
df.resO1,
df.resO2,
df.resO3, df.resE1,
df.resE2,
df.resE3)
}
#metadata(dds)<-df.meta
df.meta<-rowRanges(dds) |> as.data.frame()
df.meta$ENSEMBL <- row.names(df.meta)
df.meta$ENSEMBL <- gsub("\\..*","", df.meta$ENSEMBL)
All.df <- bitr(df.meta$ENSEMBL, fromType = "ENSEMBL",
toType = c("ENTREZID", "SYMBOL"),
OrgDb = org.Hs.eg.db)
All.df.ENSG<-All.df[!duplicated(All.df$ENSEMBL),]
rownames(All.df.ENSG)<-All.df.ENSG$ENSEMBL
df.all2<-merge(df.meta[,c(1:LastMetaColumn,length(df.meta))], df.all, by = "row.names")
rownames(df.all2)<-df.all2$Row.names
colnames(df.all2)[1]<-"gene_name"
df<-df.all2[order(match(rownames(df.all2), rownames(assay(vsd3)))),] |>
mutate(gene_name = All.df.ENSG[ENSEMBL,"SYMBOL"])
rm(df.all)
rm(df.all2)
# # all vtf
if(vc.designLast == "condition")
{
vtf.F <- (df$fXO_padj <= ap & abs(df$fXO_log2FoldChange) >= l2fc)
vtf.M1 <- (df$mXO1_padj <= ap & abs(df$mXO1_log2FoldChange) >= l2fc)
vtf.M2 <- (df$mXO2_padj <= ap & abs(df$mXO2_log2FoldChange) >= l2fc)
vtf.O1 <- (df$fmO1_padj <= ap & abs(df$fmO1_log2FoldChange) >= l2fc)
vtf.O2 <- (df$fmO2_padj <= ap & abs(df$fmO2_log2FoldChange) >= l2fc)
vtf.O3 <- (df$m1m2O_padj <= ap & abs(df$m1m2O_log2FoldChange) >= l2fc)
vtf.E1 <- (df$fmE1_padj <= ap & abs(df$fmE1_log2FoldChange) >= l2fc)
vtf.E2 <- (df$fmE2_padj <= ap & abs(df$fmE2_log2FoldChange) >= l2fc)
vtf.E3 <- (df$m1m2E_padj <= ap & abs(df$m1m2E_log2FoldChange) >= l2fc)
names(vtf.F)<-rownames(df)
names(vtf.M1)<-rownames(df)
names(vtf.M2)<-rownames(df)
names(vtf.O1)<-rownames(df)
names(vtf.O2)<-rownames(df)
names(vtf.O3)<-rownames(df)
names(vtf.E1)<-rownames(df)
names(vtf.E2)<-rownames(df)
names(vtf.E3)<-rownames(df)
}
if(!("seqnames" %in% colnames(df)))
{
vtf.X <- (df$Chr == "chrX" & as.numeric(df$Start) > 2750000 & as.numeric(df$End)<155701383)
vtf.Y <- (df$Chr == "chrY" & as.numeric(df$Start) > 2750000 & as.numeric(df$End)<56887903)
vtf.PAR <- ((df$Chr == "chrX" | df$Chr == "chrY") & !(vtf.X) & !(vtf.Y))
}
if("seqnames" %in% colnames(df))
{
vtf.X <- (df$seqnames == "chrX" & as.numeric(df$start) > 2750000 & as.numeric(df$end)> 2750000 & as.numeric(df$start)<155701383 & as.numeric(df$end)<155701383)
vtf.Y <- (df$seqnames == "chrY" & as.numeric(df$start) > 2750000 & as.numeric(df$end)> 2750000 & as.numeric(df$start) < 56887903 & as.numeric(df$end)<56887903)
vtf.PAR <- ((df$seqnames == "chrX" | df$seqnames == "chrY") & !(vtf.X) & !(vtf.Y))
}
names(vtf.X)<-rownames(df)
names(vtf.Y)<-rownames(df)
names(vtf.PAR)<-rownames(df)
vn.oFM1<-length(which(vtf.F & vtf.M1))
vn.oFM2<-length(which(vtf.F & vtf.M2))
vn.oM1M2<-length(which(vtf.M1 & vtf.M2))
p.oFM1<-phyper(vn.oFM1-1, length(which(vtf.M1)),
length(dds)-length(which(vtf.M1)),
length(which(vtf.F)), lower.tail = F)
p.oFM2<-phyper(vn.oFM2-1, length(which(vtf.M2)),
length(dds)-length(which(vtf.M2)),
length(which(vtf.F)), lower.tail = F)
p.oM1M2<-phyper(vn.oM1M2-1, length(which(vtf.M2)),
length(dds)-length(which(vtf.M2)),
length(which(vtf.M1)), lower.tail = F)
biovenn<-draw.venn(
rownames(df[which(df$fXO_padj<=ap & (abs(df$fXO_log2FoldChange) >= l2fc)),]),
rownames(df[which(df$mXO1_padj<=ap & (abs(df$mXO1_log2FoldChange) >= l2fc)),]),
rownames(df[which(df$mXO2_padj<=ap & (abs(df$mXO2_log2FoldChange) >= l2fc)),]),
subtitle = signif(p.oM1M2,2), title = "b",
xtitle = "", ytitle = signif(p.oFM2,2), ztitle = signif(p.oFM1,2),
x_c = "violet", y_c ="deepskyblue", z_c = "springgreen",
width = 500, height = 500,
output = "pdf", nrtype = "abs", filename = paste0(outdir,"/venn.pdf")
)
require(preprocessCore)
require(limma)
if(vc.designLast == "condition")
{
vc.ENSG<-df$ENSEMBL
mn.stat<-matrix(cbind(df$fXO_stat,
df$mXO1_stat,
df$mXO2_stat,
df$fmO1_stat,
df$fmO2_stat,
df$m1m2O_stat,
df$fmE1_stat,
df$fmE2_stat,
df$m1m2E_stat
), ncol = 9)
rownames(mn.stat)<-vc.ENSG
colnames(mn.stat)<-c("fXO",
"mXO1",
"mXO2",
"fmO1",
"fmO2",
"m1m2O",
"fmE1",
"fmE2",
"m1m2E"
)
rownames(mn.stat)<-vc.ENSG
df.gsea1<-na.omit(merge(All.df.ENSG, mn.stat, by="row.names"))
mn.stat_qn<-normalize.quantiles(matrix(unlist(df.gsea1[,-c(1:4)]), ncol = length(colnames(df.gsea1[,-c(1:4)]))))
colnames(mn.stat_qn)<-colnames(mn.stat)
df.gsea2<-cbind(df.gsea1[c(1:4)],
mn.stat_qn,
"avemXO" = rowMeans(mn.stat_qn[,grep("^mXO",colnames(mn.stat_qn))]),
"aveXO" = rowMeans(mn.stat_qn[,grep("XO",colnames(mn.stat_qn))])
)
df.gsea2<-df.gsea2[!duplicated(df.gsea2$ENTREZID),]
ls.gsea <- list(
.fXO = (df.gsea2 %>%
arrange(desc(fXO)) %>%
dplyr::select(ENTREZID, fXO) %>%
deframe()
),
.mXO1 = (df.gsea2 %>%
arrange(desc(mXO1)) %>%
dplyr::select(ENTREZID, mXO1) %>%
deframe()
),
.mXO2 = (df.gsea2 %>%
arrange(desc(mXO2)) %>%
dplyr::select(ENTREZID, mXO2) %>%
deframe()
),
fmO1 = (df.gsea2 %>%
arrange(desc(fmO1)) %>%
dplyr::select(ENTREZID, fmO1) %>%
deframe()
),
fmE1 = (df.gsea2 %>%
arrange(desc(fmE1)) %>%
dplyr::select(ENTREZID, fmE1) %>%
deframe()
),
fmO2 = (df.gsea2 %>%
arrange(desc(fmO2)) %>%
dplyr::select(ENTREZID, fmO2) %>%
deframe()
),
fmE2 = (df.gsea2 %>%
arrange(desc(fmE2)) %>%
dplyr::select(ENTREZID, fmE2) %>%
deframe()
),
m1m2O = (df.gsea2 %>%
arrange(desc(m1m2O)) %>%
dplyr::select(ENTREZID, m1m2O) %>%
deframe()
),
m1m2E = (df.gsea2 %>%
arrange(desc(m1m2E)) %>%
dplyr::select(ENTREZID, m1m2E) %>%
deframe()
),
ave.XO = (df.gsea2 %>%
arrange(desc(aveXO)) %>%
dplyr::select(ENTREZID, aveXO) %>%
deframe()
),
ave.mXO = (df.gsea2 %>%
arrange(desc(avemXO)) %>%
dplyr::select(ENTREZID, avemXO) %>%
deframe()
)
)
}
rm(dds)
saveRDS(df.gsea2, file = paste0(outdir,"/df.gsea2.rds"))
saveRDS(ls.gsea, file = paste0(outdir,"/ls.gsea.rds"))
df.DEGtable<-df %>%
dplyr::select(-(starts_with(c("d", "Start", "End", "Strand", "Length", "fmO", "m1m2E"))
| ends_with(c("basemean", "lfcSE", "pvalue"))))
df.DEGtable$ENSEMBL <- row.names(df)
write.csv(df.DEGtable, file = paste0(outdir,"/DEG.csv"), row.names = F,quote = F)
require(msigdbr)
require(tidyverse)
msig_df <- msigdbr(species = "Homo sapiens")
require(clusterProfiler)
require(org.Hs.eg.db)
require(GOSemSim)
require(enrichplot)
require(DOSE)
pcut1 <-0.5
qcut1 <- 0.5
pcut2 <- 0.05
qcut2 <- 0.05
msig_Hall_C1_CP_TFT_MOD_HP_C8 <- msig_df %>%
filter(gs_cat == "H" |
gs_cat == "C1" |
(gs_cat == "C2" & str_detect(gs_subcat, "^CP:")) |
(gs_cat == "C3" & str_detect(gs_subcat, "^TFT:")) |
gs_cat == "C4" |
(gs_cat == "C5" & str_detect(gs_subcat, "^HPO")) |
gs_cat == "C8") %>%
dplyr::select(gs_name, entrez_gene)
rm(msig_df)
#run once
fx.GSEA<-function(genelist, terms2genes){
set.seed(42)
gsea.result<-GSEA(genelist, TERM2GENE = terms2genes, pvalueCutoff = 0.1,
pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0)
return(gsea.result)
}
fx.gseGO<-function(genelist, ontology){
set.seed(42)
gsea.result<-gseGO(genelist, ont = ontology, OrgDb = org.Hs.eg.db, keyType = "ENTREZID",
pvalueCutoff = 0.1, pAdjustMethod = "BH", by="fgsea", seed=T, eps= 0)
return(gsea.result)
}
if(file.exists(paste0(outdir,"/msig_all.rds"))){msig_all<- readRDS(file = paste0(outdir,"/msig_all.rds"))}
if(!exists("msig_all"))
{
ls.msig<-lapply(ls.gsea, function(X){fx.GSEA(X, msig_Hall_C1_CP_TFT_MOD_HP_C8)})
ls.msig<-lapply(ls.msig, function(X){setReadable(X, org.Hs.eg.db, keyType = "ENTREZID")})
ls.msig2<-Map(mutate, ls.msig, Category = names(ls.msig))
msig_all<-ls.msig2[[1]]
logplim<-15
msig_all@result<-do.call("rbind", lapply(ls.msig2, slot, 'result')) %>%
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1)
}
if(file.exists(paste0(outdir,"/GO_all.rds"))){go_all<- readRDS(file = paste0(outdir,"/GO_all.rds"))}
if(!exists("GO_all"))
{
ls.go<-lapply(ls.gsea, function(X){fx.gseGO(X, "ALL")})
ls.go<-lapply(ls.go, function(X){setReadable(X, org.Hs.eg.db, keyType = "ENTREZID")})
ls.go2<-Map(mutate, ls.go, Category = names(ls.go))
GO_all<-ls.go2[[1]]
logplim<-15
GO_all@result<-do.call("rbind", lapply(ls.go2, function(X) {as.data.frame(X@result)})) %>%
arrange(rank) %>% add_column("Score" = pmin(-log(.$p.adjust, 10),logplim) * sign(.$NES), .after = 1)
}
fx.labels1<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 1 ))
abbreviate(s, minlength = 30, dot = T)
})
fx.labels2<- Vectorize(function(X) {
s <- toupper(sapply(strsplit(str_replace(X, "_", ":"), "[:]"), getElement, 2))
s<-str_replace_all(s, "_", " ")
abbreviate(s, minlength = 30, dot = T,)
})
gpfx.gsea<-function(prefix, labelfx, padj, lab.filter, X){
X1 <- X@result %>% filter(str_detect(Description, prefix) &
(Description %in% (X@result %>% filter(p.adjust <= padj & str_detect(Category, lab.filter)) %>% dplyr::pull(Description))))
X2 <-X1 %>% filter(Category == lab.filter)
ggplot(X1 %>% mutate(Description = fct_relevel(Description, X2 %>% arrange(Score) %>% dplyr::pull(Description))),
aes(x=Score, y=Description)) + facet_wrap(vars(Category), nrow=1, drop = F) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(color=NES, size = as.numeric(summary(strsplit(core_enrichment, "/"))[,1]))) +
scale_color_gradient2(low="blue",mid = "white", high="red") +
scale_y_discrete(labels = labelfx) +
theme_minimal(base_size = 7) +
xlab("signed log10-scaled p.adjust") +
ylab(prefix) +
labs(size="core genes")
}
vc.ORPHA881<-c("HP_Abnormality_of_the_ovary","HP_Short_neck","HP_Delayed_puberty","HP_Increased_circulating_gonadotropin_level","HP_Short_sternum","HP_Osteopenia","HP_Osteoporosis","HP_Growth_delay","HP_Intrauterine_growth_retardation","HP_Delayed_skeletal_maturation","HP_Cubitus_valgus","HP_High_urinary_gonadotropin_level","HP_Short_stature","HP_Wide_intermamillary_distance","HP_Aplasia/Hypoplasia_of_the_nipples","HP_Premature_ovarian_insufficiency","HP_Female_infertility","HP_Postnatal_growth_retardation","HP_Increased_upper_to_lower_segment_ratio","HP_Abnormal_forearm_bone_morphology","HP_Enlarged_thorax","HP_High_palate","HP_Retrognathia","HP_Micrognathia","HP_Hearing_impairment","HP_Low-set_ears","HP_Recurrent_otitis_media","HP_Webbed_neck","HP_Thickened_nuchal_skin_fold","HP_Broad_neck","HP_Behavioral_abnormality","HP_Anxiety","HP_Abnormal_nonverbal_communicative_behavior","HP_Primary_amenorrhea","HP_Hypertension","HP_Glucose_intolerance","HP_Secondary_amenorrhea","HP_Hashimoto_thyroiditis","HP_Shield_chest","HP_Specific_learning_disability","HP_Hepatic_steatosis","HP_Obesity","HP_Failure_to_thrive_in_infancy","HP_Hypoplastic_toenails","HP_Low_posterior_hairline","HP_High,_narrow_palate","HP_Kyphosis","HP_Genu_valgum","HP_Elevated_hepatic_transaminase","HP_Aortic_arch_aneurysm","HP_Dermatoglyphic_ridges_abnormal","HP_Enlargement_of_the_distal_femoral_epiphysis","HP_Irregular_proximal_tibial_epiphyses","HP_Abnormal_dermatoglyphics","HP_Neck_pterygia","HP_Short_4th_metacarpal","HP_Short_5th_metacarpal","HP_Hypermobility_of_toe_joints","HP_Horseshoe_kidney","HP_Ectopic_kidney","HP_Abnormality_of_the_dentition","HP_Epicanthus","HP_Cystic_hygroma","HP_Strabismus","HP_Ptosis","HP_Myopia","HP_Depression","HP_Pectus_excavatum","HP_Hyperinsulinemia","HP_Atypical_scarring_of_skin","HP_Melanocytic_nevus","HP_Lymphedema","HP_Vitiligo","HP_Abnormal_fingernail_morphology","HP_Hip_dysplasia","HP_Hepatic_fibrosis","HP_Alopecia","HP_Atrial_septal_defect","HP_Bicuspid_aortic_valve","HP_Prolonged_QT_interval","HP_Myocardial_infarction","HP_Coarctation_of_aorta","HP_Pes_planus","HP_Hyperconvex_fingernails","HP_Short_toe","HP_Celiac_disease","HP_Cholestatic_liver_disease","HP_Scoliosis","HP_Autoimmunity","HP_Madelung_deformity","HP_Inverted_nipples","HP_Nevus","HP_Reduced_bone_mineral_density","HP_Numerous_congenital_melanocytic_nevi","HP_Type_II_diabetes_mellitus","HP_Attention_deficit_hyperactivity_disorder","HP_Hyperlipidemia","HP_External_ear_malformation","HP_Aplasia/Hypoplasia_of_the_mandible","HP_Splayed_toes","HP_Delayed_social_development","HP_Thyroiditis","HP_Gonadoblastoma","HP_Gastrointestinal_angiodysplasia","HP_Cirrhosis","HP_Inflammation_of_the_large_intestine","HP_Biliary_cirrhosis","HP_Aortic_dissection","HP_Melanoma","HP_Hypoplastic_left_heart","HP_Gastrointestinal_inflammation","HP_Arterial_dissection","HP_Renal_hypoplasia/aplasia","HP_Neurodevelopmental_delay")
lab.filter <- "ave.mXO"
lab.filter2<- names(table(msig_all@result$Category))[c(1:7,11)]
pcut1<-0.05
pcut2<-0.05
glol.chr<-gpfx.gsea("chr", fx.labels1, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
glol.tft<-gpfx.gsea("TARGET_GENES",fx.labels1, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
glol.hallmark<-gpfx.gsea("^HALLMARK", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
glol.wp<-gpfx.gsea("^WP", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
glol.kegg<-gpfx.gsea("^KEGG", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
glol.desc<-gpfx.gsea("^DESCARTES", fx.labels2, pcut1, lab.filter, msig_all %>% filter(Category %in% lab.filter2))
lab.filter <- "ave.mXO"
pcut1<-0.1
pcut2<-0.1
glol.hpTS<-gpfx.gsea("^HP", fx.labels2, pcut1, lab.filter,
msig_all %>%filter(Category %in% lab.filter2 &
Description %in% toupper(vc.ORPHA881)))
glol.hpTS + theme(legend.position = "top", legend.direction = "horizontal") +
scale_x_continuous(breaks=c(-2,0,2,4,6))
ggsave(paste0(outdir,"/glol.hpTS.",lab.filter,".pdf"), device = "pdf", width = 4.5, height = 4.5, units = "in")
glol.wp + theme(legend.position = "top", legend.direction = "horizontal")
ggsave(paste0(outdir,"/glol.wp.",lab.filter,".pdf"), device = "pdf", width = 8, height = 6, units = "in")
saveRDS(msig_all, file = paste0(outdir,"/msig_all.rds"))
saveRDS(GO_all, file = paste0(outdir,"/GO_all.rds"))
lab.filter <- "ave.XO"
pcut1<-0.0001
pcut2<-0.0001
glol.bp<-gpfx.gsea(".", fx.labels1, pcut1, lab.filter, GO_all %>% filter(Category %in% lab.filter2))
glol.cc<-gpfx.gsea(".", fx.labels1, pcut1, lab.filter, GO_all %>% filter(Category %in% lab.filter2 & ONTOLOGY == "CC"))
require(grid)
require(gridExtra)
ls.lol.plots<-list(
glol.hallmark + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size = 4),
glol.kegg + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size = 4),
glol.desc + theme(axis.line = element_line(colour = "grey50"), legend.position = "top", legend.direction = "horizontal") + scale_size_area(max_size =4)
)
vn.lol.plots<-sapply(ls.lol.plots, function(X){max(ggplot_build(X)$data[[1]] %>% filter(PANEL == 1) %>% dplyr::select(y))})
ggsave(paste0(outdir,"/glols_",lab.filter,".pdf"), grid.arrange(grobs = ls.lol.plots, nrow=length(vn.lol.plots),
heights = c(10*vn.lol.plots/sum(vn.lol.plots))),
width = 6.5, height = 9,
device = "pdf", units = "in")
#ensembl_version<-"102"
unique_categories <- c("ave.XO", "ave.mXO", ".fXO", ".mXO1", ".mXO2", "fmE1", "fmE2", "m1m2O")
GO_all |>
as.data.frame() |>
mutate(
object_file = "GO_all",
ID = sprintf("GO%s_%s", ONTOLOGY, ID)
) |>
dplyr::select(-ONTOLOGY) |>
rbind(
msig_all |>
as.data.frame() |>
mutate(object_file = "msig_all")
) |>
filter(Category %in% unique_categories) |>
rename(comparison = "Category") |>
dplyr::select(-c(Score, object_file, enrichmentScore, leading_edge, rank)) |>
arrange(qvalue) |>
write_csv(paste0(outdir,"/Table_S2.csv"))
require(biomaRt)
require(tidyverse)
###################################
# RUN ANNOTATION FILE HERE #
###################################
source("./AhernNCC_annotation_SP.R")
df.VarEsc<-df.Tuk2017 %>% mutate(Reported = case_when(!(SYMBOL %in% df.Tuk2017$SYMBOL[df.Tuk2017$Reported_XCI_status %in% c("Escape","Variable")]) &
(SYMBOL %in% c("IRAK1","MBTPS2", "SMS") |
SYMBOL %in% rownames(df.Katsir2019) |
SYMBOL %in% rownames(df.Sauterad2021 %>% filter(str_detect(XCI.status,"E")))) ~ "Variable",
TRUE ~ Reported_XCI_status)) %>%
mutate_at(vars("Reported"), ~replace_na(.,"Unknown"))
df_ae_counts <- read_tsv("./Xgenes_NCC.txt",
col_types = cols(contig=col_character(),
variants=col_character())) |>
filter(contig == "X" &
totalCount > 0) |> rename(ensembl_id = name) |>
mutate(sample_id=str_extract(bam, "d[0-9]*")) |> mutate(RegistryID = sample_id) |>
left_join(df.design, by = "RegistryID") |>
mutate(ENSEMBL = t(matrix(unlist(strsplit(ensembl_id, "[.]")),nrow=2))[,1]) |>
left_join(df.VarEsc |> rownames_to_column("ENSEMBL"), by = "ENSEMBL") |>
dplyr::select(ensembl_id, aCount, bCount, totalCount, sample_id, Sample, condition, Sex, donor,
simple, karyotype, Xa, CellType, line,
ENSEMBL, SYMBOL, Region, Reported)
pc<-0
sPcut<-0.1
POW<-0.8
P0<-0.2 #double of 0.1 cutoff used below, ie. estimating 90% power of identifying all escapees with an allelic ratio of 0.2
P1<-0.01 # read error estimate
N<-c(1:1000)
K<-qbinom(sPcut, N, P0)-1
#ALPHA<-pbinom(K, N, P0)
POWER<-pbinom(q = K, size = N,prob =P1)
minREAD<-min(which(POWER>=POW))
df_ae_binom <- df_ae_counts |> filter(totalCount >= minREAD) |>
filter(str_detect(donor, "AG05278")) |>
mutate(aCount = aCount+pc,
bCount = bCount+pc) |>
rowwise() |>
mutate(LAF = min(aCount,bCount)/(aCount+bCount)) #|>
df_ae_binom2<- df_ae_binom |> filter(karyotype == "XO") |> group_by(ensembl_id) |>
summarise(maxLAFxo = max(LAF)) |>
left_join(df_ae_binom, by="ensembl_id") |> rowwise() |>
mutate(aCount = max(aCount - round(maxLAFxo * totalCount,digits = 0)-pc ,0)) |>
mutate(bCount = max(bCount - round(maxLAFxo * totalCount,digits = 0)-pc ,0)) |>
mutate(totalCount = aCount+bCount) |>
mutate(LAF = min(aCount,bCount)/(aCount+bCount)) |>
mutate(pBi = pbinom(q = min(aCount,bCount), size = (aCount+bCount), prob = (P0/2),lower.tail = F)) |>
mutate(tf.esc = ((pBi <= sPcut) & (LAF >= P0/2))) |> ungroup() |>
group_by(ensembl_id, line) |> mutate(aSum = sum(aCount), bSum = sum(bCount)) |>
mutate(gLAF = min(aSum,bSum)/(aSum+bSum)) |>
mutate(gpBi = pbinom(q = min(aSum,bSum), size = (aSum+bSum), prob = (P0/2),lower.tail = F)) |>
mutate(gtf.esc = ((gpBi <= sPcut) & (gLAF >= P0/2))) |> ungroup()
vc.PAIR<-c("ZFX","TXLNG","MXRA5","TBL1X", "USP9X", "KDM5C", "NLGN4X", "KDM6A" , "EIF1AX", "PRKX", "RPS4X", "TMSB4X", "DDX3X") #"TSPYL2")
vc.PAIRY<-c("ZFY","TXLNGY","MXRA5Y","TBL1Y", "USP9Y", "KDM5D", "NLGN4Y", "UTY" , "EIF1AY", "PRKY", "RPS4Y1", "TMSB4Y", "DDX3Y") #"TSPY1")
vc.PAR<-df_ae_binom2 %>% filter(Region == "PAR") %>% pull(SYMBOL) %>% unique()
vc.oESC<-df_ae_binom2 %>% filter(tf.esc & Reported %in% c("Escape","Variable","VarOther") & !(SYMBOL %in% c(vc.PAR, vc.PAIR))) %>% pull(SYMBOL) %>% unique()
vc.uESC<-df_ae_binom2 %>% filter(tf.esc & !(SYMBOL %in% c(vc.PAR, vc.PAIR, vc.oESC, "IDS")) & !is.na(SYMBOL)) %>% pull(SYMBOL) %>% unique()
vc.OFF<-df_ae_binom2 %>% filter(!gtf.esc & totalCount >= minREAD & !(SYMBOL %in% c(vc.PAR, vc.PAIR, vc.oESC, "IDS")) & !is.na(SYMBOL)) %>% pull(SYMBOL) %>% unique()
vtf.PAIR<-(df$gene_name %in% vc.PAIR)
vtf.PAIRY<-(df$gene_name %in% vc.PAIRY)
vtf.PAIR2<-(df$gene_name %in% c(vc.PAIR,vc.PAIRY))
vtf.ESC<-(rownames(df) %in% (df_ae_binom2 %>% filter(tf.esc) %>% pull(ensembl_id) %>% unique()))
vtf.oESC<-(df$gene_name %in% vc.oESC)
vtf.uESC<-(df$gene_name %in% vc.uESC)
vtf.OFF<-(df$gene_name %in% vc.OFF)
require(tidyHeatmap)
library(circlize)
df4<-cbind(df, vtf.F, vtf.M1, vtf.M2,
fXO = vtf.F * df$fXO_log2FoldChange,
mXO1 = vtf.M1 * df$mXO1_log2FoldChange,
mXO2 = vtf.M2 * df$mXO2_log2FoldChange,
vst.med = rowMedians(assay(vsd3)),
vtf.X, vtf.Y, vtf.PAR, vtf.PAIR, vtf.PAIRY, vtf.ESC, vtf.oESC, vtf.uESC, vtf.OFF)
df.ae<-df4 |> mutate(ensembl_id = gene_id) |>
left_join(df_ae_binom2 |> dplyr::select(ensembl_id, sample_id, line, Sample, SYMBOL, #Region, Reported,
gLAF, gpBi, gtf.esc) %>% filter(!is.na(SYMBOL)) |>
pivot_wider(id_cols = c(ensembl_id),
names_from = c(line), values_from = c(gLAF, gpBi, gtf.esc), values_fn = unique) , by = "ensembl_id") %>% left_join(df.VarEsc[,c("SYMBOL","Region","Reported")] |> rownames_to_column("ENSEMBL"), by="ENSEMBL")
rownames(df.ae)<-df.ae$ensembl_id
df.ae<-df.ae[order(match(rownames(df.ae), rownames(df))),]
df.ae[is.na(df.ae$Reported),"Reported"]<-"LowAlleleC."
#vsd_abs contains SV-corrected vst counts
vsd_abs_ae <- as.data.frame(assay(vsd3)) %>%
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_count") %>%
left_join(colData(vsd3) %>% as.data.frame %>% rownames_to_column(var="sample_name")) %>%
left_join(df.ae, by = "ensembl_id")
vsd_abs_markers <- left_join(vsd_abs_ae, df.Frith %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Frith)) %>%
left_join(df.Lee %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Lee)) %>%
left_join(df.KarzbrunDEGs %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, KarzbrunDEGs)) %>%
left_join(df.SimoesCosta %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, SimoesCosta)) %>%
left_join(df.Hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) %>%
left_join(df.Soldatov %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Soldatov))
vsd_med_ae <- as.data.frame(assay(vsd3)-rowMedians(assay(vsd3))) %>%
rownames_to_column("ensembl_id") %>% pivot_longer(-ensembl_id, names_to="sample_name", values_to="vst_diff") %>%
left_join(colData(vsd3) %>% as.data.frame() %>% rownames_to_column(var="sample_name")) %>%
left_join(df.ae, by = "ensembl_id")
vsd_med_markers<-left_join(vsd_med_ae, df.Frith %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Frith)) %>%
left_join(df.Lee %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Lee)) %>%
left_join(df.KarzbrunDEGs %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, KarzbrunDEGs)) %>%
left_join(df.SimoesCosta %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, SimoesCosta)) %>%
left_join(df.Hsiao %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Hsiao)) %>%
left_join(df.Soldatov %>% rownames_to_column(var="gene_name") %>% dplyr::select(gene_name, Soldatov))
#Escape vs. DESEQ2 stats and med_vsd
fx.col.med = colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
tab.sign.All<-table(sign(df[which(vtf.F & vtf.M1 & vtf.M2),c("fXO_stat", "mXO1_stat", "mXO2_stat")]))
tab.sign.notM1<-table(sign(df[which(vtf.F & !vtf.M1 & vtf.M2),c("fXO_stat", "mXO2_stat")]))
tab.sign.notM2<-table(sign(df[which(vtf.F & vtf.M1 & !vtf.M2),c("fXO_stat", "mXO1_stat")]))
tab.sign.notF<-table(sign(df[which(!vtf.F & vtf.M1 & vtf.M2),c("mXO1_stat", "mXO2_stat")]))
p.sign.All<-signif(pbinom(sum(tab.sign.All[c(1,length(tab.sign.All))]), size = sum(tab.sign.All),prob = 0.25,lower.tail = F),2)
p.sign.notM1<-signif(pbinom(sum(tab.sign.notM1[c(1,length(tab.sign.notM1))]), size = sum(tab.sign.notM1),prob = 0.5,lower.tail = F),2)
p.sign.notM2<-signif(pbinom(sum(tab.sign.notM2[c(1,length(tab.sign.notM2))]), size = sum(tab.sign.notM2),prob = 0.5,lower.tail = F),2)
p.sign.notF<-signif(pbinom(sum(tab.sign.notF[c(1,length(tab.sign.notF))]), size = sum(tab.sign.notF),prob = 0.5,lower.tail = F),2)
label.All<-paste0("All (", sum(tab.sign.All[c(1,length(tab.sign.All))]),"/",sum(tab.sign.All), ") ",p.sign.All)
label.notM1<-paste0("notM1 (", sum(tab.sign.notM1[c(1,length(tab.sign.notM1))]),"/",sum(tab.sign.notM1), ") ",p.sign.notM1)
label.notM2<-paste0("notM2 (", sum(tab.sign.notM2[c(1,length(tab.sign.notM2))]),"/",sum(tab.sign.notM2), ") ",p.sign.notM2)
label.notF<-paste0("notF (", sum(tab.sign.notF[c(1,length(tab.sign.notF))]),"/",sum(tab.sign.notF), ") ",p.sign.notF)
tb1<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO1_stat)) & (sign(fXO_stat) == sign(mXO2_stat)) & vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.All) %>% add_column(Dir = "conc.")
tb2<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO1_stat)) & vtf.F & vtf.M1 & !vtf.M2) %>% add_column(DEGo = label.notM2) %>% add_column(Dir = "conc.")
tb3<-vsd_med_ae %>% filter((sign(fXO_stat) == sign(mXO2_stat)) & vtf.F & !vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notM1) %>% add_column(Dir = "conc.")
tb4<-vsd_med_ae %>% filter((sign(mXO1_stat) == sign(mXO2_stat)) & !vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "conc.")
tb5<-vsd_med_ae %>% filter(((sign(fXO_stat) != sign(mXO1_stat)) | (sign(fXO_stat) != sign(mXO2_stat))) & vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.All) %>% add_column(Dir = "disc.")
tb6<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO1_stat)) & vtf.F & vtf.M1 & !vtf.M2) %>% add_column(DEGo = label.notM2) %>% add_column(Dir = "disc.")
tb7<-vsd_med_ae %>% filter((sign(fXO_stat) != sign(mXO2_stat)) & vtf.F & !vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notM1) %>% add_column(Dir = "disc.")
tb8<-vsd_med_ae %>% filter((sign(mXO1_stat) != sign(mXO2_stat)) & !vtf.F & vtf.M1 & vtf.M2) %>% add_column(DEGo = label.notF) %>% add_column(Dir = "disc.")
tb<- bind_rows(tb1, tb2, tb3, tb4,
tb5, tb6, tb7, tb8
)
tbhm<- tb %>% dplyr::select(ENSEMBL, sample_name, vst_diff, fXO_stat, mXO1_stat, mXO2_stat, Dir, DEGo, condition, karyotype) %>%
group_by(DEGo) %>%
heatmap(ENSEMBL, sample_name, vst_diff, scale="none", palette_value = fx.col.med,
palette_grouping = list(rep("white",length(table(tb$DEGo))) )) %>%
add_bar(fXO_stat) %>% add_bar(mXO1_stat) %>% add_bar(mXO2_stat) %>% add_tile(Dir, palette=c("saddlebrown","tan")) %>%
add_tile(condition, palette = col.pal[tb$condition |> as.character() |> unique()]) %>%
add_tile(karyotype, palette = c("grey50", "grey75", "grey25"))
#tbhm
save_pdf(tbhm,filename = paste0(outdir, "/GlobalDEGs2.pdf"),width = 7.5, height=10, units = "in")
#Escape vs. DESEQ2 stats and med_vsd
l2fc<-0
require(RColorBrewer)
col.pal3<-brewer.pal(n = 9, name = "Set1")[c(5:9)]
names(col.pal3)<-c("Escape","Inactive","LowAlleleC.","Unknown", "Variable")
fx.col.med = colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
tb1<-vsd_med_ae %>% filter(vtf.PAR) %>% add_column(Strata = "..PAR")
tb2<-vsd_med_ae %>% filter(vtf.PAIR) %>% add_column(Strata = ".PairX")
#tb3<-vsd_med_ae %>% filter(vtf.PAIRY) %>% add_column(Strata = ".PairY")
tb4<-vsd_med_ae %>% filter(vtf.oESC &
(gtf.esc_XX19 | gtf.esc_XX23) &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc))) %>%
add_column(Strata = "known escapees")
tb5<-vsd_med_ae %>% filter(
vtf.uESC &
(gtf.esc_XX19 | gtf.esc_XX23) &
((fXO_padj <= ap & fXO_log2FoldChange <= -l2fc))
) %>% add_column(Strata = "novel/reactivated")
tb_Hm_allelicX<- bind_rows(tb1, tb2, #tb3,
tb4, tb5)
tbhm<- tb_Hm_allelicX %>% drop_na(gene_name) %>% filter(!(gene_name == "PRKY")) %>%
arrange(condition) %>% group_by(Strata) %>%
heatmap(gene_name, sample_name, vst_diff, .scale="none", palette_value = fx.col.med, row_names_gp = gpar(fontsize = 6), cluster_rows = F) %>%
add_bar("gLAF_XX19") %>% add_bar("gLAF_XX23") %>%
add_bar(fXO) %>% add_bar(mXO1) %>% add_bar(mXO2) %>%
add_tile(condition, palette = col.pal[tb_Hm_allelicX$condition |> as.character() |> unique()]) %>%
add_tile(Reported, palette = col.pal3[tb_Hm_allelicX$Reported |> as.character() |> unique()])
save_pdf(tbhm,filename = paste0(outdir,"/Hm_allelicX002.pdf"),width = 8, height=8, units = "in")
df.Markers_tb <- df.Markers.abs %>%
rownames_to_column("sample_name") %>% pivot_longer(-sample_name, names_to="Marker", values_to="vst_med") %>%
left_join(colData(vsd3) %>% as.data.frame %>% rownames_to_column(var="sample_name"))
require(pals)
hm.markers<-heatmap(df.Markers_tb %>% filter(!str_detect(Marker, "Hsiao")) %>%
group_by(donor), .row = Marker,.column = Sample,
.value = vst_med, scale = "none", cluster_rows=T,
palette_value = colorRamp2(c(7:13), turbo(7))
) %>%
add_tile(karyotype, palette = c("grey90", "grey30", "grey60"))
save_pdf(hm.markers,filename = paste0(outdir,"/Hm.markers.pdf"),width = 8, height=5, units = "in")
require(ggprism)
require(rstatix)
#look at HOXexpression
fx.markers<-function(vsd_tibble, anno_label, vst_unit, miny, maxy) {
tbF<-vsd_tibble |>
mutate(unilabel = vsd_tibble |> select(all_of(anno_label)) |> unlist()) |>
filter(!is.na(unilabel) & (unilabel != ".F") & (vtf.F | vtf.M1 | vtf.M2)) |>
dplyr::select(unilabel, vst_unit, condition, donor,simple, line, gene_name) |>
rename(out_vst = all_of(vst_unit))
gg.tbF<-ggplot(tbF, aes(x= simple, y=out_vst)) +
facet_grid(rows = vars(donor), cols = vars(unilabel)) + ylim(miny,maxy) +
geom_boxplot(aes(fill=condition), outlier.shape = NA, notch = T) + scale_fill_manual(values = col.pal)
tb_med<-tbF %>% rstatix::group_by(donor, unilabel, simple) %>%
rstatix::get_summary_stats(out_vst) %>% add_column(group1 = "EU", .before = 1) %>%
mutate(group2 = simple) |>
pivot_wider(id_cols = c(donor, unilabel, group1), names_from=group2, values_from = median) |>
mutate(diff = XO-EU) |> mutate(group2 = "XO")
tb_mwup<- tbF %>% rstatix::group_by(donor, unilabel) %>%
rstatix::wilcox_test(out_vst ~ simple, p.adjust.method = "none", ref.group = "EU", conf.level = 0.9, paired = F)
tb_mwup2<-tb_mwup %>% rstatix::add_xy_position() %>%
mutate(y.position = maxy-0.5) %>% mutate(p.adj = signif(p,digits = 2))
gg.tbF + add_pvalue(tb_mwup2, label = "p.adj", remove.bracket = T, tip.length = 0, bracket.size = 0, label.size = 3) +
theme(text = element_text(size = 8)) + theme(legend.position = "none")
ggsave(filename = paste0(outdir,"/Hm.markers_",anno_label,"_",vst_unit,".pdf"),width = 2.5, height=3, units = "in")
}
fx.markers(vsd_med_markers, "Frith", "vst_diff", -1.5, 1.5)
fx.markers(vsd_med_markers, "KarzbrunDEGs", "vst_diff", -1.5, 1.5)
fx.markers(vsd_med_markers, "Lee", "vst_diff", -1.5, 1.5)
fx.markers(vsd_med_markers, "SimoesCosta", "vst_diff", -1.5, 1.5)
saveRDS(vsd_abs_ae, file = paste0(outdir,"/vsd_abs_ae.rds"))
saveRDS(vsd_med_ae, file = paste0(outdir,"/vsd_med_ae.rds"))
saveRDS(vsd_abs_markers, file = paste0(outdir,"/vsd_abs_markers.rds"))
saveRDS(vsd_med_markers, file = paste0(outdir,"/vsd_med_markers.rds"))
#vsd_abs_ae<-readRDS(file = paste0(outdir,"/vsd_abs_ae.rds"))
#vsd_med_ae<-readRDS(file = paste0(outdir,"/vsd_med_ae.rds"))
#vsd_med_markers<-readRDS(file = paste0(outdir,"/vsd_med_markers.rds"))
vcounts<-ggplot(vsd_abs_ae %>% filter((vtf.F | vtf.M1 | vtf.M2) & gene_name %in% c(vc.PAIR[!(vc.PAIR == "MXRA5")],vc.PAIRY)) %>%
mutate(gene_name = case_when(gene_name == "UTY" ~ "KDM6AL",
T ~ gene_name)), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), ncol =6, scales = "free") + scale_colour_manual(values=col.pal) +
geom_boxplot(outlier.shape = NA, fill = NA) +
geom_jitter(aes(color = clone), shape = 20, size = 1) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())
vcounts
ggsave(paste0(outdir,"/PAIR_NCCs.pdf"),
plot = vcounts + # theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen"))
, width = 7, height =7, units = "in")
vcounts<-ggplot(vsd_abs_ae |>filter(gene_name %in% c("XIST", "XACT", "FTX", "JPX", "FIRRE") |
ENSEMBL %in% c("ENSG00000241743"
)), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), ncol =6) + scale_colour_manual(values=col.pal) +
geom_boxplot(outlier.shape = NA, fill = NA) +
geom_jitter(aes(color = clone), shape = 20, size = 1) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())
vcounts
ggsave(paste0(outdir,"/XlncRNA_NCCs.pdf"),
plot = vcounts, width = 5, height =2, units = "in")
vcounts<-ggplot(vsd_abs_ae %>% filter(vtf.PAR) %>%
mutate(gene_name = case_when(gene_name %in% c("SPRY3","VAMP7","WASH6P","TMLHE","NA") ~ paste0(".",gene_name),
T ~ gene_name)), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), ncol =4) + scale_colour_manual(values=col.pal) +
geom_boxplot(outlier.shape = NA, fill = NA) + #ylim(c(9,13)) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())
vcounts
ggsave(paste0(outdir,"/PAR_NCCs.pdf"),
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen"))
, width = 7, height =7, units = "in")
vcounts<-ggplot(vsd_abs_ae %>% filter(str_detect(gene_name, "^HOX") & !str_detect(gene_name, "-AS")), aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), ncol =6) + scale_colour_manual(values=col.pal) +
geom_boxplot(outlier.shape = NA, fill = NA) + ylim(7.5,15) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())
vcounts
ggsave(paste0(outdir,"/HOX_NCCs.pdf"),
plot = vcounts + theme(legend.position = "botton", legend.box = "horizontal", text=element_text(size=7)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values = c("violet", "darkmagenta", "deepskyblue", "blue", "springgreen", "darkgreen"))
, width = 7, height =7, units = "in")
vc.top<-c("RPL10","RPL10A","RPL11","RPL12","RPL13","RPL13A","RPL14","RPL15","RPL17",
"RPL18","RPL18A","RPL19","RPL21","RPL22","RPL23","RPL23A","RPL24","RPL26",
"RPL27","RPL27A","RPL28","RPL29","RPL3","RPL30","RPL31","RPL32","RPL34",
"RPL35","RPL35A","RPL36","RPL36A","RPL37","RPL37A","RPL38","RPL39","RPL3L",
"RPL4","RPL41","RPL5","RPL6","RPL7","RPL7A","RPL8","RPL9","RPLP0","RPLP1",
"RPLP2","RPSA","RPS10","RPS11","RPS12","RPS13","RPS14","RPS15","RPS15A",
"RPS16","RPS17","RPS18","RPS19","RPS2","RPS20","RPS21","RPS23","RPS24",
"RPS25","RPS26","RPS27","RPS27A","RPS28","RPS29","RPS3","RPS3A","RPS4X",
"RPS4Y1","RPS4Y2","RPS5","RPS6","RPS7","RPS8","RPS9")
df.top <- df |> filter((gene_name %in% c(vc.top)) & #(mXO2_padj <= 0.1) &
!(str_detect(gene_name, "[XY]"))
) |>
dplyr::select(gene_name, mXO1_log2FoldChange, mXO2_log2FoldChange, fXO_log2FoldChange,
mXO1_padj, mXO2_padj, fXO_padj) |>
pivot_longer(cols = !(c(gene_name)), names_sep = "_", names_to= c("set",".value")) |>
mutate(subunit = case_when(str_detect(gene_name, "RPS") ~ "RPS",
str_detect(gene_name, "RPL") ~ "RPL",
T ~ "other"),
significant = case_when(padj <= 0.1 ~ T, T ~ F))
gghistogram(df.top, x="log2FoldChange", y = "count", color = "significant",
fill = "significant", add = "mean", facet.by = c("set","subunit"),
binwidth=0.02) +
scale_y_continuous(breaks=seq(0,12,3)) +
theme(text= element_text(size = 7))
ggsave(paste0(outdir,"/rpls_l2fc.pdf"), device = "pdf", width=3, height =4)
vc.top2<-c("FAU", "UBF",
"EIF3A","EIF3E","EIF3F","EIF3H","EIF4B","PABPC1","EEF1A1","EEF1B2","EEF1D",
"EEF1G","EEF2","TCTP","HNRNPA1","NAP1L1","VIM", "NPM1", "RACK1")
vc.test<-df[which(df$gene_name %in% vc.top2
& (vtf.M1 & vtf.M2))
,"gene_name"]
vcounts<-ggplot(vsd_abs_ae %>% filter(gene_name %in% vc.test),
aes(y = vst_count, x = condition)) +
facet_wrap( vars(gene_name), ncol =6, scales = "free") + scale_colour_manual(values=col.pal) +
geom_boxplot(outlier.shape = NA, fill = NA) +
geom_jitter(aes(color = condition), shape = 20, size = 1) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position = "top")
vcounts
ggsave(paste0(outdir,"/top2.pdf"), device = "pdf", width=6, height =3)
rm(vsd_abs_ae)
rm(vsd_med_ae)
rm(vsd_abs_markers)
rm(vsd_med_markers)