Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
BAP_trophoblast_45X/DESEQ2_input_v03_TBLs_pass.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
208 lines (176 sloc)
8.62 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PathCount.txt = "counts_TBLs_Male_AG09270_Female_AG05278A.txt" | |
FeatureColumn = 1 | |
LastMetaColumn = 6 | |
PathDesign.csv = "coldata_TBLs_Both_AG05278A_AG09270_byXa.csv" | |
SampleColumn = 1 | |
TrimFront <- "X.home.FCAM.dahern.Trophoblast_RNAseq.Alignment.Strand." | |
TrimBack <- "_mapped.bam" | |
designform <- "condition" | |
findSV <- T | |
removeSV <- findSV | |
minCounts <- 20 | |
minSamples <- 2 | |
minAve <- 10 | |
require(tidyverse) | |
df.counts<-as.data.frame(read.table(file = PathCount.txt, sep="\t", header = TRUE, row.names = FeatureColumn)) | |
df.design<-as.data.frame(read.csv(file = PathDesign.csv, header = TRUE, row.names = SampleColumn)) %>% | |
mutate(line = str_split_fixed(Samples, "_", 3)[,2]) | |
vc.design<-unlist(strsplit(designform, split = " ")) | |
vc.designLast<-unlist(strsplit(tail(vc.design,1), split = ":")) | |
if(!all(c(vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))],vc.designLast) %in% colnames(df.design))) | |
{stop("Check your designform and sample_sheet")} | |
df.design_ID <- df.design | |
df.design_ID$ID <- row.names(df.design_ID) | |
colnames(df.counts)<-sub(TrimBack, "", sub(TrimFront, "", colnames(df.counts))) | |
ls.meta<-apply(df.counts[,c(1:LastMetaColumn)], 2, function(X) strsplit(as.character(X), split = ";")) | |
df.meta<-as.data.frame(lapply(ls.meta, function(X) unlist(lapply(X, function(Y) Y[1])))) | |
rownames(df.meta)<-rownames(df.counts) | |
mn.counts<-as.matrix(df.counts[,rownames(df.design)]) | |
all(rownames(df.design) == colnames(mn.counts)) | |
require("DESeq2") | |
dds0 <- DESeqDataSetFromMatrix(countData = mn.counts, | |
colData = df.design_ID, | |
design = formula(paste0("~" ,designform))) | |
#Pre-filter based on https://support.bioconductor.org/p/65091/ | |
dds1 <- estimateSizeFactors(dds0) | |
mn.nc1 <- counts(dds1, normalized=TRUE) | |
filter1 <- rowSums(mn.nc1 >= minCounts) >= minSamples | |
dds1 <- dds1[filter1,] | |
mn.nc2 <- counts(dds1, normalized=TRUE) | |
filter2 <- rowMeans(mn.nc2) >= minAve | |
dds1 <- dds1[filter2,] | |
print("Sample names match in correct order. Returning filtered dds") | |
if(findSV){ | |
require("sva") | |
require("limma") | |
mod <- model.matrix (design(dds1), colData(dds1)) | |
mod0 <- model.matrix (~ 1, colData(dds1)) | |
n.sv = num.sv(mn.nc2,mod,method="leek") | |
n.sv | |
ls.svseq <- svaseq(mn.nc2, mod, mod0, method="irw") | |
dds1.sva <- dds1 | |
colnames(ls.svseq$sv) <- colnames(ls.svseq$sv, do.NULL = FALSE, prefix = "SV") | |
colData(dds1.sva)<-cbind(colData(dds1.sva), ls.svseq$sv) | |
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls.svseq$sv), collapse="+"), "+", designform)) | |
dds2 <- DESeq(dds1.sva) | |
# dds2 is for DESEQ2 | |
vsd=vst(dds2, blind = FALSE) | |
# re-detection of SVs in vsd for SV correction in vst-based plots | |
ls2.svseq <- svaseq(assay(vsd), mod, mod0, method="irw") | |
dds1.sva <- dds1 | |
colnames(ls2.svseq$sv) <- colnames(ls2.svseq$sv, do.NULL = FALSE, prefix = "SV") | |
colData(dds1.sva)<-cbind(colData(dds1.sva), ls2.svseq$sv) | |
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls2.svseq$sv), collapse="+"), "+", designform)) | |
dds3 <- DESeq(dds1.sva) | |
vsd3=vst(dds3, blind = FALSE) | |
} | |
if(!findSV){print("Skipping SVA...") | |
dds2 <- DESeq(dds1) | |
dds2<-dds2[which(mcols(dds2)$betaConv),] | |
vsd=vst(dds2, blind = FALSE); vsd3 = vsd | |
} | |
if(removeSV) | |
{ | |
require(limma) | |
assay(vsd3) <- limma::removeBatchEffect(assay(vsd3) | |
, covariates = colData(vsd3)[,c(grep("SV", colnames(colData(vsd3))))] | |
, design = model.matrix( ~ colData(dds2)[,vc.designLast[1]])) | |
} | |
require(ggplot2) | |
fx.gg_col<-function(n) { | |
hues = seq(15, 375, length = n + 1) | |
hcl(h = hues, l = 65, c = 100)[1:n] | |
} | |
#df.ggPCA<-plotPCA(vsd, intgroup=c(colnames(df.design)), returnData=T) | |
df.ggPCA3<-plotPCA(vsd3, intgroup=c(colnames(df.design)), returnData=T) | |
percentVar <- round(100 * attr(df.ggPCA3, "percentVar")) | |
gg.PCA3<-ggplot(df.ggPCA3, aes(x = PC1, y = PC2, color = factor(label), shape = factor(simple))) + | |
geom_point(size =3, stroke = 1.5) + scale_shape_manual(values=c(19, 21)) + | |
scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) + | |
xlab(paste0("PC1: ", percentVar[1], "% variance")) + | |
ylab(paste0("PC2: ", percentVar[2], "% variance")) + | |
theme(legend.key.size = unit(0.2, 'cm')) | |
gg.PCA3 | |
ggsave("PCA3_TBLs_RmvSVs.pdf", gg.PCA3, device = "pdf", width = 4*percentVar[1]/max(percentVar), height=5*percentVar[2]/max(percentVar), units = "in") | |
metadata(dds2)<-df.meta | |
dds<-dds2 | |
ap <- 0.05 | |
l2fc<-0.3 | |
DF.resF<-results(dds, contrast=c("condition","X1_femaleXO","X1_femaleXX"), alpha = ap) | |
DF.resFE<-results(dds, contrast=c("condition","X2_femaleXX","X1_femaleXX"), alpha = ap) | |
DF.resF2<-results(dds, contrast=c("condition","X1_femaleXO","X2_femaleXX"), alpha = ap) | |
df.resF2<-as.data.frame(DF.resF2) | |
df.resFE<-as.data.frame(DF.resFE) | |
colnames(df.resF2)<-paste("fXO2", colnames(df.resF2), sep = "_") | |
colnames(df.resFE)<-paste("FE", colnames(df.resFE), sep = "_") | |
DF.resM<-results(dds, contrast=c("condition","X3_maleXO","X3_maleXY"), alpha = ap) | |
DF.resO<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXO"), alpha = ap) | |
DF.resFM<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXY"), alpha = ap) | |
DF.resMF<-results(dds, contrast=c("condition","X3_maleXO","X1_femaleXX"), alpha = ap) | |
DF.resE<-results(dds, contrast=c("condition","X1_femaleXX","X3_maleXY"), alpha = ap) | |
df.resF<-as.data.frame(DF.resF) | |
df.resM<-as.data.frame(DF.resM) | |
df.resO<-as.data.frame(DF.resO) | |
df.resFM<-as.data.frame(DF.resFM) | |
df.resMF<-as.data.frame(DF.resMF) | |
df.resE<-as.data.frame(DF.resE) | |
colnames(df.resF)<-paste("fXO", colnames(df.resF), sep = "_") | |
colnames(df.resM)<-paste("mXO", colnames(df.resM), sep = "_") | |
colnames(df.resO)<-paste("fmO", colnames(df.resO), sep = "_") | |
colnames(df.resFM)<-paste("FM", colnames(df.resFM), sep = "_") | |
colnames(df.resMF)<-paste("MF", colnames(df.resMF), sep = "_") | |
colnames(df.resE)<-paste("fmE", colnames(df.resE), sep = "_") | |
stopifnot( all( rownames(df.resF) == rownames(assay(vsd3)) & | |
rownames(df.resF) == rownames(df.resF2) & | |
rownames(df.resF) == rownames(df.resFE) & | |
rownames(df.resF) == rownames(df.resM) & | |
rownames(df.resF) == rownames(df.resO) & | |
rownames(df.resF) == rownames(df.resFM) & | |
rownames(df.resF) == rownames(df.resMF) & | |
rownames(df.resF) == rownames(df.resE))) | |
df.all<-cbind(assay(vsd3), df.resF, df.resF2, df.resFE, df.resM, df.resO, df.resFM, df.resMF, df.resE) | |
df.all2<-merge(metadata(dds), df.all, by = "row.names") | |
rownames(df.all2)<-df.all2$Row.names | |
df.all2$Row.names<-t(matrix(unlist(strsplit(rownames(df.all2), "[.]")),nrow=2))[,1] | |
colnames(df.all2)[1]<-"ENSEMBL" | |
df<-df.all2[order(match(rownames(df.all2), rownames(assay(vsd3)))),] | |
vtf.F2 <- (df$fXO2_padj <= ap & abs(df$fXO2_log2FoldChange) >= l2fc) | |
vtf.FE <- (df$FE_padj <= ap & abs(df$FE_log2FoldChange) >= l2fc) | |
names(vtf.F2)<-rownames(df) | |
names(vtf.FE)<-rownames(df) | |
vtf.F <- (df$fXO_padj <= ap & abs(df$fXO_log2FoldChange) >= l2fc) | |
vtf.M <- (df$mXO_padj <= ap & abs(df$mXO_log2FoldChange) >= l2fc) | |
vtf.O <- (df$fmO_padj <= ap & abs(df$fmO_log2FoldChange) >= l2fc) | |
vtf.FM <- (df$FM_padj <= ap & abs(df$FM_log2FoldChange) >= l2fc) | |
vtf.MF <- (df$MF_padj <= ap & abs(df$MF_log2FoldChange) >= l2fc) | |
vtf.E <- (df$fmE_padj <= ap & abs(df$fmE_log2FoldChange) >= l2fc) | |
vtf.X <- (df$Chr == "chrX" & as.numeric(df$Start) > 2781479 & as.numeric(df$End)<155701383) | |
vtf.Y <- (df$Chr == "chrY" & as.numeric(df$Start) > 2781479 & as.numeric(df$End)<56887903) | |
vtf.PAR <- ((df$Chr == "chrX" | df$Chr == "chrY") & !(vtf.X) & !(vtf.Y)) | |
names(vtf.F)<-rownames(df) | |
names(vtf.M)<-rownames(df) | |
names(vtf.O)<-rownames(df) | |
names(vtf.FM)<-rownames(df) | |
names(vtf.MF)<-rownames(df) | |
names(vtf.E)<-rownames(df) | |
names(vtf.X)<-rownames(df) | |
names(vtf.Y)<-rownames(df) | |
names(vtf.PAR)<-rownames(df) | |
require(tidyverse) | |
require(stringr) | |
df.tableS2<-df %>% select(-(starts_with(c("d", "Start", "End", "Strand", "Length", "FE", "MF", "fmE")) | ends_with(c("basemean", "lfcSE", "pvalue")))) | |
df.tableS2$ENSEMBL_v36 <- row.names(df) | |
df.tableS2 <- df.tableS2 %>% | |
rename_with(stringr::str_replace, | |
pattern = "fXO2", replacement = "fXO-XX6", | |
matches("fXO2")) %>% | |
rename_with(stringr::str_replace, | |
pattern = "fmO", replacement = "XOXO", | |
matches("fmO")) %>% | |
rename_with(stringr::str_replace, | |
pattern = "FM", replacement = "fXO-XY", | |
matches("FM")) %>% | |
relocate(ENSEMBL_v36, .before = ENSEMBL) %>% | |
relocate(starts_with("fXO-XY"), .after = starts_with("mXO")) %>% | |
relocate(starts_with("fXO-XX6"), .after =starts_with("XOXO")) | |
write.csv(df.tableS2, file = "Table_S2.csv", row.names = F,quote = F) |