Skip to content
Permalink
479373775c
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
208 lines (176 sloc) 8.62 KB
PathCount.txt = "counts_TBLs_Male_AG09270_Female_AG05278A.txt"
FeatureColumn = 1
LastMetaColumn = 6
PathDesign.csv = "coldata_TBLs_Both_AG05278A_AG09270_byXa.csv"
SampleColumn = 1
TrimFront <- "X.home.FCAM.dahern.Trophoblast_RNAseq.Alignment.Strand."
TrimBack <- "_mapped.bam"
designform <- "condition"
findSV <- T
removeSV <- findSV
minCounts <- 20
minSamples <- 2
minAve <- 10
require(tidyverse)
df.counts<-as.data.frame(read.table(file = PathCount.txt, sep="\t", header = TRUE, row.names = FeatureColumn))
df.design<-as.data.frame(read.csv(file = PathDesign.csv, header = TRUE, row.names = SampleColumn)) %>%
mutate(line = str_split_fixed(Samples, "_", 3)[,2])
vc.design<-unlist(strsplit(designform, split = " "))
vc.designLast<-unlist(strsplit(tail(vc.design,1), split = ":"))
if(!all(c(vc.design[(vc.design != "+") & (vc.design != tail(vc.design,1))],vc.designLast) %in% colnames(df.design)))
{stop("Check your designform and sample_sheet")}
df.design_ID <- df.design
df.design_ID$ID <- row.names(df.design_ID)
colnames(df.counts)<-sub(TrimBack, "", sub(TrimFront, "", colnames(df.counts)))
ls.meta<-apply(df.counts[,c(1:LastMetaColumn)], 2, function(X) strsplit(as.character(X), split = ";"))
df.meta<-as.data.frame(lapply(ls.meta, function(X) unlist(lapply(X, function(Y) Y[1]))))
rownames(df.meta)<-rownames(df.counts)
mn.counts<-as.matrix(df.counts[,rownames(df.design)])
all(rownames(df.design) == colnames(mn.counts))
require("DESeq2")
dds0 <- DESeqDataSetFromMatrix(countData = mn.counts,
colData = df.design_ID,
design = formula(paste0("~" ,designform)))
#Pre-filter based on https://support.bioconductor.org/p/65091/
dds1 <- estimateSizeFactors(dds0)
mn.nc1 <- counts(dds1, normalized=TRUE)
filter1 <- rowSums(mn.nc1 >= minCounts) >= minSamples
dds1 <- dds1[filter1,]
mn.nc2 <- counts(dds1, normalized=TRUE)
filter2 <- rowMeans(mn.nc2) >= minAve
dds1 <- dds1[filter2,]
print("Sample names match in correct order. Returning filtered dds")
if(findSV){
require("sva")
require("limma")
mod <- model.matrix (design(dds1), colData(dds1))
mod0 <- model.matrix (~ 1, colData(dds1))
n.sv = num.sv(mn.nc2,mod,method="leek")
n.sv
ls.svseq <- svaseq(mn.nc2, mod, mod0, method="irw")
dds1.sva <- dds1
colnames(ls.svseq$sv) <- colnames(ls.svseq$sv, do.NULL = FALSE, prefix = "SV")
colData(dds1.sva)<-cbind(colData(dds1.sva), ls.svseq$sv)
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls.svseq$sv), collapse="+"), "+", designform))
dds2 <- DESeq(dds1.sva)
# dds2 is for DESEQ2
vsd=vst(dds2, blind = FALSE)
# re-detection of SVs in vsd for SV correction in vst-based plots
ls2.svseq <- svaseq(assay(vsd), mod, mod0, method="irw")
dds1.sva <- dds1
colnames(ls2.svseq$sv) <- colnames(ls2.svseq$sv, do.NULL = FALSE, prefix = "SV")
colData(dds1.sva)<-cbind(colData(dds1.sva), ls2.svseq$sv)
design(dds1.sva)<-as.formula(paste0("~",paste(colnames(ls2.svseq$sv), collapse="+"), "+", designform))
dds3 <- DESeq(dds1.sva)
vsd3=vst(dds3, blind = FALSE)
}
if(!findSV){print("Skipping SVA...")
dds2 <- DESeq(dds1)
dds2<-dds2[which(mcols(dds2)$betaConv),]
vsd=vst(dds2, blind = FALSE); vsd3 = vsd
}
if(removeSV)
{
require(limma)
assay(vsd3) <- limma::removeBatchEffect(assay(vsd3)
, covariates = colData(vsd3)[,c(grep("SV", colnames(colData(vsd3))))]
, design = model.matrix( ~ colData(dds2)[,vc.designLast[1]]))
}
require(ggplot2)
fx.gg_col<-function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
#df.ggPCA<-plotPCA(vsd, intgroup=c(colnames(df.design)), returnData=T)
df.ggPCA3<-plotPCA(vsd3, intgroup=c(colnames(df.design)), returnData=T)
percentVar <- round(100 * attr(df.ggPCA3, "percentVar"))
gg.PCA3<-ggplot(df.ggPCA3, aes(x = PC1, y = PC2, color = factor(label), shape = factor(simple))) +
geom_point(size =3, stroke = 1.5) + scale_shape_manual(values=c(19, 21)) +
scale_color_manual(values = c("violet", "darkmagenta", "mediumpurple", "deepskyblue", "blue")) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
theme(legend.key.size = unit(0.2, 'cm'))
gg.PCA3
ggsave("PCA3_TBLs_RmvSVs.pdf", gg.PCA3, device = "pdf", width = 4*percentVar[1]/max(percentVar), height=5*percentVar[2]/max(percentVar), units = "in")
metadata(dds2)<-df.meta
dds<-dds2
ap <- 0.05
l2fc<-0.3
DF.resF<-results(dds, contrast=c("condition","X1_femaleXO","X1_femaleXX"), alpha = ap)
DF.resFE<-results(dds, contrast=c("condition","X2_femaleXX","X1_femaleXX"), alpha = ap)
DF.resF2<-results(dds, contrast=c("condition","X1_femaleXO","X2_femaleXX"), alpha = ap)
df.resF2<-as.data.frame(DF.resF2)
df.resFE<-as.data.frame(DF.resFE)
colnames(df.resF2)<-paste("fXO2", colnames(df.resF2), sep = "_")
colnames(df.resFE)<-paste("FE", colnames(df.resFE), sep = "_")
DF.resM<-results(dds, contrast=c("condition","X3_maleXO","X3_maleXY"), alpha = ap)
DF.resO<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXO"), alpha = ap)
DF.resFM<-results(dds, contrast=c("condition","X1_femaleXO","X3_maleXY"), alpha = ap)
DF.resMF<-results(dds, contrast=c("condition","X3_maleXO","X1_femaleXX"), alpha = ap)
DF.resE<-results(dds, contrast=c("condition","X1_femaleXX","X3_maleXY"), alpha = ap)
df.resF<-as.data.frame(DF.resF)
df.resM<-as.data.frame(DF.resM)
df.resO<-as.data.frame(DF.resO)
df.resFM<-as.data.frame(DF.resFM)
df.resMF<-as.data.frame(DF.resMF)
df.resE<-as.data.frame(DF.resE)
colnames(df.resF)<-paste("fXO", colnames(df.resF), sep = "_")
colnames(df.resM)<-paste("mXO", colnames(df.resM), sep = "_")
colnames(df.resO)<-paste("fmO", colnames(df.resO), sep = "_")
colnames(df.resFM)<-paste("FM", colnames(df.resFM), sep = "_")
colnames(df.resMF)<-paste("MF", colnames(df.resMF), sep = "_")
colnames(df.resE)<-paste("fmE", colnames(df.resE), sep = "_")
stopifnot( all( rownames(df.resF) == rownames(assay(vsd3)) &
rownames(df.resF) == rownames(df.resF2) &
rownames(df.resF) == rownames(df.resFE) &
rownames(df.resF) == rownames(df.resM) &
rownames(df.resF) == rownames(df.resO) &
rownames(df.resF) == rownames(df.resFM) &
rownames(df.resF) == rownames(df.resMF) &
rownames(df.resF) == rownames(df.resE)))
df.all<-cbind(assay(vsd3), df.resF, df.resF2, df.resFE, df.resM, df.resO, df.resFM, df.resMF, df.resE)
df.all2<-merge(metadata(dds), df.all, by = "row.names")
rownames(df.all2)<-df.all2$Row.names
df.all2$Row.names<-t(matrix(unlist(strsplit(rownames(df.all2), "[.]")),nrow=2))[,1]
colnames(df.all2)[1]<-"ENSEMBL"
df<-df.all2[order(match(rownames(df.all2), rownames(assay(vsd3)))),]
vtf.F2 <- (df$fXO2_padj <= ap & abs(df$fXO2_log2FoldChange) >= l2fc)
vtf.FE <- (df$FE_padj <= ap & abs(df$FE_log2FoldChange) >= l2fc)
names(vtf.F2)<-rownames(df)
names(vtf.FE)<-rownames(df)
vtf.F <- (df$fXO_padj <= ap & abs(df$fXO_log2FoldChange) >= l2fc)
vtf.M <- (df$mXO_padj <= ap & abs(df$mXO_log2FoldChange) >= l2fc)
vtf.O <- (df$fmO_padj <= ap & abs(df$fmO_log2FoldChange) >= l2fc)
vtf.FM <- (df$FM_padj <= ap & abs(df$FM_log2FoldChange) >= l2fc)
vtf.MF <- (df$MF_padj <= ap & abs(df$MF_log2FoldChange) >= l2fc)
vtf.E <- (df$fmE_padj <= ap & abs(df$fmE_log2FoldChange) >= l2fc)
vtf.X <- (df$Chr == "chrX" & as.numeric(df$Start) > 2781479 & as.numeric(df$End)<155701383)
vtf.Y <- (df$Chr == "chrY" & as.numeric(df$Start) > 2781479 & as.numeric(df$End)<56887903)
vtf.PAR <- ((df$Chr == "chrX" | df$Chr == "chrY") & !(vtf.X) & !(vtf.Y))
names(vtf.F)<-rownames(df)
names(vtf.M)<-rownames(df)
names(vtf.O)<-rownames(df)
names(vtf.FM)<-rownames(df)
names(vtf.MF)<-rownames(df)
names(vtf.E)<-rownames(df)
names(vtf.X)<-rownames(df)
names(vtf.Y)<-rownames(df)
names(vtf.PAR)<-rownames(df)
require(tidyverse)
require(stringr)
df.tableS2<-df %>% select(-(starts_with(c("d", "Start", "End", "Strand", "Length", "FE", "MF", "fmE")) | ends_with(c("basemean", "lfcSE", "pvalue"))))
df.tableS2$ENSEMBL_v36 <- row.names(df)
df.tableS2 <- df.tableS2 %>%
rename_with(stringr::str_replace,
pattern = "fXO2", replacement = "fXO-XX6",
matches("fXO2")) %>%
rename_with(stringr::str_replace,
pattern = "fmO", replacement = "XOXO",
matches("fmO")) %>%
rename_with(stringr::str_replace,
pattern = "FM", replacement = "fXO-XY",
matches("FM")) %>%
relocate(ENSEMBL_v36, .before = ENSEMBL) %>%
relocate(starts_with("fXO-XY"), .after = starts_with("mXO")) %>%
relocate(starts_with("fXO-XX6"), .after =starts_with("XOXO"))
write.csv(df.tableS2, file = "Table_S2.csv", row.names = F,quote = F)