Skip to content
Permalink
28eae5b6dc
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
128 lines (118 sloc) 4.72 KB
library(QREM)
library(SEMMS)
res <- list()
qn <- 0.25 # change to 0.5 and 0.75
maxReps <- 20
fn <- "riboflavin1.csv"
responseCol <- 1
predictors <- 2:4089
dataYXZ <- readInputFile(fn, ycol=responseCol, Zcols=predictors)
nn <- 10
n <- dataYXZ$N
y0 <- dataYXZ$Y
K <- dataYXZ$K
# initialize:
zval <- rep(0, K)
for (i in seq(1,K,by=5)) {
if (i %% 101 == 0) { cat(i,"...\n") }
preds <- paste0(colnames(dataYXZ$Z)[i:min(i+4,K)], collapse = " + ")
linmod <- as.formula(paste("Y ~", preds))
dframetmp <- data.frame(cbind(dataYXZ$Y, dataYXZ$Z[,i:min(i+4,K)]))
colnames(dframetmp) <- c("Y",colnames(dataYXZ$Z)[i:min(i+4,K)])
qremFit <- QREM(lm,linmod, dframetmp, qn, maxInvLambda = 1000)
zval[i:min(i+4,K)] <- qremFit$coef$beta[-1]/sqrt(diag(bcov(qremFit,linmod,dframetmp,qn)))[-1]
}
nnset <- order(abs(zval),decreasing = TRUE)[1:nn]
#cat("initial set", nnset,"\n")
for (rep in 1:maxReps) {
# create a subset of the selected columns and run QREM
preds <- paste(colnames(dataYXZ$Z)[nnset], collapse = "+")
linmod <- as.formula(paste("Y ~", preds))
dframetmp <- data.frame(cbind(dataYXZ$Y, dataYXZ$Z[,nnset]))
colnames(dframetmp) <- c("Y",colnames(dataYXZ$Z)[nnset])
qremFit <- QREM(lm,linmod, dframetmp, qn, maxInvLambda = 1000)
# apply the weights found by QREM and rerun SEMMS
dataYXZtmp <- dataYXZ
#dataYXZtmp$Z <- diag(sqrt(qremFit$weights))%*%dataYXZ$Z
dataYXZtmp$Y <- (dataYXZ$Y-(1-2*qn)/qremFit$weights)#*sqrt(qremFit$weights)
#plot(dataYXZ$Y, dataYXZtmp$Y); abline(0,1,col=2)
fittedVSnew <- fitSEMMS(dataYXZtmp,distribution = 'N', mincor=0.8,rnd=F,
nnset=nnset, minchange = 1, maxst = 20)
foundSEMMSnew <- sort(union(which(fittedVSnew$gam.out$lockedOut != 0),
fittedVSnew$gam.out$nn))
cat(rep,"\n",fittedVSnew$gam.out$nn,"\n",nnset,"\n", qremFit$empq,"\n\n")
if (length(fittedVSnew$gam.out$nn) == length(nnset)) {
if (all(fittedVSnew$gam.out$nn == nnset)) {
break
}
}
nnset <- fittedVSnew$gam.out$nn
}
# after the loop
fittedGLM <- runLinearModel(dataYXZtmp,nnset, "N")
print(summary(fittedGLM$mod))
plotMDS(dataYXZ, fittedVSnew, fittedGLM, ttl="... Data")
plotFit(fittedGLM)
plot(y0, col=(2+(qremFit$ui>0)), cex=0.7, pch=19)
if(length(nnset) > 0) {
for (i in 1:length(nnset)) {
plot(dataYXZ$Z[,nnset[i]],dataYXZ$Y, col=1+(qremFit$ui>0))
}
}
res[[1]] <- list(y0, nnset, qremFit, fittedGLM)
# loop over all the columns, and get the estimates for q=0.25, 0.5, 0.75
#
# This part can take a while... 4088 quantile regression models with
# variable selection
for (k in 1:K) {
cat(k,"...\n")
dataYXZ <- readInputFile(fn, ycol=k+1, Zcols=setdiff(predictors,k+1))
n <- dataYXZ$N
y0 <- dataYXZ$Y
K <- dataYXZ$K
zval <- rep(0, K)
for (i in seq(1,K,by=5)) {
if (i %% 101 == 0) { cat(i,"...\n") }
preds <- paste0(colnames(dataYXZ$Z)[i:min(i+4,K)], collapse = " + ")
linmod <- as.formula(paste("Y ~", preds))
dframetmp <- data.frame(cbind(dataYXZ$Y, dataYXZ$Z[,i:min(i+4,K)]))
colnames(dframetmp) <- c("Y",colnames(dataYXZ$Z)[i:min(i+4,K)])
qremFit <- QREM(lm,linmod, dframetmp, qn, maxInvLambda = 1000)
zval[i:min(i+4,K)] <- qremFit$coef$beta[-1]/sqrt(diag(bcov(qremFit,linmod,dframetmp,qn)))[-1]
}
nnset = order(abs(zval),decreasing = TRUE)[1:nn]
#cat("initial set", nnset,"\n")
for (rep in 1:maxReps) {
# create a subset of the selected columns and run QREM
preds <- paste(colnames(dataYXZ$Z)[nnset], collapse = "+")
linmod <- as.formula(paste("Y ~", preds))
dframetmp <- data.frame(cbind(dataYXZ$Y, dataYXZ$Z[,nnset]))
colnames(dframetmp) <- c("Y",colnames(dataYXZ$Z)[nnset])
qremFit <- QREM(lm,linmod, dframetmp, qn, maxInvLambda = 1000)
# apply the weights found by QREM and rerun SEMMS
dataYXZtmp <- dataYXZ
#dataYXZtmp$Z <- diag(sqrt(qremFit$weights))%*%dataYXZ$Z
dataYXZtmp$Y <- (dataYXZ$Y-(1-2*qn)/qremFit$weights)#*sqrt(qremFit$weights)
fittedVSnew <- fitSEMMS(dataYXZtmp,distribution = 'N', mincor=0.8,rnd=F,
nnset=nnset, minchange = 1, maxst = 20)
foundSEMMSnew <- sort(union(which(fittedVSnew$gam.out$lockedOut != 0),
fittedVSnew$gam.out$nn))
cat(rep,"\n",fittedVSnew$gam.out$nn,"\n",nnset,"\n", qremFit$empq,"\n\n")
if (length(fittedVSnew$gam.out$nn) == length(nnset)) {
if (all(fittedVSnew$gam.out$nn == nnset)) {
break
}
}
nnset <- fittedVSnew$gam.out$nn
if (length(nnset) == 0)
break
}
if (length(nnset) == 0) {
res[[k+1]] <- list()
next
}
fittedGLM <- runLinearModel(dataYXZtmp,nnset, "N")
res[[k+1]] <- list(y0, nnset, qremFit, fittedGLM)
}
save(res, file="B2q25.RData") # change to B2q50 and B2q75 when using different
# quartiles