source('http://myweb.uiowa.edu/pbreheny/7600/s16/notes/fun.R')
require(covTest)
require(selectiveInference)

# Generate example data ---------------------------------------------------

set.seed(1)
n <- 100
p1 <- 6
N <- 100
p <- 60
K <- 3
bb <- numeric(6*K)
bb[(0:5)*K+1] <- c(1,-1,0.5,0.5,-0.5,-0.5)
require(Matrix)
A <- matrix(0.5, 3, 3) + 0.5*diag(3)
B <- diag(p-K*p1)
Sigma <- bdiag(A, A, A, A, A, A, B)
R <- chol(Sigma)

X <- as.matrix(matrix(rnorm(n * p), n, p) %*% R)
colnames(X) <- paste0("V", 1:p)
y <- X[,1:(p1*K)] %*% bb + rnorm(n)

varType <- vector("character", p)
varType[(0:5)*K+1] <- "A"
varType[c((0:5)*K+2, (0:5)*K+3)] <- "B"
varType[(p1*K+1):p] <- "C"
varLab <- vector("character", p)
varLab[varType=="A"] <- paste0("A", 1:sum(varType=="A"))
varLab[varType=="B"] <- paste0("B", 1:sum(varType=="B"))
varLab[varType=="C"] <- paste0("C", 1:sum(varType=="C"))

# CovTest: Sim ------------------------------------------------------------

# Forward selection (slide 4)
N <- 200
R1 <- R2 <- numeric(N)
for (i in 1:N) {
  yy <- scale(rnorm(100), scale=FALSE)
  XX <- std(matrix(rnorm(100*100), 100, 100))
  jj <- which.max(abs(crossprod(XX, yy)))
  fit1 <- lm(yy~XX[,1])
  fit2 <- lm(yy~XX[,jj])
  R1[i] <- crossprod(yy) - crossprod(fit1$residuals)
  R2[i] <- crossprod(yy) - crossprod(fit2$residuals)
}

# Prespecified test
qqplot(qchisq(ppoints(100), 1), R1, las=1, bty="n", pch=16, ylim=c(0, 8),
       xlab=expression(chi^2), ylab=expression(T[R]))
lines(0:8, 0:8, lty=2, lwd=2, col="gray")
mean(R1 > qchisq(.95, 1))

# "Cherry-picked" test
qqplot(qchisq(ppoints(100), 1), R2, las=1, bty="n", pch=16, ylim=c(0, max(R2)),
       xlab=expression(chi^2), ylab=expression(T[R]))
lines(0:8, 0:8, lty=2, lwd=2, col="gray")
mean(R2 > qchisq(.95, 1))

# CovTest (Slide 7)
N <- 200
T <- numeric(N)
for (i in 1:N) {
  yy <- scale(rnorm(100), scale=FALSE)
  XX <- std(matrix(rnorm(100*100), 100, 100))
  fit <- lars(XX, yy, max.steps=1)
  Test <- covTest(fit, XX, yy, sigma=1)
  T[i] <- Test$results[,"Drop_in_covariance"]
}
qqplot(qexp(ppoints(100), 1), T, las=1, bty="n", pch=16, ylim=c(0, max(T)),
       xlab="Exp(1)", ylab=expression(T[C]))
lines(0:8, 0:8, lty=2, lwd=2, col="gray")
mean(T > qexp(.95, 1))


# CovTest: Example data ---------------------------------------------------

# Slide 8
fit <- lars(X, y)
Test <- covTest(fit, X, y)
Tab <- Test$results[1:10,]
Tab[,1] <- varLab[Tab[,1]]
Tab

# With lambda difference
cbind(Test$results[1:10,], lamDiff=-diff(fit$lambda)[1:10])


# Selective inference -----------------------------------------------------

X.std <- std(X)
fit = glmnet(X.std, y, standardize=FALSE)
lam <- 25
b <- coef(fit, s=lam/n)[-1]
res <- fixedLassoInf(X.std, y, b, lam, sigma=1)
bb <- res$sign * res$vmat %*% y
B <- cbind(bb, res$ci, res$pv)

CIplot(B, sort=FALSE, labels=varLab[res$vars], xlab=expression(beta))
lines(c(1,1), c(7.5, 8.5), col="gray", lty=2, lwd=2, xpd=1)
lines(c(-1,-1), c(6.5, 7.5), col="gray", lty=2, lwd=2)
lines(c(0.5,0.5), c(3.5, 5.5), col="gray", lty=2, lwd=2)
lines(c(-0.5,-0.5), c(1.5, 3.5), col="gray", lty=2, lwd=2)

CIplot(lm(y~X.std[,b!=0]), sort=FALSE, labels=varLab[res$vars], xlab=expression(beta))
lines(c(1,1), c(7.5, 8.5), col="gray", lty=2, lwd=2, xpd=1)
lines(c(-1,-1), c(6.5, 7.5), col="gray", lty=2, lwd=2)
lines(c(0.5,0.5), c(3.5, 5.5), col="gray", lty=2, lwd=2)
lines(c(-0.5,-0.5), c(1.5, 3.5), col="gray", lty=2, lwd=2)


# TCGA data ---------------------------------------------------------------

load(url('http://myweb.uiowa.edu/pbreheny/data/bcTCGA.RData'))
p <- ncol(X)
X.TCGA <- X
y.TCGA <- y
n.TCGA <- length(y.TCGA)

X.std <- std(X.TCGA)
fit = glmnet(X.std, y.TCGA, standardize=FALSE)
lam <- 115
b <- coef(fit, s=lam/n.TCGA)[-1]
names(b) <- colnames(X.TCGA)
sum(b!=0)
res <- fixedLassoInf(X.std, y, b, lam, sigma=var(y)/2)
bb <- res$sign * res$vmat %*% y
B <- cbind(bb, res$ci, res$pv)
rownames(B) <- colnames(X)[res$vars]
CIplot(B, sort=FALSE, mar=c(4, 5, 1, 5), xlab=expression(beta))

fit <- lars(X.TCGA, y.TCGA, max.steps=20, use.Gram=FALSE)
Test <- covTest(fit, X, y, sigma=var(y)/2)
Tab <- Test$results[1:10,]
Tab[,1] <- colnames(X.TCGA)[Tab[,1]]
Tab[1:10,]
