source('http://myweb.uiowa.edu/pbreheny/7600/s16/notes/fun.R')

############################
#### Case study 1: BRCA ####
############################

load(url('http://myweb.uiowa.edu/pbreheny/data/bc-tcga.RData'))
require(glmnet)
set.seed(1)
cvfit <- cv.glmnet(X, y)
fit <- cvfit$glmnet.fit
xlim <- log(c(fit$lambda[1], cvfit$lambda.min))
plot(fit, xlim=xlim, xvar="lambda")
plot(cvfit)

## lam values
max(fit$lambda)
cvfit$lambda.min
cvfit$lambda.1se

## R^2
max(1-cvfit$cvm/var(y))

## coef
b <- coef(cvfit)
b[which(b > 0.15),,drop=FALSE]

## coef (cont'd)
b <- coef(fit, s=0.2)
sum(b != 0)
b[which(b!=0),,drop=FALSE]

## predict
predict(cvfit, X[85,,drop=FALSE])
sapply(predict(fit, type="nonzero"), length)

xlim <- log(c(fit$lambda[1], cvfit$lambda.min))
nv <- sapply(predict(fit, type="nonzero"), length)[48]
plot(fit, xvar="lambda", las=1, xlab=expression(lambda), xaxt="n", bty="n",
     xlim=xlim, col=pal(nv), lwd=2)
at <- seq(xlim[1], xlim[2], length=5)
axis(1, at=at, labels=round(exp(at), 2))
abline(v=log(cvfit$lambda.min), col="gray", lty=2, lwd=2)
abline(v=log(cvfit$lambda.1se), col="gray", lty=2, lwd=2)
mtext("Variables selected", 3, 2.5)

ll <- log(fit$lambda)
plot(cvfit, las=1, xlab=expression(lambda), xaxt="n", bty="n", xlim=rev(range(ll)), ylab=expression(CV(lambda)))
at <- seq(max(ll), min(ll), length=5)
axis(1, at=at, labels=round(exp(at), 2))
mtext("Variables selected", 3, 2.5)

# AIC/BIC
require(ncvreg)
fit1 <- ncvreg(X, y, penalty='lasso', lambda=cvfit$lambda)
fit2 <- ncvreg(X, y, penalty='lasso', lambda.min=0.001)

ll <- log(fit1$lambda)
IC <- cbind(AIC(fit1), BIC(fit1))
matplot(ll, IC, xlim=rev(range(ll)), col=pal(2), type='l', lwd=3, lty=1, bty='n',
        xlab=expression(lambda), xaxt='n', las=1, ylab="AIC/BIC")
at <- seq(max(ll), min(ll), length=5)
axis(1, at=at, labels=round(exp(at), 3))
toplegend(legend=c("AIC", "BIC"), col=pal(2), lwd=3)

ll <- log(fit2$lambda)
IC <- cbind(AIC(fit2), BIC(fit2))
matplot(ll, IC, xlim=rev(range(ll)), col=pal(2), type='l', lwd=3, lty=1, bty='n',
        xlab=expression(lambda), xaxt='n', las=1, ylab="AIC/BIC")
at <- seq(max(ll), min(ll), length=5)
axis(1, at=at, labels=round(exp(at), 3))
toplegend(legend=c("AIC", "BIC"), col=pal(2), lwd=3)

################################
#### Case study 2: Carbotax ####
################################

require(glmnet)
require(ncvreg)
load(url('http://myweb.uiowa.edu/pbreheny/data/Koussounadis2014.RData'))
set.seed(1)

require(splines)
sDay <- ns(sData$Day, df=2)
X0 <- model.matrix(~ Treatment*sDay, sData)[,-1]
multiplier <- rep(0:1, c(ncol(X0), ncol(X)))
XX <- cbind(X0, X)

cvfit <- cv.glmnet(XX, y, penalty.factor=multiplier)
plot(cvfit)

cvfit <- cv.ncvreg(XX, y, penalty.factor=multiplier, penalty='lasso')
plot(cvfit, type='rsq')
fit <- cvfit$fit
plot(fit, log=TRUE)

# Figure
xlim <- log(c(fit$lambda[1], cvfit$lambda.min))
ylim <- c(-0.3, 0.4)
plot(fit, log=TRUE, xlab=expression(lambda), xaxt="n", bty="n", xlim=xlim, lwd=2, ylim=ylim)
at <- seq(xlim[1], xlim[2], length=5)
axis(1, at=at, labels=round(exp(at), 2))
abline(v=log(cvfit$lambda.min), col="gray", lty=2, lwd=2)
mtext("Variables selected", 3, 2.5)
nv <- predict(fit, lam=exp(at), type="nvars")
axis(3, at=at, labels=nv)

ll <- log(fit$lambda)
plot(cvfit, xlab=expression(lambda), xaxt="n", bty="n", type='rsq', selected=FALSE)
at <- seq(max(ll), min(ll), length=5)
axis(1, at=at, labels=round(exp(at), 2))
mtext("Variables selected", 3, 2.5)
nv <- predict(fit, lam=exp(at), type="nvars")
axis(3, at=at, labels=nv)

# double exponential prior
par(mar=c(5,3,1,6))
x <- seq(-3,3,len=101)
ylim <- c(0,dexp(0)/2)
plot(x, dnorm(x), type="l", col="gray80", lwd=3, ylim=ylim, yaxt="n", bty="n", xlab=expression(beta),ylab="")
mtext(expression(p(beta)),2,1)
lines(x,dexp(abs(x))/2,col="slateblue",lwd=3)
rightlegend(legend=c("Ridge","Lasso"), lwd=3, col=c("gray80","slateblue"))
