library(hdrm)

############################
#### Case study 1: BRCA ####
############################

attachData('bcTCGA')
set.seed(1)   # Not necessary, but your CV results might look different than mine with a different seed

# Model fitting and CV
cvfit <- cv.glmnet(X, y)
fit <- cvfit$glmnet.fit
xlim <- log(c(fit$lambda[1], cvfit$lambda.min))
plot(fit, xlim=xlim, xvar="lambda")
plot(cvfit)

## lam values
max(fit$lambda)
cvfit$lambda.min
cvfit$lambda.1se

## R^2
max(1-cvfit$cvm/var(y))

## coef
b <- coef(cvfit)
b[which(b > 0.15),,drop=FALSE]

## coef (cont'd)
b <- coef(fit, s=0.2)
sum(b != 0)
b[which(b!=0),,drop=FALSE]

## predict
predict(cvfit, X[85,,drop=FALSE])
sapply(predict(fit, type="nonzero"), length)

# Slides 5-6
Fig2.10(cvfit)

# AIC/BIC (Slide 8)
fit1 <- ncvreg(X, y, penalty='lasso', lambda=cvfit$lambda)
fit2 <- ncvreg(X, y, penalty='lasso', lambda.min=0.001)
Fig2.11(fit1)
Fig2.11(fit2)

# AICc
Fig2.11_aicc <- function(fit, ...) {
  lam <- log(fit$lambda)
  ll <- logLik(fit)
  df <- pmin(as.numeric(attr(ll, "df")), fit$n-1)
  print(range(df))
  IC <- cbind(AIC(fit), BIC(fit), AIC(fit) + 2 * df * (df + 1)/(fit$n - df - 1))
  matplot(lam, IC, xlim = rev(range(lam)), col = hdrm:::pal(3), type = "l", 
          lwd = 3, lty = 1, bty = "n", xlab = expression(lambda), 
          xaxt = "n", las = 1, ylab = "AIC/BIC", ...)
  at <- seq(max(lam), min(lam), length = 5)
  axis(1, at = at, labels = round(exp(at), 2))
  hdrm:::toplegend(legend = c("AIC", "BIC", "AICc"), col = hdrm:::pal(3), lwd = 3)
}
Fig2.11_aicc(fit1)
Fig2.11_aicc(fit2, ylim=c(-100, 2000))

################################
#### Case study 2: Carbotax ####
################################

attachData(Koussounadis2014)
set.seed(1)

# Construct combined X (low- and high-dimensional features)
library(splines)
sDay <- ns(sData$Day, df=2)
X0 <- model.matrix(~ Treatment*sDay, sData)[,-1]
multiplier <- rep(0:1, c(ncol(X0), ncol(X)))
XX <- cbind(X0, X)

cvfit <- cv.ncvreg(XX, y, penalty.factor=multiplier, penalty='lasso')
plot(cvfit, type='rsq')
fit <- cvfit$fit
plot(fit, log=TRUE)

# Slides 24-25
Fig2.12(cvfit)

# double exponential prior
par(mar=c(5,3,1,6))
x <- seq(-3,3,len=101)
ylim <- c(0,dexp(0)/2)
plot(x, dnorm(x), type="l", col="gray80", lwd=3, ylim=ylim, yaxt="n", bty="n", xlab=expression(beta),ylab="")
mtext(expression(p(beta)),2,1)
lines(x,dexp(abs(x))/2,col="slateblue",lwd=3)
hdrm:::rightlegend(legend=c("Ridge","Lasso"), lwd=3, col=c("gray80","slateblue"))
