library(hdrm)

############################
#### Case study 1: BRCA ####
############################

attachData('bcTCGA')
set.seed(1)   # Not necessary, but your CV results might look different than mine with a different seed

# Model fitting and CV
cvfit <- cv.glmnet(X, y)
fit <- cvfit$glmnet.fit
xlim <- log(c(fit$lambda[1], cvfit$lambda.min))
plot(fit, xlim=xlim, xvar="lambda")
plot(cvfit)

## lam values
max(fit$lambda)
cvfit$lambda.min
cvfit$lambda.1se

## R^2
max(1-cvfit$cvm/var(y))

## coef
b <- coef(cvfit)
b[which(b > 0.15),,drop=FALSE]

## coef (cont'd)
b <- coef(fit, s=0.2)
sum(b != 0)
b[which(b!=0),,drop=FALSE]

## predict
predict(cvfit, X[85,,drop=FALSE])
sapply(predict(fit, type="nonzero"), length)

# Slides 5-6
Fig2.10(cvfit)

# AIC/BIC (Slide 8)
fit1 <- ncvreg(X, y, penalty='lasso', lambda=cvfit$lambda)
fit2 <- ncvreg(X, y, penalty='lasso', lambda.min=0.001)
Fig2.11(fit1)
Fig2.11(fit2)

################################
#### Case study 2: Carbotax ####
################################

attachData(Koussounadis2014)
set.seed(1)

# Construct combined X (low- and high-dimensional features)
library(splines)
sDay <- ns(sData$Day, df=2)
X0 <- model.matrix(~ Treatment*sDay, sData)[,-1]
multiplier <- rep(0:1, c(ncol(X0), ncol(X)))
XX <- cbind(X0, X)

cvfit <- cv.ncvreg(XX, y, penalty.factor=multiplier, penalty='lasso')
plot(cvfit, type='rsq')
fit <- cvfit$fit
plot(fit, log=TRUE)

# Slides 24-25
Fig2.12(cvfit)

# double exponential prior
par(mar=c(5,3,1,6))
x <- seq(-3,3,len=101)
ylim <- c(0,dexp(0)/2)
plot(x, dnorm(x), type="l", col="gray80", lwd=3, ylim=ylim, yaxt="n", bty="n", xlab=expression(beta),ylab="")
mtext(expression(p(beta)),2,1)
lines(x,dexp(abs(x))/2,col="slateblue",lwd=3)
hdrm:::rightlegend(legend=c("Ridge","Lasso"), lwd=3, col=c("gray80","slateblue"))
