library(hdrm)
library(ggplot2)
theme_set(theme_minimal())

# Simulation takes a while!
out <- cache(Ex3.1(), 'e3-1.pqt')
Fig3.5(out)

Fig3.6()

# Breast cancer gene expression study -------------------------------------
attach_data(brca1)

fit <- ncvreg(X, y, penalty='lasso')
b <- coef(fit, which=which.min(BIC(fit)))[-1]

# w <- abs(b)^(-1)   # Calculate weights
# w <- pmin(w, 1e10) # cv.glmnet does not allow
#                    # infinite weights
# cvfit <- cv.glmnet(X, y, penalty.factor=w,
#                    lambda.min=1e-5)

# Adaptive lasso, biased CV estimate
par(mar=c(4, 4.5, 3, 0.25))
ylim <- c(0.15, 0.55)
Fig3.7(ylim=ylim)

# CV the entire adaptive lasso procedure
par(mar=c(4, 4.5, 3, 0.25))
if (file.exists('brca1_al.rds')) {
  cvfit <- readRDS('brca1_al.rds')
} else {
  cvfit <- cv.adaptive_lasso(X, y)
  saveRDS(cvfit, 'brca1_al.rds')
}
plot(cvfit, bty='n', ylab = expression(CV(lambda)), ylim=ylim)

# CV: Regular lasso
par(mar=c(4, 4.5, 3, 0.25))
cvfit_lasso <- cv.ncvreg(X, y, penalty='lasso')
plot(cvfit_lasso, bty='n', ylab = expression(CV(lambda)), ylim=ylim)

# cvfit3 <- cv.ncvreg(X, y) # gam=3 is default
# cvfit7 <- cv.ncvreg(X, y, gamma=7)

if (file.exists('brca1_mcp3.rds')) {
cvfit3 <- readRDS('brca1_mcp3.rds')
cvfit7 <- readRDS('brca1_mcp7.rds')
} else {
cvfit3 <- cv.ncvreg(X, y)
cvfit7 <- cv.ncvreg(X, y, gamma=7)
saveRDS(cvfit3, 'brca1_mcp3.rds')
saveRDS(cvfit7, 'brca1_mcp7.rds')
}

fit3 <- cvfit3$fit
fit7 <- cvfit7$fit
ylim=c(-0.21, 0.51)
par(mfrow=c(1,2), mar=c(4, 4, 2, 0.25))
xlim <- log(c(fit3$lambda[1], cvfit3$lambda.min))
plot(fit3, xlim=xlim, log.l=TRUE, bty='n', ylim=ylim)
mtext(expression(gamma==3))
xlim <- log(c(fit7$lambda[1], cvfit7$lambda.min))
plot(fit7, xlim=xlim, log.l=TRUE, bty='n', ylim=ylim)
mtext(expression(gamma==7))

ylim=c(0.18, 0.551)
par(mfrow=c(1,2), mar=c(4, 4, 5, 0.25))
plot(cvfit3, bty='n')
mtext(expression(gamma==3), line=3)
plot(cvfit7, bty='n')
mtext(expression(gamma==7), line=3)

summary(cvfit3)

summary(cvfit7)

cvfit_scad <- cv.ncvreg(X, y, gamma=8, penalty='SCAD')
summary(cvfit_scad)

ylim <- c(-0.2, 0.5)
par(mfrow = c(1, 2), mar=c(4, 4.5, 2.5, 0.5))
plot(cvfit_scad$fit, log.l=TRUE, bty='n', ylim=ylim)
plot(cvfit_scad, bty='n')

plot(cvfit_lasso$fit, xlim=log(rev(range(cvfit_scad$lambda))), log.l = TRUE, bty = "n", ylim=ylim)
plot(cvfit_lasso, bty = "n")

# WHO-ARI study -----------------------------------------------------------
attach_data('whoari')
XX <- std(X)

set.seed(1)

fold <- assign_fold(y, 10)
cvfit_mcp <- cv.ncvreg(XX, y, gam=6, fold=fold)
cvfit_las <- cv.ncvreg(XX, y, penalty="lasso",
                     fold=fold)

ylim <- c(0, 0.5)
par(mfrow = c(1, 2), mar=c(4, 4.5, 4, 0.5))
plot(cvfit_mcp, type='rsq', bty='n', ylim=ylim)
mtext('MCP', line=3)
plot(cvfit_las, type='rsq', bty='n', ylim=ylim)
mtext('Lasso', line=3)

fit_mcp <- cvfit_mcp$fit
fit_las <- cvfit_las$fit
ylim <- c(-0.5, 0.5)
par(mfrow = c(1, 2), mar=c(4, 4.5, 2, 0.5))
xlim <- log(c(fit_mcp$lambda[1], cvfit_mcp$lambda.min))
plot(fit_mcp, bty='n', xlim=xlim, log.l=TRUE, ylim=ylim)
mtext('MCP')
xlim <- log(c(fit_las$lambda[1], cvfit_las$lambda.min))
plot(fit_las, bty='n', xlim=xlim, log.l=TRUE, ylim=ylim)
mtext('Lasso')

summary(cvfit_mcp)

summary(cvfit_las)

