library(hdrm)

# Slide 4 (19 degrees of freedom)
set.seed(1)
n <- 25
p <- 100
xnam <- paste0("V", 1:p)
form <- as.formula(paste("y ~ ", paste(xnam, collapse= "+")))
N <- 100
Y <- Yhat.lasso <- Yhat.step <- Yhat.lm <- matrix(NA, N, n)
v <- numeric(N)
pb <- txtProgressBar(0, N, style=3)
for (i in 1:N) {
  X <- matrix(runif(n*p), n, p)
  Data <- as.data.frame(X)
  y <- rnorm(n)
  Y[i,] <- y
  fit <- lm(y~V1+V2+V3+V4+V5, Data)
  Yhat.lm[i,] <- fit$fitted
  fit0 <- lm(y~1, data=Data)
  fit <- step(fit0, scope=form, direction="forward", trace=0, k=log(n), steps=5)
  Yhat.step[i,] <- fit$fitted
  fit <- glmnet(X, y)
  Yhat.lasso[i,] <- predict(fit, X, s=0.2)
  v[i] <- nrow(predict(fit, s=0.2, type='nonzero'))
  setTxtProgressBar(pb, i)
}
sum(sapply(1:n, function(i) cov(Y[,i], Yhat.lm[,i])))      #  5 df
sum(sapply(1:n, function(i) cov(Y[,i], Yhat.step[,i])))    # 19 df
sum(sapply(1:n, function(i) cov(Y[,i], Yhat.lasso[,i])))   # 10 df
mean(v)                                                    # 10 df

# AIC, BIC for lasso (Slide 7)
Fig2.6()

# Demo code (Slide 15)
attachData('pollution')
X <- std(X)
fit <- glmnet(X, y)
plot(fit, label=TRUE)
cvfit <- cv.glmnet(X, y, nfolds=length(y))
plot(cvfit)

# CV Fig (Slide 16)
Fig2.7()

# Slide 17
cvfit$lambda.min                                         # Value of lambda minimizing CV error
nrow(predict(fit, type='nonzero', s=cvfit$lambda.min))   # Number of nonzero
ind <- which(cvfit$lambda==cvfit$lambda.min)
range(fit$lambda[which(cvfit$cvm < cvfit$cvup[ind])])    # Range of lambda values within 1 SE of min

# Variance estimators (Slide 22)
Fig2.8()

# R-squared (Slide 25)
Fig2.9()

# summary.cv.ncvreg (Slide 26)
cvfit <- cv.ncvreg(X, y, penalty="lasso", nfolds=length(y))
summary(cvfit)
