library(hdrm)

# Slide 7
Fig2.4()

# Pollution data: Demo (Slide 20)
attachData('pollution')
X <- std(X)
fit <- glmnet(X, y)
plot(fit, label=TRUE)
lam <- 1.84   # From cross-validation (next lecture)

# Coefficient path (Slide 21)
Fig2.5()

## Slide 21, original (i.e., not log) axis
par(mar=c(5, 5, 5, 5))
l <- fit$lambda
b <- coef(fit)[-1,]
nv <- sapply(predict(fit, type="nonzero"), length)
matplot(l, t(b), col=hdrm:::pal(ncol(X)), lwd=3, bty="n", xlim=rev(range(l)), type="l", lty=1, xlab=expression(lambda), ylab="", las=1)
mtext(expression(hat(beta)), 2, 3, las=1)
ind <- abs(b[,ncol(b)]) > 15
text(x=-5, y=b[which(ind),ncol(b)], colnames(X)[ind], xpd=TRUE)
text(x=-5, y=10, "SO2", xpd=TRUE)
ind <- sapply(seq(40, 0, -10), function(x) which.min(abs(l-x)))
axis(3, at=l[ind], labels=round(nv[ind], 1))
mtext("Number of nonzero coefficients", 3, 3)
abline(v=log(lam), col="gray")

# What are the nonzero coefficients?
predict(fit, s=lam, type="nonzero")
colnames(X)[unlist(predict(fit, s=lam, type="nonzero"))]

# Lasso / ridge comparison (Slide 23)
fit.ridge <- ridge(X,y)
b.lasso <- as.numeric(coef(fit, s=lam))[-1]
b.ridge <- coef(fit.ridge, which=which.min(fit.ridge$GCV))[-1]
boxplot(b.ridge, b.lasso, col='gray', las=1, names=c('Ridge', 'Lasso'),
        frame.plot=FALSE)
mtext(expression(hat(beta)), 2, 3, las=1)
