# Same as running Fig2.4()
adj <- 0.07
par(mar=c(0.5,0,0,1.5))
plot(0, 0, type="l", xaxt="n", yaxt="n", xlim=c(-0.1,1), ylim=c(-0.1,0.9), bty="n", xlab="", ylab="")
arrows(0,0,1,0)
text(1+adj, -adj, expression(bold(x)[1]), xpd=TRUE)
arrows(0,0,1/2,sqrt(3)/2)
text(1/2+adj, sqrt(3)/2-adj, expression(bold(x)[2]), xpd=TRUE)
points(0, 0, pch=19)
text(-adj, -adj, expression(bold(0)))
points(0.8,0,pch=19)
text(0.8-adj, -adj, expression(bar(bold(y))[1]))
points(0.4+sqrt(3)/3, 1/3, pch=19)
text(0.4+sqrt(3)/3+adj, 1/3-adj, expression(bar(bold(y))[2]), xpd=TRUE)
col <- "#008DFFFF"
lines(c(0, 0.4), c(0,0), col=col, lwd=5)
lines(c(0.4, 0.4+1/2), c(0, sqrt(3)/2), col="gray", lty=2, lwd=5)
lines(c(0.4, 0.4+sqrt(3)/3), c(0, 1/3), col=col, lwd=5)

# library(glmnet)
# fit <- glmnet(X, y)
# plot(fit)

# Pollution data
library(hdrm)
attach_data('pollution')
X <- std(X)

# Fit (default plot)
fit <- glmnet(X, y)
plot(fit, label=TRUE)
cvfit <- cv.glmnet(X, y, nfolds=length(y))

# Figures shown on slides
Fig2.5(cv=FALSE)
Fig2.5()
par(mar=c(5, 5, 5, 5))
l <- fit$lambda
b <- coef(fit)[-1,]
nv <- sapply(predict(fit, type="nonzero"), length)
matplot(l, t(b), col=hdrm:::pal(ncol(X)), lwd=3, bty="n", xlim=rev(range(l)), type="l", lty=1, xlab=expression(lambda), ylab="", las=1)
mtext(expression(hat(beta)), 2, 3, las=1)
ind <- abs(b[,ncol(b)]) > 15
text(x=-5, y=b[which(ind),ncol(b)], colnames(X)[ind], xpd=TRUE)
text(x=-5, y=10, "SO2", xpd=TRUE)
ind <- sapply(seq(40, 0, -10), function(x) which.min(abs(l-x)))
axis(3, at=l[ind], labels=round(nv[ind], 1))
mtext("Number of nonzero coefficients", 3, 3)
abline(v=log(cvfit$lambda.min), col="gray")

# Which coefficients are nonzero
colnames(X)[unlist(predict(fit, s=1.84, type="nonzero"))]

# Lasso / ridge comparison
fit.ridge <- ridge(X,y)
b.lasso <- as.numeric(coef(fit, s=1.84))[-1]
b.ridge <- coef(fit.ridge, which=which.min(fit.ridge$loocv))[-1]
boxplot(b.ridge, b.lasso, col='gray', las=1, names=c('Ridge', 'Lasso'),
        frame.plot=FALSE)
mtext(expression(hat(beta)), 2, 3, las=1)

