load(url('http://myweb.uiowa.edu/pbreheny/data/pollution.RData'))
source('http://myweb.uiowa.edu/pbreheny/7600/s16/notes/fun.R')
X <- std(X)

##############################################################################
## Note: The MASS package has a function, lm.ridge(), for ridge regression. ##
## It's a fine function, but (a) it's syntax isn't really in line with how  ##
## other penalized regression packages like glmnet and ncvreg work,         ##
## (b) it doesn't exactly match our presentation in the book in terms of    ##
## how it defines lambda and GCV, and (c) it doesn't offer/return certain   ##
## calculations, like the degrees of freedom or standard errors.            ##
## So, I wrote my own function, ridge(), with various associated helper     ##
## functions.  You can use whichever you'd like, but I thought ridge()      ##
## would be more convenient.                                                ##
##############################################################################

# Use of ridge()
fit <- ridge(X, y)
plot(fit)
plot(fit, xaxis='df')
coef(fit, lambda=1)
summary(fit, lambda=1)
fit$lambda[which.min(fit$GCV)]
predict(fit, X, lambda=0.2)
summary(fit, lambda=0.01)
confint(fit, lambda=0.01)
confint(fit, lambda=0.1, parm=c('HC', 'NOX', 'SO2'))

# Trace
par(mar=c(5,5,5,7))
plot(fit)
ind <- abs(coef(fit)[-1,1]) > 15
text(x=log10(10^(-3.8)), y=coef(fit)[-1,1][ind], colnames(X)[ind], xpd=TRUE)
text(x=log10(10^(-3.8)), y=8, "SO2", xpd=TRUE)
ind <- seq(1, length(fit$lambda), length=5)
axis(3, at=log10(fit$lambda)[ind], labels=round(fit$df[ind], 1))
mtext("Degrees of freedom", 3, 3)
abline(v=log10(fit$lambda)[which.min(fit$GCV)], col="gray")

# Prediction accuracy
ll <- log10(fit$lambda)
col <- pal(2)
plot(ll, fit$GCV/1000, type="l", lwd=3, xlim=c(3,-3), las=1, xlab=expression(lambda), xaxt="n", ylab="Prediction error",
     col=col[2], bty="n", ylim=c(50,250))
logAxis(1, base=10)
lines(ll, fit$RSS/1000, col=col[1], lwd=3)
text(-3.1, 105, "GCV", xpd=TRUE)
text(-3.1, 65, "RSS", xpd=TRUE)
fit$lambda[which.min(fit$GCV)]

## Significance
summary(fit, which=which.min(fit$GCV))
t.ridge <- summary(fit, which=which.min(fit$GCV))[-1,]$t
ord <- order(t.ridge, decreasing=TRUE)
fit.ols <- lm(y-mean(y)~0+., data=as.data.frame(X))
t.ols <- summary(fit.ols)$coef[,3]
cbind(t.ridge, t.ols)[ord,]
