plot.loess <- function(x,...){ind <- order(x$x);plot(x$x[ind],x$fitted[ind],...)}
lines.loess <- function(x,...){ind <- order(x$x);lines(x$x[ind],x$fitted[ind],...)}

bmd <- read.delim("http://web.as.uky.edu/statistics/users/pbreheny/621/data/bmd.txt")
m <- subset(bmd, gender=="male")
f <- subset(bmd, gender=="female")

## Slide 4
span <- seq(.1, 1, len=31)
degree <- 0:2
n <- nrow(m)
GCV <- p <- matrix(NA,nrow=length(span),ncol=3)
for (i in 1:length(span)) {
  for (j in 1:length(degree)) {
    fit <- loess(spnbmd~age,m,span=span[i],degree=degree[j])
    GCV[i,j] <- mean(((fit$y-fit$fitted)/(1-fit$trace.hat/n))^2)
    p[i,j] <- fit$trace.hat
  }
}
plot(span,1000*GCV[,2], type="l", xlab="Span", ylab="GCV", las=1, lwd=3)
ind <- c(1,6,11,16,21,26,31)
axis(3,at=span[ind],labels=round(p[ind,2],1))
mtext("Effective degrees of freedom", line=2.5)

## Slide 5
col <- c("#FF4E37FF","#00B500FF","#008DFFFF")
matplot(span, p, type="l", lty=1, xlab="Span", ylab="Effective degrees of freedom", col=col, lwd=3, ylim=c(0,20))
legend("topright",legend=c("degree=0","degree=1","degree=2"), lty=1, col=col, lwd=3)
optimal <- apply(GCV,2,which.min)
points(span[optimal], c(p[optimal[1],1],p[optimal[2],2],p[optimal[3],3]), pch=19,col=col)

## Slide 6
fit <- loess(spnbmd~age,m,span=span[optimal[1]],degree=0)
plot(m$age, m$spnbmd, pch=19,cex=0.5, col="gray", xlab="Age", ylab="Relative change in spinal BMD", las=1)
for (i in 1:3) {
  fit <- loess(spnbmd~age, m, span=span[optimal[i]], degree=i-1)
  lines(fit, col=col[i], lwd=3)
}
abline(h=0,lty=2)
legend("topright",legend=c("degree=0","degree=1","degree=2"),lty=1,col=col, lwd=3)

## Slide 7
## Optimal h
span <- seq(.1,1,len=31)
nm <- nrow(m)
nf <- nrow(f)
GCV <- p <- matrix(NA,nrow=length(span),ncol=2)
for (i in 1:length(span)) {
  fit <- loess(spnbmd~age,f,span=span[i])
  GCV[i,1] <- sum(((fit$y-fit$fitted)/(1-fit$trace.hat/nf))^2)
  p[i,1] <- fit$trace.hat
  fit <- loess(spnbmd~age,m,span=span[i])
  GCV[i,2] <- sum(((fit$y-fit$fitted)/(1-fit$trace.hat/nm))^2)
  p[i,2] <- fit$trace.hat
}
optimal <- apply(GCV,2,which.min)
p.opt <- c(p[optimal[1],1], p[optimal[2],2])
## Plot
col <- c("#FF4E37FF", "#008DFFFF")
tcol <- c("#FF4E374C", "#008DFF4C")
plot(f$age, f$spnbmd, pch=16, col=tcol[1], ylim=range(bmd$spnbmd), xlab="Age", ylab="Relative change in spinal BMD", las=1)
points(m$age, m$spnbmd, pch=16, col=tcol[2])
fit <- loess(spnbmd~age, f, span=span[optimal[1]], degree=2)
lines(fit, col=col[1],lwd=3)
fit <- loess(spnbmd~age, m, span=span[optimal[2]],degree=2)
lines(fit, col=col[2],lwd=3)
leg.text <- paste(c("Females","Males"), " (df = ", round(p.opt,1), ")", sep="")
legend("topright",legend=leg.text, lty=1, lwd=3, col=col)
abline(h=0,lty=2)

## Variance estimation (Slide 12)
require(locfit)
fit.f <- locfit(spnbmd~lp(age,nn=span[optimal[1]]),f)
fit.m <- locfit(spnbmd~lp(age,nn=span[optimal[2]]),m)
p <- fit.m$dp["df1"]
pp <- fit.m$dp["df2"]
RSS <- -2*fit.m$dp["lk"]
RSS/(nm-2*p+pp)
sqrt(rv(fit.m))
sqrt(rv(fit.f))

## Slide 14
xx <- seq(min(bmd$age), max(bmd$age), len=99)
v.f <- -2*predict(fit.f, newdata=xx, what="lik")/predict(fit.f, newdata=xx, what="rdf")
v.m <- -2*predict(fit.m, newdata=xx, what="lik")/predict(fit.m, newdata=xx, what="rdf")
plot(xx, sqrt(v.m), type="l", lwd=3, ylim=c(0,.06), las=1, main="Males", xlab="Age", ylab=expression(hat(sigma)^2*"(Age)"))
abline(h=sqrt(rv(fit.m)), lty=2)
plot(xx, sqrt(v.f), type="l", lwd=3, ylim=c(0,.06), las=1, main="Females", xlab="Age", ylab=expression(hat(sigma)^2*"(Age)"))
abline(h=sqrt(rv(fit.f)), lty=2)

## Pointwise intervals (slide 18)
tcol <- c("#FF4E3740", "#008DFF40")
pf <- predict(fit.f, xx, se.fit=TRUE)
pm <- predict(fit.m, xx, se.fit=TRUE)
z <- qnorm(.025)
plot(NULL, xlim=range(bmd$age), ylim=c(-.05,.15), type="n",xlab="Age",ylab="Relative change in spinal BMD", las=1)
polygon(c(xx,rev(xx)),c(pf$fit-z*pf$se.fit,rev(pf$fit+z*pf$se.fit)),col=tcol[1],border=FALSE)
lines(xx, pf$fit,col=col[1], lwd=3)
polygon(c(xx,rev(xx)),c(pm$fit-z*pm$se.fit,rev(pm$fit+z*pm$se.fit)),col=tcol[2],border=FALSE)
lines(xx, pm$fit,col=col[2], lwd=3)
legend("topright", legend=c("Females","Males"), lty=1, lwd=3, col=col)

## Global bands (slide 19)
sf <- scb(spnbmd~lp(age,nn=span[optimal[1]],deg=2), data=f, ev=lfgrid(70))
sm <- scb(spnbmd~lp(age,nn=span[optimal[2]],deg=2), data=m, ev=lfgrid(70))
plot(NULL, xlim=range(bmd$age), ylim=c(-.05,.15), type="n",xlab="Age",ylab="Relative change in spinal BMD", las=1)
with(sf, polygon(c(xev,rev(xev)), c(lower,rev(upper)), col=tcol[1], border=FALSE))
lines(xx, pf$fit,col=col[1], lwd=3)
with(sm, polygon(c(xev,rev(xev)), c(lower,rev(upper)), col=tcol[2], border=FALSE))
lines(xx, pm$fit,col=col[2], lwd=3)
legend("topright", legend=c("Females","Males"), lty=1, lwd=3, col=col)

## Hypothesis tests (slide 23)
require(gam)
fit0 <- gam(spnbmd~1, data=m)
fit1 <- gam(spnbmd~age, data=m)
fit2 <- gam(spnbmd~lo(age), data=m)
anova(fit0, fit1, fit2, test="F")