x <- faithful$waiting
a <- min(x)
b <- max(x)

## Interactive
require(iplots)
ihist(x, fillColor="gray", borderColor="white")

## Slide 6
ylim=c(0,.1)
hist(x, breaks=seq(a, b, len=4), col="gray", border="white", main="", xlab="Waiting time", freq=F, ylim=ylim)
hist(x, breaks=seq(a, b, len=14), col="gray", border="white", main="", xlab="Waiting time", freq=F, ylim=ylim)
hist(x, breaks=seq(a, b, len=75), col="gray", border=FALSE, main="", xlab="Waiting time", freq=F, ylim=ylim)

## Cross-validation picture (slide 20)
for (i in 1:5) {
  par(bty="n")
  plot(c(0,5),c(-2,3),type="n",xaxt="n",yaxt="n",xlab="",ylab="")
  for (j in 1:5) {
    if (i==j) polygon(j-1+c(0,1,1,0),c(0,0,1,1),col="white")
    else polygon(j-1+c(0,1,1,0),c(0,0,1,1),col="gray80")
    text(j-1+.5,.5,labels=j)
  }
}

## Slide 17, 23
n <- length(x)
xx <- x + runif(n, -0.5, 0.5)
brks <- 4*(1:25)
h <- J <- J1 <- J2 <- J.hat <- numeric(length(b))
for (i in 1:length(brks)) {
  breaks <- seq(min(xx), max(xx), len=brks[i])
  fit <- hist(xx, breaks=breaks, plot=F)
  h[i] <- diff(range(xx))/(brks[i]-1)
  J1[i] <- sum(h[i]*fit$density^2)
  J2[i] <- 2*sum(fit$counts*fit$density)/n
  J.hat[i] <- 2/(h[i]*(n-1))-(n+1)/(h[i]*(n-1))*sum((fit$counts/n)^2)
  J[i] <- sum(h[i]*fit$density^2) - 2*sum(fit$counts*fit$density)/n
}
op <- par(mar=c(5,4,1,10))
plot(h,J,type="l",xlim=rev(range(h)), lwd=3, las=1)
lines(h,J.hat,col="blue", lwd=3)
legend(0, -0.025, col=c("black","blue"), lwd=3, legend=c("plug-in","leave-one-out"), bty="n", xpd=TRUE)
par(op)

## Slide 24
hist(x, breaks=seq(a, b, len=4*which.min(J.hat)), col="gray", border="white", xlab="Waiting times until eruption", main="")

## Slide 26
## Binomial CI
m <- 11
breaks <- seq(a, b, len=m+1)
fit <- hist(x, breaks=breaks, plot=F)
l1 <- u1 <- numeric(m)
h <- breaks[2]-breaks[1]
for (i in 1:m) {
  CI <- binom.test(fit$counts[i],n)$conf
  l1[i] <- CI[1]/h
  u1[i] <- CI[2]/h
}
hist(x, breaks=breaks, freq=FALSE, ylim=c(0,0.075), border="white",main="", xlab="Waiting times (min)")
polygon.step(breaks, u1, l1, col="lightgray", border=FALSE)
hist(x, breaks=breaks, freq=FALSE, add=TRUE)

## Slide 27
l2 <- u2 <- numeric(m)
h <- breaks[2]-breaks[1]
for (i in 1:m) {
  CI <- binom.test(fit$counts[i], n, conf.level=1-.05/m)$conf
  l2[i] <- CI[1]/h
  u2[i] <- CI[2]/h
}
hist(x, breaks=breaks, freq=FALSE, ylim=c(0,0.075), border="white",main="", xlab="Waiting times (min)")
polygon.step(breaks, u2, l2, col="gray90", border=FALSE)
polygon.step(breaks, u1, l1, col="gray80", border=FALSE)
hist(x, breaks=breaks, freq=FALSE, add=TRUE)

## Bootstrap-histogram (slide 29)
B <- 50
fb <- matrix(NA, nrow=B, ncol=m+2)
fit0 <- hist(x, breaks=breaks, freq=FALSE, ylim=c(0,0.075), border="white")
for (i in 1:B) {
  xb <- sample(x, n, replace=TRUE)
  d <- hist(xb,breaks=breaks,plot=FALSE)$density
  d.jitter <- d+runif(m,-.005,.005)
  d.jitter[d == 0] <- 0
  fb[i,] <- c(0,d.jitter,0)
  lines(stepfun(breaks,fb[i,]),do.points=FALSE,col="lightgray")
}
lines(stepfun(breaks,c(0,fit0$density,0)),do.points=FALSE,col="black")

## Bootstrap confidence bands (pointwise)
B <- 1000
fb <- matrix(NA,nrow=B,ncol=m)
for (i in 1:B) {
  xb <- sample(x, n, replace=TRUE)
  d <- hist(xb, breaks=breaks, plot=FALSE)$density
  fb[i,] <- d
}
L1 <- apply(fb, 2, quantile,p=.025)
U1 <- apply(fb, 2, quantile,p=.975)
outside <- (apply(t(fb) < L1, 2, any) | apply(t(fb) > U1, 2, any))
mean(outside)

a1 <- exp(seq(log(.01), log(.00001), len=49))
a2 <- numeric(length(a1))
for (i in 1:length(a1)) {
  lower <- apply(fb,2,quantile,p=a1[i]/2)
  upper <- apply(fb,2,quantile,p=1-a1[i]/2)
  a2[i] <- mean(apply(t(fb) < lower,2,any) | apply(t(fb) > upper,2,any))
}
plot(a1,a2,xlim=rev(range(a1)),type="l",xlab=expression(alpha[pw]),ylab=expression(alpha))
abline(h=.05,col="red")

alpha <- max(a1[a2 < .05])
L2 <- apply(fb, 2, quantile, p=alpha/2)
U2 <- apply(fb,2,quantile, p=1-alpha/2)
rbind(L2,U2)
hist(x, breaks=breaks, freq=FALSE, border="white", ylim=c(0,0.075), main="Bootstrap", xlab="Waiting times (min)")
polygon.step(breaks,U2,L2,col="gray90",border=FALSE)
polygon.step(breaks,U1,L1,col="gray80",border=FALSE)
hist(x, breaks=breaks, freq=FALSE, add=TRUE)
