source('http://myweb.uiowa.edu/pbreheny/7600/s16/notes/fun.R')

# Slide 12
set.seed(1)
n <- 20
p <- 19
X <- std(matrix(rnorm(n*p), n, p))

bigVar <- numeric(p)
for (i in 1:p) {
  Var <- solve(crossprod(X[,1:i]))
  bigVar[i] <- max(diag(Var))
}
plot(1:p, log(20*bigVar, 2), yaxt="n", pch=19, bty='n', xlim=c(0,20),
     ylab="Largest variance", xlab="Number of columns included", )
logAxis(2, base=2)
bigVar[15]/bigVar[1]

# Slide 16
set.seed(1)
n <- 25
p <- 100
xnam <- paste0("V", 1:p)
form <- as.formula(paste("y ~ ", paste(xnam, collapse= "+")))
N <- 100

res <- cover <- pred <- NULL
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  X <- std(matrix(runif(n*p), n, p))
  Data <- as.data.frame(X)
  y <- rnorm(n)
  j <- which.max(abs(crossprod(X, y - mean(y))))
  XX <- std(matrix(runif(n*p), n, p))
  pData <- as.data.frame(XX)
  yy <- rnorm(n)
  fit0 <- lm(y~1, data=Data)
  fit <- step(fit0, scope=form, direction="forward", trace=0, k=log(n), steps=5)
  res <- rbind(res, summary(fit)$coef[-1,])
  pred <- c(pred, yy - predict(fit, pData))
  cover <- c(cover, apply(confint(fit)[-1,,drop=FALSE], 1, prod) <= 0)
  setTxtProgressBar(pb, i)
}

# Slide 17
b <- res[,"Estimate"]
hist(b, breaks=seq(-1.2, 1.2, .1), freq=FALSE, xlab=expression(hat(beta)), col="gray", border="white", main="", las=1)

# Slide 19
mean(b^2)
1/n
mean(b^2)/(1/n)

# Slide 20
nrow(res)/N

# Slide 21
crossprod(pred)/length(pred)

# Slide 22
median(res[,"Pr(>|t|)"])
range(res[,"Pr(>|t|)"])
mean(cover)
