## Read in data
wells <- read.delim("http://web.as.uky.edu/statistics/users/pbreheny/760/data/wells.txt")

## Models under consideration
fit0 <- glm(Switch~1, family=binomial, data=wells)
fit <- vector("list",3)
summary(fit[[1]] <- glm(Switch~Dist, family=binomial, data=wells))
summary(fit[[2]] <- glm(Switch~Dist+Arsenic, family=binomial, data=wells))
summary(fit[[3]] <- glm(Switch~., family=binomial, data=wells))

## Slide 13 (gamma/tau/D are from SAS)
r2 <- R2 <- R2adj <- DE <- DEadj <- numeric(length(fit))
y <- wells$Switch
for (i in 1:length(fit)) {
  r2[i] <- cor(y, fit[[i]]$fitted)^2
  R2[i] <- 1-crossprod(y-fit[[i]]$fitted)/crossprod(y-mean(y))
  R2adj[i] <- 1-crossprod(y-fit[[i]]$fitted)/fit[[i]]$df.residual/(crossprod(y-mean(y))/fit[[i]]$df.null)
  DE[i] <- 1-fit[[i]]$deviance/fit[[i]]$null.deviance
  DEadj[i] <- 1-fit[[i]]$deviance/fit[[i]]$df.residual/(fit[[i]]$null.deviance/fit[[i]]$df.null)
}
rbind(r2,R2,R2adj,DE,DEadj)

## Slide 15
table(fit[[1]]$fitted>0.5, y)
table(fit[[3]]$fitted>0.5, y)

## ROC
require(ROCR)
pred <- perf <- vector("list",3)
for (i in 1:3) pred[[i]] <- prediction(fit[[i]]$fitted, y)
for (i in 1:3) perf[[i]] <- performance(pred[[i]], measure="tpr", x.measure="fpr")
for (i in 1:3) print(performance(pred[[i]], measure="auc")@y.values[[1]]) ## Area under curve
col <- c("#FF4E37FF", "#00B500FF", "#008DFFFF")
plot(perf[[1]], lwd=2, col=col[1])
plot(perf[[2]], add=TRUE, col=col[2], lwd=2)
plot(perf[[3]], add=TRUE, col=col[3], lwd=2)
legend("topleft", col=col, legend=paste("Model", 1:3), lwd=2)

## AIC and BIC
AIC(fit[[3]]) ## Basic usage
sapply(fit, AIC)
B <- sapply(fit, BIC)
BB <- B-min(B) ## Otherwise exp(B) is too large for the computer to handle
exp(-0.5*BB)/sum(exp(-0.5*BB))
