## Read in data
wells <- read.delim("http://web.as.uky.edu/statistics/users/pbreheny/760/data/wells.txt")
attach(wells)
ilogit <- binomial()$linkinv
source("http://web.as.uky.edu/statistics/users/pbreheny/760/S13/notes/Hist.R") ## Minor rewrite to R's hist function

## Descriptive
prop.table(table(Switch))
prop.table(table(Community))
hist(Arsenic, breaks=50, col="gray", border="white")
hist(Dist, breaks=50, col="gray", border="white")
barplot(table(Educ), border="white", xlab="Education")

## Descriptive vs. Switch
prop.table(table(Community, Switch),1)
boxplot(Arsenic~Switch, col="gray", las=1, ylab="Arsenic", xlab="Switch")
boxplot(Dist~Switch, col="gray", las=1, ylab="Distance", xlab="Switch")
boxplot(Educ~Switch, col="gray", las=1, ylab="Education", xlab="Switch")

## New variables
cDist <- (Dist-mean(Dist))/100
cArsn <- (Arsenic-mean(Arsenic))/2
cEduc <- (Educ-mean(Educ))/8

## Additive model
summary(fit <- glm(Switch~cArsn+cDist+cEduc+Community, family=binomial))

## Compare to unadjusted odds ratios
adj <- coef(fit)[-1]
uadj <- c(coef(glm(Switch~cArsn, family=binomial))[2],
          coef(glm(Switch~cDist, family=binomial))[2],
          coef(glm(Switch~cEduc, family=binomial))[2],
          coef(glm(Switch~Community, family=binomial))[2])
exp(cbind(uadj, adj))
cor(Arsenic, Dist)

## Get rid of Community
summary(fit <- glm(Switch~cArsn+cDist+cEduc, family=binomial))

## Plot
require(visreg)
par(mfrow=c(1,3))
fit <- glm(Switch~Arsenic+Dist+Educ, family=binomial)
visreg(fit, scale="response", partial=FALSE, ylim=c(0,1), ylab="Pr(Switch)")

## Transformations
EduCat <- cut(Educ, c(-1, 0, 5, 10, 18), labels=c("0", "1-5", "6-10", "11+"))
summary(fit <- glm(Switch~cArsn+cDist+cEduc, family=binomial))        ## AIC: 3918
summary(fit <- glm(Switch~log(Arsenic)+cDist+cEduc, family=binomial)) ## AIC: 3886
summary(fit <- glm(Switch~cArsn+log(Dist)+cEduc, family=binomial))    ## AIC: 3951
summary(fit <- glm(Switch~cArsn+Dist+EduCat, family=binomial))        ## AIC: 3910

## New additive model
clArsn <- log(Arsenic)-mean(log(Arsenic))
summary(fit <- glm(Switch~clArsn+Dist+EduCat, family=binomial))       ## AIC: 3878

## Plot
fit <- glm(Switch~log(Arsenic)+Dist+EduCat, family=binomial)
visreg(fit, scale="response", partial=FALSE, ylim=c(0,1), ylab="Pr(Switch)")

## Interactions
summary(fit <- glm(Switch~clArsn*cDist+EduCat, family=binomial))      ## AIC: 3879    
summary(fit <- glm(Switch~clArsn*EduCat+cDist, family=binomial))      ## AIC: 3878
summary(fit <- glm(Switch~clArsn+cDist*EduCat, family=binomial))      ## AIC: 3860

## Plot
fit <- glm(Switch~Dist*EduCat + log(Arsenic), family=binomial)
visreg(fit, "Arsenic", scale="response", partial=FALSE, ylim=c(0,1), ylab="Pr(Switch)")
visreg(fit, "Dist", "EduCat", scale="response", partial=FALSE, ylim=c(0,1), ylab="Pr(Switch)")

## Summarization
tab <- table(fit$fitted > .5, Switch)
sum(tab[2:3])/sum(tab)                 ## 36.8% Error rate
                                       ## 42.5% Error rate for null model
1-fit$deviance/fit$null.deviance       ## 7% Null deviance explained

## The march of progress
fit.list <- list(glm(Switch~1, family=binomial),
                 glm(Switch~Dist+Arsenic+Educ+Community, family=binomial),
                 glm(Switch~Dist+Arsenic+Educ, family=binomial),
                 glm(Switch~Dist+log(Arsenic)+EduCat, family=binomial),
                 glm(Switch~Dist*EduCat+log(Arsenic), family=binomial))
plot(sapply(fit.list, AIC), pch=19, type="o", xaxt="n", xlab="", ylab="AIC")
lab <- c("Null model", "Basic\nadditive", "Removed\ncommunity", "Transformations", "Interaction")
axis(1, at=1:5, labels=lab, tck=0)
plot(sapply(fit.list, BIC), pch=19, type="o", xaxt="n", xlab="", ylab="BIC")
axis(1, at=1:5, labels=lab, tck=0)
