## NHANES sim
source("http://myweb.uiowa.edu/pbreheny/571/f14/labs/displayProgressBar.R")
lipids <- read.delim("http://myweb.uiowa.edu/pbreheny/data/lipids.txt")
pop <- lipids$TRG
N <- 10000
n <- c(2, 3, 4, 5, 10, 20, 40, 80, 160, 320, 640)
covered <- matrix(NA, N, length(n), dimnames=list(1:N, n))
for (i in 1:N) {
  for (j in 1:length(n)) {
    sam <- sample(pop, n[j], replace=TRUE)
    if (sd(sam)==0) {
      covered[i,j] <- 0
      next
    }
    covered[i,j] <- t.test(sam, mu=mean(pop))$p.value > 0.05
  }
  displayProgressBar(i, N)
}
coverage <- apply(covered, 2, mean)
plot(n, coverage, type="l", ylim=c(0.85,1))
abline(h=0.95, col="red")

## nonlinear axis
plot(log2(n), coverage, type="l", ylim=c(0.85,1), xaxt="n", xlab="n")
x <- seq(1, 9, 2)
axis(1, at=x, labels=2^x)
abline(h=0.95, col="red")

## equal variance t test
lead <- read.delim("http://myweb.uiowa.edu/pbreheny/data/lead-iq.txt")
head(lead)
t.test(IQ~Smelter, data=lead, var.equal=TRUE)

## By hand:
n <- with(lead, by(IQ, Smelter, length))
m <- with(lead, by(IQ, Smelter, mean))
v <- with(lead, by(IQ, Smelter, var))
Sp <- sqrt(weighted.mean(v, n-1))
SE <- Sp*sqrt(1/n[1] + 1/n[2])
2*pt((m[1]-m[2])/SE, sum(n)-2, lower.tail=FALSE)
m[1]-m[2] + qt(c(0.025,0.975), sum(n)-2)*SE

## welch t test
t.test(IQ~Smelter, data=lead)

## By hand:
SE <- sqrt(sum(v/n))
df <- sum(v/n)^2/sum((v/n)^2/(n-1))
2*pt((m[1]-m[2])/SE, df, lower.tail=FALSE)
m[1]-m[2] + qt(c(0.025,0.975), df)*SE

## equal variance 1
col <- c("#FF4E37FF", "#008DFFFF")
N <- 1000
n <- 2:10
pW <- pS <- matrix(NA, N, length(n))
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=0, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Type I Error Rate", xlab="Sample size per group", ylim=c(0,0.1), las=1)
abline(h=0.05, col="gray")
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

## equal variance 2
pW <- pS <- matrix(NA, N, length(n))
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=1.5, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Power", xlab="Sample size per group", ylim=c(0,1), las=1)
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

## unequal variance 1
pW <- pS <- matrix(NA, N, length(n))
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=0, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=4)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Type I error rate", xlab="Sample size per group", ylim=c(0,0.1), las=1)
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("bottomleft", lwd=3, col=col, legend=c("Welch","Student"))

## unequal variance 2
n <- 5
SD <- 1:10
pW <- pS <- matrix(NA, N, length(SD))
for (i in 1:N) {
  for (j in 1:length(SD)) {
    s1 <- rnorm(n, mean=0, sd=1)
    s2 <- rnorm(n, mean=0, sd=SD[j])
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(SD, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Type I error rate", xlab="Ratio of SDs", ylim=c(0,0.1), las=1)
lines(SD, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("bottomleft", lwd=3, col=col, legend=c("Welch","Student"))

## unequal variance 3
n1 <- 10:18
pW <- pS <- matrix(NA, nrow=N, ncol=length(n1))
for (i in 1:N) {
  for (j in 1:length(n1)) {
    s1 <- rnorm(n1[j],    mean=0, sd=1)
    s2 <- rnorm(20-n1[j], mean=0, sd=4)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(n1, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Type I error rate", xlab="Number in group 1", ylim=c(0,0.5), las=1)
lines(n1, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

## unequal variance 4
n1 <- 10:18
pW <- pS <- matrix(NA, nrow=N, ncol=length(n1))
for (i in 1:N) {
  for (j in 1:length(n1)) {
    s1 <- rnorm(n1[j],    mean=0, sd=4)
    s2 <- rnorm(20-n1[j], mean=3, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  displayProgressBar(i,N)
}
plot(n1, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Power", xlab="Number in group 1", ylim=c(0, 1), las=1)
lines(n1, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

