# NHANES sim
lipids <- read.delim("http://myweb.uiowa.edu/pbreheny/data/lipids.txt")
pop <- lipids$TRG
N <- 10000
n <- c(2, 3, 4, 5, 10, 20, 40, 80, 160, 320, 640)
covered <- matrix(NA, N, length(n), dimnames=list(1:N, n))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n)) {
    sam <- sample(pop, n[j], replace=TRUE)
    if (sd(sam)==0) {
      covered[i,j] <- 0
      next
    }
    covered[i,j] <- t.test(sam, mu=mean(pop))$p.value > 0.05
  }
  setTxtProgressBar(pb, i)
}

# NHANES sim: plot
coverage <- apply(covered, 2, mean)
plot(n, coverage, type="l", ylim=c(0.85,1), las=1)
abline(h=0.95, col="red")

# nonlinear axis
plot(log2(n), coverage, type="l", ylim=c(0.85,1), xaxt="n", xlab="n", las=1)
x <- seq(1, 9, 2)
axis(1, at=x, labels=2^x)
abline(h=0.95, col="red")

# equal variance t test
lead <- read.delim("http://myweb.uiowa.edu/pbreheny/data/lead-iq.txt")
head(lead)
t.test(IQ~Smelter, data=lead, var.equal=TRUE)

## By hand:
n <- with(lead, by(IQ, Smelter, length))
m <- with(lead, by(IQ, Smelter, mean))
v <- with(lead, by(IQ, Smelter, var))
Sp <- sqrt(weighted.mean(v, n-1))
SE <- Sp*sqrt(1/n[1] + 1/n[2])
2*pt((m[1]-m[2])/SE, sum(n)-2, lower.tail=FALSE)
m[1]-m[2] + qt(c(0.025,0.975), sum(n)-2)*SE

# welch t test
t.test(IQ~Smelter, data=lead)

## By hand:
SE <- sqrt(sum(v/n))
df <- sum(v/n)^2/sum((v/n)^2/(n-1))
2*pt((m[1]-m[2])/SE, df, lower.tail=FALSE)
m[1]-m[2] + qt(c(0.025,0.975), df)*SE

# equal variance 1
col <- c("#FF4E37FF", "#008DFFFF")
N <- 10000
n <- 2:10
pW <- pS <- matrix(NA, N, length(n))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=0, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# equal variance: plot
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], las=1,
     ylab="Type I Error Rate", xlab="Sample size per group", ylim=c(0,0.1))
abline(h=0.05, col="gray")
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

# equal variance 2
pW <- pS <- matrix(NA, N, length(n))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=1.5, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# equal variance 2: plot
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Power",
     xlab="Sample size per group", ylim=c(0,1), las=1)
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

# unequal variance 1
pW <- pS <- matrix(NA, N, length(n))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n)) {
    s1 <- rnorm(n[j], mean=0, sd=1)
    s2 <- rnorm(n[j], mean=0, sd=4)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# unequal variance 1: plot
plot(n, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], las=1,
     ylab="Type I error rate", xlab="Sample size per group", ylim=c(0,0.1))
lines(n, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("bottomleft", lwd=3, col=col, legend=c("Welch","Student"))

# unequal variance 2
n <- 5
SD <- 1:10
pW <- pS <- matrix(NA, N, length(SD))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(SD)) {
    s1 <- rnorm(n, mean=0, sd=1)
    s2 <- rnorm(n, mean=0, sd=SD[j])
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# unequal variance 2: plot
plot(SD, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1],
     ylab="Type I error rate", xlab="Ratio of SDs", ylim=c(0,0.1), las=1)
lines(SD, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("bottomleft", lwd=3, col=col, legend=c("Welch","Student"))

# unequal variance 3
n1 <- 10:18
pW <- pS <- matrix(NA, nrow=N, ncol=length(n1))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n1)) {
    s1 <- rnorm(n1[j],    mean=0, sd=1)
    s2 <- rnorm(20-n1[j], mean=0, sd=4)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# unequal variance 3: plot
plot(n1, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1],
     ylab="Type I error rate", xlab="Number in group 1", ylim=c(0,0.5), las=1)
lines(n1, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))

# unequal variance 4
n1 <- 10:18
pW <- pS <- matrix(NA, nrow=N, ncol=length(n1))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n1)) {
    s1 <- rnorm(n1[j],    mean=0, sd=4)
    s2 <- rnorm(20-n1[j], mean=3, sd=1)
    pW[i,j] <- t.test(s1, s2, var.equal=FALSE)$p.value
    pS[i,j] <- t.test(s1, s2, var.equal=TRUE)$p.value
  }
  setTxtProgressBar(pb, i)
}

# unequal variance 4: plot
plot(n1, apply(pW < 0.05, 2, mean), type="l", lwd=3, col=col[1], ylab="Power",
     xlab="Number in group 1", ylim=c(0, 1), las=1)
lines(n1, apply(pS < 0.05, 2, mean), lwd=3, col=col[2])
abline(h=0.05, col="gray", lwd=2)
legend("topleft", lwd=3, col=col, legend=c("Welch","Student"))
