require(boot)

## Draw random samples from a double exponential distribution
rdex <- function(n, mean=0, sd=1) {
  mean + sample(c(-1,1), n, replace=TRUE) * rexp(n, sqrt(2)/sd)
}

## Draw random samples from a mixture of normals
rmix <- function(n, mean=0, sd=1, frac=.1, ratio=10) {
  z <- rbinom(n, size=1, prob=frac)
  mean + (1-z)*rnorm(n, mean=mean, sd=sd) + z*rnorm(n, mean=mean, sd=ratio*sd)
}

## Function to generate data for two-group problem
genData <- function(n, m, dist=c("normal", "dex", "mix"), variance=c("equal", "unequal"), delta=1) {
  dist <- match.arg(dist)
  variance <- match.arg(variance)
  g <- c(rep(1, n), rep(0, m))
  if (dist=="normal" & variance=="equal") z <- c(rnorm(n), rnorm(m))
  if (dist=="normal" & variance=="unequal") z <- c(rnorm(n, sd=0.5), rnorm(m, sd=1.5))
  if (dist=="dex" & variance=="equal") z <- c(rdex(n), rdex(m))
  if (dist=="dex" & variance=="unequal") z <- c(rdex(n, sd=0.5), rdex(m, sd=1.5))
  if (dist=="mix" & variance=="equal") z <- c(rmix(n), rmix(m))
  if (dist=="mix" & variance=="unequal") z <- c(rmix(n, sd=0.5), rmix(m, sd=1.5))
  z <- z + g*delta
  data.frame(z, g)
}

## Bootstrap test (slide 7)
boot.fun <- function(x, ind) c(mean(x[ind]), var(x[ind])/length(ind))
boot.test <- function(Data, rank=FALSE, B=1000) {
  z <- if (rank) rank(Data$z) else Data$z
  x <- z[Data$g==1]
  y <- z[Data$g==0]
  xx <- x - mean(x) + mean(z)
  yy <- y - mean(y) + mean(z)
  
  ## Bootstrap
  x.star <- boot(xx, boot.fun, B)$t
  y.star <- boot(yy, boot.fun, B)$t
  t.star <- (x.star[,1] - y.star[,1])/sqrt(x.star[,2]+y.star[,2])
  t.obs <- (mean(x) - mean(y))/sqrt(var(x)/length(x)+var(y)/length(y))
  mean(abs(t.star) >= abs(t.obs))
}

## Bootstrap test for difference of medians (slide 11)
median.boot <- function(x, ind) median(x[ind])
boot.median.test <- function(Data, B=1000) {
  x <- Data$z[Data$g==1]
  y <- Data$z[Data$g==0]
  xx <- x - median(x) + median(Data$z)
  yy <- y - median(y) + median(Data$z)
  
  ## Bootstrap
  x.star <- boot(xx, median.boot, B)$t
  y.star <- boot(yy, median.boot, B)$t
  t.star <- x.star - y.star[,1]
  t.obs <- median(x) - median(y)
  mean(abs(t.star) >= abs(t.obs))
}

## Simulation (Takes a few hours)
N <- 1000
delta <- seq(0, 1, len=5)
dist <- c("normal", "dex", "mix")
variance <- c("equal", "unequal", "unequal")
n <- c(30, 10, 50)
m <- c(30, 50, 10)
results <- array(NA, dim=c(N, length(delta), 3, 3, 5), dimnames=list(1:N, delta, dist, c("Equal", "More in HV", "Fewer in HV"), c("Student", "Welch", "Wilcoxon", "Boot-mean", "Boot-median")))
for (i in 1:N) {
  for (j in 1:length(delta)) {
    for (k in 1:3) {
      for (l in 1:3) {
        Data <- genData(n=n[l], m=m[l], dist=dist[k], variance=variance[l], delta=delta[j])
        results[i,j,k,l,1] <- t.test(Data$z~Data$g, var.eq=TRUE)$p.value
        results[i,j,k,l,2] <- t.test(Data$z~Data$g)$p.value
        results[i,j,k,l,3] <- wilcox.test(Data$z~Data$g)$p.value
        results[i,j,k,l,4] <- boot.test(Data)
        results[i,j,k,l,5] <- boot.median.test(Data)
      }
    }
  }
  displayProgressBar(i,N,50)
  save(results, file="tmp.RData")
}

## Slides 8-9
mypanel <- function(...) {
  panel.abline(h=.05, col="black", lty=2)
  panel.xyplot(...)
}
df <- array2df(apply(results[,,,,1:4] < .05, 2:5, mean, na.rm=TRUE), vars=c("Delta","Dist","Variance","Method","Power"))
xyplot(Power~Delta|Variance, df, subset=(Dist=="normal"), group=Method, layout=c(3,1), type="l", auto.key=list(columns=4, points=FALSE, lines=TRUE), panel=mypanel)
xyplot(Power~Delta|Variance, df, subset=(Dist=="dex"), group=Method, layout=c(3,1), type="l",auto.key=list(columns=4,points=FALSE,lines=TRUE),panel=mypanel)

## Slides 12-13
df <- array2df(apply(results[,,,,c(1:3,5)] < .05, 2:5, mean, na.rm=TRUE), vars=c("Delta","Dist","Variance","Method","Power"))
xyplot(Power~Delta|Variance, df, subset=(Dist=="dex"), group=Method, layout=c(3,1),type="l",auto.key=list(columns=4,points=FALSE,lines=TRUE),panel=mypanel)
xyplot(Power~Delta|Variance, df, subset=(Dist=="mix"), group=Method, layout=c(3,1),type="l",auto.key=list(columns=4,points=FALSE,lines=TRUE),panel=mypanel)
