library(hdrm)

# Read in Leukemia data
Golub1999 <- read_data('Golub1999')

# Carry out t-tests
summ <- summary(lm(Golub1999$X~Golub1999$y))
tstat <- sapply(summ, function(s) s$coef[2,3])
p <- sapply(summ, function(s) s$coef[2,4])
z <- qnorm(p/2)*sign(-tstat)
h <- length(p)
n <- length(Golub1999$y)

# Histogram of p-values
hist(p, main='', col='gray', border='white', las=1, breaks=seq(0, 1, .025), freq=FALSE)
lines(0:1, c(1,1), lwd=3, col='slateblue')

z6 <- z[z > -6 & z < 6]
zz <- seq(-6, 6, .01)
hist(z6, main='', col='gray', border='white', las=1, breaks=seq(-6, 6, 0.5), freq=FALSE, ylim=c(0, 0.4), xlab='z')
lines(zz, dnorm(zz), lwd=3, col='slateblue')
lines(rep(qnorm(.05/h), 2), c(0, 0.4), lwd=2, lty=2)
lines(rep(-qnorm(.05/h), 2), c(0, 0.4), lwd=2, lty=2)

# p.adjust(p, method='bonferroni')
# p.adjust(p, method='holm')        # Default

# Westfall-Young procedure (time consuming, ~10 minutes)
ord <- order(p)
if (file.exists('min-p.rds')) {
  min_p <- readRDS('min-p.rds')
} else {
  B <- 500
  min_p <- matrix(NA, B, h)
  pb <- progress::progress_bar$new(total=B)
  XX <- Golub1999$X[,ord]
  set.seed(6)  # For reproducibility
  for (i in 1:B) {
    summ_i <- summary(lm(XX~sample(Golub1999$y)))
    p_i <- vapply(summ_i, function(s) s$coef[2,4], numeric(1))
    min_p[i,] <- rev(cummin(rev(p_i)))
    pb$tick()
  }
  saveRDS(min_p, 'min-p.rds')
}
pi_hat <- apply(t(min_p) <= p[ord], 1, mean)
p_adj_sorted <- cummax(pi_hat)
p_adj <- p_adj_sorted[match(1:h, ord)]

# Comparison of p-values adjustments
par(mar=c(4,4,2,0.5))
P <- cbind(sort(p.adjust(p)), sort(p.adjust(p, method='bonf')), sort(p_adj))
matplot(P[1:500,], col=breheny::pal(3), type='l', bty='n', lwd=3, lty=1, xlab="Ordered index", ylab='Adjusted p', las=1)
hdrm::toplegend(legend=c('Holm', 'Bonferroni', 'Westfall'), lwd=3, col=hdrm::pal(3))

