# Leukemia example
# See previous lecture for creation of min-p.rds
library(hdrm)
Golub1999 <- read_data(Golub1999)
summ <- summary(lm(Golub1999$X~Golub1999$y))
tstat <- sapply(summ, function(s) s$coef[2,3])
p <- sapply(summ, function(s) s$coef[2,4])
min_p <- readRDS('min-p.rds')
h <- length(p)
ord <- order(p)
pi_hat <- apply(t(min_p) <= p[ord], 1, mean)
p_adj_sorted <- cummax(pi_hat)
p_wy <- p_adj_sorted[match(1:h, ord)]

# Compare Bonferroni, Westfall-Young, and Benjamini-Hochberg
P <- cbind(p.adjust(p, method='bonf'), p_wy, p.adjust(p, method='BH'))
colnames(P) <- c('Bonferroni', 'Westfall', 'Benjamini')

# # Number of genes significant at a 10% threshold
# colSums(P <= 0.1)

# Plot comparison with BH
par(mar=c(4, 4, 2, 0.5))
S <- apply(P, 2, sort)
matplot(S[1:2000,], col=breheny::pal(3)[c(1,2,3)], type='l', bty='n', lwd=3, lty=1, xlab="Ordered index", ylab='FWER or FDR', las=1)
hdrm::toplegend(legend=colnames(P), lwd=3, col=hdrm::pal(3))

# p.adjust(p, method='BH')

# Bias-variance tradeoff in estimating pi0
ph <- function(tt) sapply(tt, function(t) sum(p>t)/(h*(1-t)))
tt <- seq(.01, .99, .01)
par(mar=c(4, 4.5, 0.5, 0.5))
plot(tt, ph(tt), pch=19, xlab='t', ylab=expression(hat(pi)(t)), bty='n', las=1, ylim=c(0.4, 1))
plot(tt, ph(tt), pch=19, xlab='t', ylab=expression(hat(pi)(t)), bty='n', las=1, ylim=c(0.4, 1), col='gray')
fit <- smooth.spline(tt, ph(tt), df=6)
lines(fit$x, fit$y, col='red', lwd=3)

# Estimated value of pi0
pi_hat <- tail(fit$y, 1)
h0_hat <- pi_hat * h

# How does this compare to the p-value histogram?
H <- H0 <- hist(p, breaks=seq(0,1,.025), plot=FALSE)
H0$counts <- pmin(h0_hat*.025, H$counts)
plot(H, main="", xlab="p", ylim=c(0, max(H$counts)), border="white", col="gray", las=1)
plot(H0, add=TRUE, border="white", col="gray50")
lines(0:1, rep(h0_hat*.025, 2), lwd=3, col='red')

