Golub1999 <- readRDS(url('https://s3.amazonaws.com/pbreheny-data-sets/Golub1999.rds'))

# Leuk example
summ <- summary(lm(Golub1999$X~Golub1999$y))
tstat <- sapply(summ, function(s) s$coef[2,3])
p <- sapply(summ, function(s) s$coef[2,4])
z <- qnorm(p/2) * sign(-tstat)
h <- length(p)

# Motivation (Slide 3)
q <- p.adjust(p, method='BH')
sum(q < 0.01)
range(p[q < 0.01])
range(abs(z[q < 0.01]))

# Density illustration (Slide 10)
x <- c(1, 3, 5, 6, 6.2, 6.6, 7, 8, 10)
n <- length(x)
xx <- seq(-2, 10, len=101)
hist(x, border="white", main="", freq=FALSE, las=1)
rug(x, col="red", lwd=3)
for (i in 1:n) lines(xx, dnorm(xx, x[i], 0.75)/10, col="#008DFFB2", lwd=3)
d <- density(x, bw=0.75)
lines(d, lwd=3)

# Density (Slide 11)
z6 <- z[z > -6 & z < 6]
brk <- seq(-6, 6, length = 99)
op <- par(mfrow=c(1,3), mar=c(5, 5, 0.5, 0.5))
h <- hist(z6, breaks=brk, main="", border=FALSE, col="lightgray", freq=FALSE, las=1, xlab='z')
dens <- density(z6, bw=.05)
lines(dens, col='slateblue', lwd=3)
h <- hist(z6, breaks=brk, main="", border=FALSE, col="lightgray", freq=FALSE, las=1, xlab='z')
dens <- density(z6)
lines(dens, col='slateblue', lwd=3)
h <- hist(z6, breaks=brk, main="", border=FALSE, col="lightgray", freq=FALSE, las=1, xlab='z')
dens <- density(z6, bw=1.5)
lines(dens, col='slateblue', lwd=3)
par(op)

# Local FDR illustration (Slide 12)
f <- function(z, pi0=1, ...) {
  # Calculation
  dens <- density(z, bw="nrd")
  f <- approxfun(dens$x, dens$y)
  f0 <- function(z) pi0*dnorm(z)
  lfdr <- pmin(f0(z)/f(z), 1)
  
  # Plot
  h <- hist(z, breaks=seq(min(z), max(z), length = 99), plot=FALSE)
  zz <- seq(min(z), max(z), len=299)
  ylim=c(0, max(c(h$density, dens$y, f0(0))))
  plot(h, main="", border=FALSE, col="lightgray", freq=FALSE, las=1, ylim=ylim)
  lines(dens, col="#FF4E37", lwd=2)
  lines(zz, f0(zz), col="#008DFF", lwd=2)
  fdr.zz <- f0(h$mids)/f(h$mids)
  y <- pmax(h$density * (1 - fdr.zz), 0)
  for (k in 1:length(h$mids)) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  
  return(invisible(lfdr))
}
lfdr1 <- f(z)

# Estimating pi0
lfdr2 <- f(z, pi0=.53)

# z vs fdr plot
f1 <- approxfun(z, lfdr1)
f2 <- approxfun(z, lfdr2)
zz <- seq(-5, 5, len=99)
plot(zz, f1(zz), type='l', lwd=3, bty='n', las=1, ylim=c(0, 1), col="#FF4E37", xlab='z', ylab='Local FDR')
lines(zz, f2(zz), lwd=3, col="#008DFF")
hdrm:::toplegend(legend=c(expression(hat(pi)[0]*'='*1), expression(hat(pi)[0]*'='*0.53)), lwd=3, col=c("#FF4E37FF", "#008DFFFF"))
lines(zz, rep(0.1, length(zz)), lty=2)

uniroot(function(x) f1(x)-0.1, lower=2, upper=4)$root
uniroot(function(x) f2(x)-0.1, lower=2, upper=4)$root
sum(lfdr1 < .1)
sum(lfdr2 < .1)

# FDR vs local fdr
sum(q < .1)
sum(lfdr1 < .1)
min(abs(z[q < .1]))

# Averages
h <- length(z)
p.left <- pnorm(z)
FDR <- h*p.left/rank(p.left)
mean(lfdr1[FDR < .1])
p.right <- pnorm(-z)
FDR <- h*p.right/rank(p.right)
mean(lfdr1[FDR < .1])


# fdrtool -----------------------------------------------------------------
library(fdrtool)

# Illustration: why bumps are problematic
set.seed(2)
zz <- sort(rnorm(500))
lfdr <- f(zz)
text(zz[490], 0.22, labels=sprintf('%.2f', lfdr[490]))
text(zz[500], 0.08, labels=sprintf('%.2f', lfdr[500]))

# ecdf vs density
set.seed(4)
pp <- sort(runif(10))
Fn <- ecdf(pp)
plot(Fn, las=1, main='', bty='n', col.01line=NA, xlab='p', xlim=c(0,1), ylab='F(p)', verticals=TRUE)
plot(pp, rep(1, length(pp)), type='n', bty='n', las=1, xlab='p', ylab='f(p)', xlim=c(0,1), ylim=c(0,2))
abline(v=pp)

# Grenander
plot(Fn, las=1, main='', bty='n', col.01line=NA, xlab='p', xlim=c(0,1), ylab='F(p)', verticals=TRUE)
lcm <- gcmlcm(sort(pp), Fn(sort(pp)), 'lcm')
lines(lcm$x.knots, lcm$y.knots, col=hdrm:::pal(2)[2], lwd=2)
plot(c(lcm$x.knots, 1), c(lcm$slope.knots, c(0, 0)), lwd=2,
     type='s', bty='n', las=1, xlab='p', ylab='f(p)', xlim=c(0,1), ylim=c(0,1.75), col=hdrm:::pal(2)[2])
lines(density(pp), lwd=2)

# Modified Grenander
yy <- get('y', environment(Fn))
p0 <- 0.5
yy[10] <- 1-p0 + p0*pp[10]
plot(c(0, pp), c(0, yy), type='s', bty='n', xlab='p', xlim=c(0,1), ylim=c(0,1), ylab='F(p)', las=1)
points(pp, yy, pch=19)
lines(c(pp[10], 1), c(yy[10], 1))
lines(c(0,1), c(0, p0), lty=2, col='gray50')
lines(c(0,1), c(1-p0, 1), lty=2, col='gray50')
lcm <- gcmlcm(pp, yy, 'lcm')
lines(c(lcm$x.knots, 1), c(lcm$y.knots, 1), col=hdrm:::pal(2)[2], lwd=2)
plot(c(lcm$x.knots, 1), c(lcm$slope.knots, rep((1-tail(lcm$y.knots, 1))/(1-tail(lcm$x.knots, 1)), 2)), lwd=2,
     type='s', bty='n', las=1, xlab='p', ylab='f(p)', xlim=c(0,1), ylim=c(0,1.75), col=hdrm:::pal(2)[2])

# Bumpy data
pp <- 2*pnorm(-abs(zz))
res <- fdrtool(pp, statistic='pvalue', verbose=FALSE, plot=FALSE, cutoff.method='pct0')
lfdr <- f(zz)
xx <- pp[zz>0]
yy <- lfdr[zz>0]
zr <- zz[zz>0]
ord <- order(zr)
plot(zr[ord], yy[ord], type='l', col=hdrm:::pal(2)[1], lwd=2, bty='n', las=1, xlab='z', ylab='fdr')
lines(zr[ord], res$lfdr[zz>0][ord], type='s', col=hdrm:::pal(2)[2], lwd=2)
hdrm:::toplegend(legend=c('Kernel', 'Grenander'), col=hdrm:::pal(2), lwd=2)

# Leukemia data
res <- fdrtool(p, statistic='pvalue', verbose=FALSE)
head(res$lfdr)  # local fdrs
sum(res$lfdr < 0.1)
sum(lfdr2 < 0.1)
lfdrPlot.fdrtool <- function(z, p, n=99, ...) {
  res <- fdrtool(p, statistic='pvalue', verbose=FALSE, plot=FALSE)
  h <- hist(z, freq=FALSE, n=n, col='gray', border='white', main='', las=1, ...)
  zz <- seq(min(z), max(z), len=299)
  f1 <- approxfun(z, res$lfdr, yright=min(res$lfdr))
  y <- h$density * (1 - f1(h$mids))
  for (k in 1:length(h$mids)) {
    if (y[k] != 0) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  }
  lines(zz, dnorm(zz)*res$param[1,'eta0'], col=hdrm:::pal(2)[2], lwd=3)
}
lfdrPlot.fdrtool(z, p)
sum(fdrtool(z, verbose=FALSE)$lfdr < 0.1)


# ashr --------------------------------------------------------------------
library(ashr)

# ashr
theta <- sapply(summ, function(s) s$coef[2,1])
s <- sapply(summ, function(s) s$coef[2,2])
fit <- ash(theta, s, mixcompdist='normal', optmethod='cxxMixSquarem')
lfdrPlot.ash <- function(fit, n=99, ...) {
  z <- fit$data$x/fit$data$s
  brk <- seq(min(z), max(z), length=n+1)
  h <- hist(z, border='white', col='gray', main='', freq=FALSE, breaks=brk, ...)
  zz <- seq(min(z), max(z), len=299)
  f1 <- approxfun(z, get_lfdr(fit))
  y <- h$density * (1 - f1(h$mids))
  for (k in 1:length(h$mids)) {
    if (y[k] != 0) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  }
  lines(zz, dnorm(zz)*fit$fitted_g$pi[1], col='#008DFF', lwd=3)
}
lfdrPlot.ash(fit)
get_pi0(fit)

# Other mixtures
fit2 <- ash(theta, s, df=70, mixcompdist='uniform', optmethod='cxxMixSquarem')
get_pi0(fit2)
fit3 <- ash(theta, s, df=70, mixcompdist='halfuniform', optmethod='cxxMixSquarem')
get_pi0(fit3)

# TERF1
z[6554]
p[6554]
0.53*q[6554]
lfdr2[6554]
get_lfdr(fit)[6554]
get_lfsr(fit)[6554]

# Posterior means
library(data.table)
DT <- data.table(Gene=gsub('Response ', '', names(p)), as.data.table(fit$result))
DT[order(-PosteriorMean)][101:110][, .(Gene, betahat, sebetahat, lfsr, lfdr, PosteriorMean)]
