Gode2011 <- readRDS(url('https://s3.amazonaws.com/pbreheny-data-sets/Gode2011.rds'))
expCond <- Gode2011$expCond
Y <- Gode2011$Y
geneTable <- function(Y, g) {
  Tab <- matrix(Y[g,], ncol=2)
  rownames(Tab) <- c('Swarmer1', 'Swarmer2', 'Swimmer1', 'Swimmer2')
  colnames(Tab) <- c('Plate', 'Liquid')
  Tab
}

# Slide 6
geneTable(Y, 'VPA1555_at')
Gode2011$geneInfo['VPA1555_at']

# Slide 8
geneTable(Y, 'VPA0929_at')
Gode2011$geneInfo['VPA0929_at']
summary(lm(Y['VPA0929_at',]~Environment*Celltype, expCond))

# limma is not on CRAN; it lives on Bioconductor.  To install:
# install.packages('BiocManager')
# BiocManager::install('limma')

# Slide 19 (limma analysis)
library(limma)
X <- model.matrix(~Environment*Celltype, data=expCond)
fit <- lmFit(Gode2011$Y, X)
eb <- eBayes(fit)
Tab <- topTable(eb, coef=4, number=nrow(Y), sort.by='none')
z <- qnorm(Tab$P.Value/2) * sign(Tab$t)
p <- Tab$P.Value
q <- Tab$adj.P.Val

# Conventional analysis
summ <- summary(lm(t(Y)~Environment*Celltype, data=expCond))
names(summ) <- gsub('Response ', '', names(summ))
tstat <- sapply(summ, function(s) s$coef[4,3])
pp <- sapply(summ, function(s) s$coef[4,4])
qq <- p.adjust(pp, method='BH'); names(qq) <- names(pp)
zz <- qnorm(pp/2) * sign(tstat)

# Hyperparameters (Slide 20)
sqrt(eb$s2.prior)
sqrt(eb$df.prior)

# Revisiting ModA (Slide 21)
fit$sigma['VPA0929_at']
sqrt(eb$s2.post['VPA0929_at'])
tstat['VPA0929_at']
pp['VPA0929_at']
qq['VPA0929_at']
Tab['VPA0929_at',]

# Plotting variance (Slide 22)
hist(log(fit$sigma^2), n=79, xlab=expression(sigma^2 * ' (raw)'), xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), col='gray', border='white', main='')
lab <- c('0', '0.0001', '0.002', '0.05', '1', '20')
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(fit$sigma['VPA0929_at']^2), col='red')
text(log(fit$sigma['VPA0929_at']^2), 300, 'ModA', xpd=NA)
hist(log(eb$s2.post), n=45, xlab=expression(sigma^2 * ' (shrunken)'), xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), col='gray', border='white', main='')
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(eb$s2.post['VPA0929_at']), col='red')
text(log(eb$s2.post['VPA0929_at']), 300, 'ModA', xpd=NA)

# Other example (Slide 23)
geneTable(Y, 'VPA1545_at')
Gode2011$geneInfo['VPA1545_at']
tstat['VPA1545_at']
pp['VPA1545_at']
qq['VPA1545_at']
Tab['VPA1545_at',]

# Overall summary (Slide 24)
sum(Tab$adj.P.Val < .1)
sum(q < .1)

# Local fdr (Slide 25)
library(fdrtool)
lfdrPlot <- function(z, p, n=59, main='', ...) {
  res <- fdrtool(p, statistic='pvalue', verbose=FALSE, plot=FALSE)
  h <- hist(z, freq=FALSE, n=n, col='gray', border='white', las=1, main=main, ...)
  zz <- seq(min(z), max(z), len=299)
  f1 <- approxfun(z, res$lfdr, yleft=min(res$lfdr), yright=min(res$lfdr))
  y <- h$density * (1 - f1(h$mids))
  for (k in 1:length(h$mids)) {
    if (y[k] != 0) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  }
  lines(zz, dnorm(zz)*res$param[1,'eta0'], col=breheny::pal(2)[2], lwd=3)
}
lfdrPlot(zz, pp, xlim=c(-6, 6)); mtext('Original')
lfdrPlot(z, p, xlim=c(-6, 6)); mtext('Moderated')
res <- fdrtool(p, statistic='pvalue', verbose=FALSE, plot=FALSE)
sum(res$lfdr < 0.1)
sum(res$lfdr[z<0] < 0.1)
sum(res$lfdr[z>0] < 0.1)

# Our plot of variances suggests that our estimate may be distorted by the long
# left tail of the distribution.  The primary cause of the tail is that the
# variance is related to the mean in this experiment.  limma supports
# modeling this relationship; a better approach might be:
eb2 <- eBayes(fit, trend=TRUE)

