Gode2011 <- readRDS(url('https://s3.amazonaws.com/pbreheny-data-sets/Gode2011.rds'))
expCond <- Gode2011$expCond
Y <- Gode2011$Y
geneTable <- function(X, g) {
  Tab <- matrix(Y[g,], ncol=2)
  rownames(Tab) <- c('Swarmer1', 'Swarmer2', 'Swimmer1', 'Swimmer2')
  colnames(Tab) <- c('Plate', 'Liquid')
  Tab
}

# Slide 6
geneTable(Y, 'VPA1555_at')
Gode2011$geneInfo['VPA1555_at']

# Slide 8
geneTable(Y, 'VPA0929_at')
Gode2011$geneInfo['VPA0929_at']
summary(lm(Y['VPA0929_at',]~Environment*Celltype, expCond))

# limma is not on CRAN; it lives on Bioconductor.  To install:
# install.packages('BiocManager')
# BiocManager::install('limma')

# Slide 19 (limma analysis)
require(limma)
library(limma)
X <- model.matrix(~Environment*Celltype, data=expCond)
fit <- lmFit(Gode2011$Y, X)
eb <- eBayes(fit)
Tab <- topTable(eb, coef=4, number=nrow(Y), sort.by='none')
z <- qnorm(Tab$P.Value/2) * sign(Tab$t)
p <- Tab$P.Value
q <- Tab$adj.P.Val

# Conventional analysis
summ <- summary(lm(t(Y)~Environment*Celltype, data=expCond))
names(summ) <- gsub('Response ', '', names(summ))
tstat <- sapply(summ, function(s) s$coef[4,3])
p <- sapply(summ, function(s) s$coef[4,4])
q <- p.adjust(p, method='BH')

# Hyperparameters (Slide 20)
sqrt(eb$s2.prior)
sqrt(eb$df.prior)

# Revisiting ModA (Slide 21)
fit$sigma['VPA0929_at']
sqrt(eb$s2.post['VPA0929_at'])
tstat['VPA0929_at']
p['VPA0929_at']
q['VPA0929_at']
Tab['VPA0929_at',]

# Plotting variance (Slide 22)
hist(log(fit$sigma^2), n=79, xlab=expression(sigma^2 * ' (raw)'), xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), col='gray', border='white', main='')
lab <- c('0', '0.0001', '0.002', '0.05', '1', '20')
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(fit$sigma['VPA0929_at']^2), col='red')
text(log(fit$sigma['VPA0929_at']^2), 300, 'ModA', xpd=NA)
hist(log(eb$s2.post), n=45, xlab=expression(sigma^2 * ' (shrunken)'), xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), col='gray', border='white', main='')
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(eb$s2.post['VPA0929_at']), col='red')
text(log(eb$s2.post['VPA0929_at']), 300, 'ModA', xpd=NA)

# Other example (Slide 23)
geneTable(Y, 'VPA1545_at')
Gode2011$geneInfo['VPA1545_at']
tstat['VPA1545_at']
p['VPA1545_at']
q['VPA1545_at']
Tab['VPA1545_at',]

# Overall summary (Slide 24)
sum(Tab$adj.P.Val < .1)
sum(q < .1)

# Local fdr (Slide 25)
library(ashr)
SE <- sqrt(eb$s2.post)*eb$stdev.unscaled[,4]
ash_fit <- ash(eb$coefficients[,4], SE, optmethod='cxxMixSquarem', mixcompdist='halfuniform')
lfdrPlot.ash <- function(fit, n=99, xlim=range(z), ...) {
  z <- fit$data$x/fit$data$s
  brk <- seq(min(z), max(z), length=99*diff(range(z))/diff(range(xlim)))
  h <- Hist(z, freq=FALSE, xlim=xlim, breaks=brk, ...)
  zz <- seq(min(z), max(z), len=299)
  f1 <- approxfun(z, get_lfdr(fit))
  y <- h$density * (1 - f1(h$mids))
  for (k in 1:length(h$mids)) {
    if (y[k] != 0) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  }
  lines(zz, dnorm(zz)*fit$fitted_g$pi[1], col=pal(2)[2], lwd=3)
}
lfdrPlot.ash(ash_fit, xlim=c(-6, 6))
sum(get_pm(ash_fit) < -2)
sum(get_pm(ash_fit) > 2)
get_pi0(ash_fit)
sum(get_lfsr(ash_fit) < .1)

# Our plot of variances suggests that our estimate may be distorted by the long
# left tail of the distribution.  The primary cause of the tail is that the
# variance is related to the mean in this experiment.  limma supports
# modeling this relationship; a better approach might be:
eb2 <- eBayes(fit, trend=TRUE)
