# Read in data
# Note: Gode2011 is not an hdrm data set
library(stringr)
if (!file.exists('Gode2011.rds')) {
  download.file('https://github.com/IowaBiostat/data-sets/raw/main/Gode2011/Gode2011.rds', 'Gode2011.rds')
}
Gode2011 <- readRDS('Gode2011.rds')

# Install limma; note that it is not on CRAN
# limma lives on Bioconductor instead; to install:
# install.packages('BiocManager')
# BiocManager::install('limma')
library(limma)

# LafS
gene_table <- function(id, dat=Gode2011, latex=FALSE) {
  if (str_ends(id, '_at')) {
    j <- str_which(names(Gode2011$geneInfo), id)
  } else {
    j <- grep(id, Gode2011$geneInfo)
  }
  if (length(j) != 1) {
    print(dat$geneInfo[j])
    stop('String is not unique', call.=FALSE)
  }
  tab <- matrix(dat$Y[j,], ncol=2)
  rownames(tab) <- c('Swarmer1', 'Swarmer2', 'Swimmer1', 'Swimmer2')
  colnames(tab) <- c('Plate', 'Liquid')
  if (latex) {
    knitr::kable(tab, 'latex', booktabs=TRUE, digits=2) |>
      kableExtra::kable_styling(position='center')
  } else {
    tab
  }
}
gene_table('LafS', latex=TRUE)

gene_table('VPA0929_at', latex=TRUE)

# fit <- lmFit(Y, X)

# eb <- eBayes(fit)

# tab <- topTable(eb)

# Limma analysis
X <- model.matrix(~ Environment * Celltype, data=Gode2011$expCond)
fit <- lmFit(Gode2011$Y, X)
eb <- eBayes(fit)
Tab <- topTable(eb, coef=4, number=nrow(Gode2011$Y), sort.by='none')
z <- -1 * sign(Tab$t) * qnorm(Tab$P.Value/2)
p <- Tab$P.Value
q <- Tab$adj.P.Val

# Conventional analysis
summ <- summary(lm(t(Gode2011$Y)~Environment*Celltype, data=Gode2011$expCond))
names(summ) <- gsub('Response ', '', names(summ))
tstat <- sapply(summ, function(s) s$coef[4,3])
pp <- sapply(summ, function(s) s$coef[4,4])
zz <- -1 * sign(tstat) * qnorm(pp/2)
qq <- p.adjust(pp, method='BH')
Q <- cbind(q, qq)
P <- cbind(p, pp)

# Distribution of gene variances
par(mfrow=c(1,2), mar=c(4.5, 4.5, 2, 0.5))
hist(log(fit$sigma^2), n=79, xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), las=1,
     main='', border='white', col='gray', xlab=expression(sigma^2 * ' (raw)'))
lab <- c('0', '0.0001', '0.002', '0.05', '1', '20')
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(fit$sigma['VPA0929_at']^2), col='red')
text(log(fit$sigma['VPA0929_at']^2), 300, 'ModA', xpd=NA)
hist(log(eb$s2.post), n=45, xlim=c(-12, 3), xaxt='n', ylim=c(0, 280), las=1,
     main='', border='white', col='gray', xlab=expression(sigma^2 * ' (shrunken)'))
axis(1, at=seq(-12,3,3), lab=lab)
abline(v=log(eb$s2.post['VPA0929_at']), col='red')
text(log(eb$s2.post['VPA0929_at']), 300, 'ModA', xpd=NA)

gene_table('VPA1545_at', latex=TRUE)

# Local FDR using fdrtool
library(fdrtool)
lfdr_plot <- function(z, p, n=59, main='', ...) {
  res <- fdrtool(p, statistic='pvalue', verbose=FALSE, plot=FALSE)
  h <- hist(z, freq=FALSE, n=n, col='gray', border='white', las=1, main=main, ...)
  zz <- seq(min(z), max(z), len=299)
  f1 <- approxfun(z, res$lfdr, yleft=min(res$lfdr), yright=min(res$lfdr))
  y <- h$density * (1 - f1(h$mids))
  for (k in 1:length(h$mids)) {
    if (y[k] != 0) lines(rep(h$mids[k],2), c(0, y[k]), lwd = 2, col = "red")
  }
  lines(zz, dnorm(zz)*res$param[1,'eta0'], col=breheny::pal(2)[2], lwd=3)
}
par(mfcol=c(1,2), mar=c(4, 4, 1, 0.5))
lfdr_plot(zz, pp, xlim=c(-6, 6)); mtext('Original')
lfdr_plot(z, p, xlim=c(-6, 6)); mtext('Moderated')

res <- fdrtool(p, statistic='pvalue', verbose=FALSE, plot=FALSE)
res <- fdrtool(pp, statistic='pvalue', verbose=FALSE, plot=FALSE)
sum(res$lfdr < 0.1)
sum(res$lfdr[z<0] < 0.1)
sum(res$lfdr[z>0] < 0.1)

