source('http://myweb.uiowa.edu/pbreheny/7600/s16/notes/fun.R')
load(url('http://myweb.uiowa.edu/pbreheny/data/Gode2011.RData'))

# Slide 6
Y['VPA1555_at',]
geneInfo['VPA1555_at']

# Slide 8
Y['VPA0929_at',]
geneInfo['VPA0929_at']
summary(lm(Y['VPA0929_at',]~Environment*Celltype, expCond))

# limma is not on CRAN; it is a Bioconductor package.  To install:
# source("https://bioconductor.org/biocLite.R")
# biocLite("limma")

# Slide 19 (limma analysis)
require(limma)
X <- model.matrix(~Environment*Celltype, data=expCond)
fit <- lmFit(Y, X)
eb <- eBayes(fit)
Tab <- topTable(eb, coef=4, number=nrow(Y), sort.by='none')
z <- qnorm(Tab$P.Value/2) * sign(Tab$t)

# OLS analysis
summ <- summary(lm(t(Y)~Environment*Celltype, data=expCond))
names(summ) <- gsub('Response ', '', names(summ))
tstat <- sapply(summ, function(s) s$coef[4,3])
p <- sapply(summ, function(s) s$coef[4,4])
q <- p.adjust(p, method='BH')

# Slide 20
sqrt(eb$s2.prior)
sqrt(eb$df.prior)

# Slide 21
fit$sigma['VPA0929_at']
sqrt(eb$s2.post['VPA0929_at'])
tstat['VPA0929_at']
p['VPA0929_at']
q['VPA0929_at']
Tab['VPA0929_at',]

# Slide 22-23
Y['VPA1545_at',]
geneInfo['VPA1545_at']
tstat['VPA1545_at']
p['VPA1545_at']
q['VPA1545_at']
Tab['VPA1545_at',]

# Slide 23
sum(Tab$adj.P.Val < .1)
sum(q < .1)

# Local fdr
lfdr <- lfdrPlot(z)
sum(lfdr < .2)

# Extra stuff
hist(log(fit$sigma^2), n=99, main='', col='gray', border='white', xlab=expression(log*" "*hat(sigma)[j]^2))
abline(v=log(eb$s2.prior), col='red', lwd=3)

# The above plot suggests that our estimate may be distorted by the extended
# left tail of the distribution.  The primary cause of the tail is that the
# variance is related to the mean in this experiment.  limma supports
# modeling this relationship; a better approach might be:
eb2 <- eBayes(fit, trend=TRUE)
