library(hdrm)
library(ggplot2)
library(data.table)

# Generate data
Data <- genDataABN(n=100, p=60, a=6, b=2, rho=0.5, beta=c(1,-1,0.5,-0.5,0.5,-0.5))
Data <- Ex9.1()  # Same as above, but with seed specified to match book output

# Fit lasso model
cvfit <- cv.ncvreg(Data$X, Data$y, penalty='lasso', seed=10)
fit <- cvfit$fit

# Calculate mFdr
head(mfdr(fit))
EF <- mfdr(fit)$EF

# Plot A/B/N/EF
S <- coef(fit)[-1,] != 0
Res <- data.table(ll=log(fit$lambda), EF=EF,
                  A=apply(S[Data$varType=='A',], 2, sum)*1.0,
                  B=apply(S[Data$varType=='B',], 2, sum)*1.0,
                  N=apply(S[Data$varType=='N',], 2, sum)*1.0)
gg <- melt(Res, id.vars='ll', measure.vars = c('EF', 'A', 'B', 'N'), value.name='Number')
gg[variable=='EF', Number := 42/60*Number]
g <- ggplot(gg, aes(ll, Number, group=variable, color=variable)) + geom_line() + xlim(max(gg$ll), min(gg$ll)) + xlab(expression(log(lambda))) + theme(legend.title=element_blank())
g + geom_vline(xintercept = log(2*sqrt(3*log(60)/100)), lty=2)   # Vertical line: Theory
g + geom_vline(xintercept = log(cvfit$lambda.min), lty=2)        # Vertical line: Cross-validation

# Plot mdr
tmp <- data.table(ll=log(fit$lambda), Estimate=mfdr(fit)$mFDR,
                  True=apply(S[Data$varType=='N',], 2, sum)/apply(S, 2, sum))
gg <- melt(tmp, id.vars='ll', measure.vars = c('Estimate', 'True'), value.name='mFdr')[is.finite(mFdr)]
ggplot(gg, aes(ll, mFdr, group=variable, color=variable)) + geom_line() + xlim(max(gg$ll), min(gg$ll)) + xlab(expression(log(lambda))) + theme(legend.title=element_blank())
gg[variable=='Estimate', mFdr := 42/60*mFdr]   # Correct for pi0
ggplot(gg, aes(ll, mFdr, group=variable, color=variable)) + geom_line() + xlim(max(gg$ll), min(gg$ll)) + xlab(expression(log(lambda))) + theme(legend.title=element_blank())

# Plot A/B/N/EF for correlated noise
CorData <- genDataABN(n=100, p=60, a=6, b=2, rho=0.5, beta=c(1,-1,0.5,-0.5,0.5,-0.5), noise = 'exchangeable', rho.noise=0.8)
cvfit_cor <- cv.ncvreg(CorData$X, CorData$y, penalty='lasso')
fit_cor <- cvfit_cor$fit
S <- coef(fit_cor)[-1,] != 0
tmp <- data.table(ll=log(fit_cor$lambda), EF=42/60*mfdr(fit_cor)$EF,
                  A=apply(S[Data$varType=='A',], 2, sum)*1.0,
                  B=apply(S[Data$varType=='B',], 2, sum)*1.0,
                  N=apply(S[Data$varType=='N',], 2, sum)*1.0)
gg <- melt(tmp, id.vars='ll', measure.vars = c('EF', 'A', 'B', 'N'), value.name='Number')
ggplot(gg, aes(ll, Number, group=variable, color=variable)) + geom_line() + xlim(max(gg$ll), min(gg$ll)) + xlab(expression(log(lambda))) + theme(legend.title=element_blank())
tmp <- data.table(ll=log(fit_cor$lambda), Estimate=42/60*mfdr(fit_cor)$mFDR,
                  True=apply(S[Data$varType=='N',], 2, sum)/apply(S, 2, sum))
gg <- melt(tmp, id.vars='ll', measure.vars = c('Estimate', 'True'), value.name='mFdr')[is.finite(mFdr)]
ggplot(gg, aes(ll, mFdr, group=variable, color=variable)) + geom_line() + xlim(max(gg$ll), min(gg$ll)) + xlab(expression(log(lambda))) + theme(legend.title=element_blank())

# Comparison
summ <- summary(lm(Data$X~Data$y))
p <- sapply(summ, function(s) s$coef[2,4])
bh <- p.adjust(p, method='BH')
mFdr <- mfdr(cvfit$fit)$mFDR         
tapply(bh < 0.1, Data$varType, sum)  # Univariate testing
Res[cvfit$min]                       # Cross-validation
Res[max(which(mFdr < 0.1))]          # mFDR

# Simulation not yet portable -- sorry!

# local fdr
lam <- fit$lambda
L <- length(lam)
P <- matrix(NA, L, length(Data$beta))
for (j in 1:L) {
  P[j,] <- ncvreg:::local_mfdr(fit, lam[j])$mfdr
}
gg <- melt(data.table(lam, P), id.vars='lam')
gg[, Cat := rep(Data$varType, each=L)]
col <- c(hdrm:::pal(2)[2], "#BEBEBEBF", "#000000BF")
ggplot(gg, aes(lam, value, group=variable, color=factor(Cat))) + geom_line() + 
  theme(legend.position="none") + xlim(rev(range(lam))) + geom_vline(xintercept=0.32, lty=2) + 
  scale_color_manual(values=col) +
  xlab(expression(lambda)) + ylab("mfdr")
summary(fit, lambda=cvfit$lambda.min)

# TCGA
attachData(bcTCGA)
fit <- ncvreg(X, y, penalty='lasso')
Fig6.6(fit)
