library(data.table)
library(ggplot2)
library(hdrm)
library(kableExtra)
library(magrittr)

# gen_data_abn(n = 100, p = 60, a = 6, b = 2, rho = 0.5,
# beta = c(1, -1, 0.5, -0.5, 0.5, -0.5))

# Generate data
dat <- Ex9.1()

# Fit lasso model
fold <- assign_fold(dat$y, 10, seed = 10)
cvfit <- cv.ncvreg(dat$X, dat$y, penalty = 'lasso', fold = fold)
fit <- cvfit$fit

# Calculate mFdr
ef <- mfdr(fit)$EF

# Plot A/B/N/EF
s <- coef(fit)[-1, ] != 0
res <- data.table(
  ll = log(fit$lambda),
  EF = ef,
  A = colSums(s[dat$varType == 'A', ]) * 1.0,
  B = colSums(s[dat$varType == 'B', ]) * 1.0,
  N = colSums(s[dat$varType == 'N', ]) * 1.0)
gg <- melt(res, id.vars = 'll', measure.vars = c('EF', 'A', 'B', 'N'),
  value.name = 'Number')

ggplot(gg, aes(ll, Number, group = variable, color = variable)) +
  geom_line() + xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title = element_blank())
gg[variable == 'EF', Number := 42 / 60 * Number]
ggplot(gg, aes(ll, Number, group = variable, color = variable)) +
  geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title = element_blank())
ggplot(gg, aes(ll, Number, group = variable, color = variable)) +
  geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title = element_blank()) +
  geom_vline(xintercept = log(2 * sqrt(2 * log(60) / 100)), lty = 2)
ggplot(gg, aes(ll, Number, group = variable, color = variable)) +
  geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title = element_blank()) +
  geom_vline(xintercept = log(cvfit$lambda.min), lty = 2)

gg <- data.table(
  ll = log(fit$lambda),
  Estimate = mfdr(fit)$mFDR,
  True = colSums(s[dat$varType == 'N', ]) / colSums(s)) %>%
  melt(id.vars = 'll', measure.vars = c('Estimate', 'True'), value.name = 'mFdr') %>%
  .[is.finite(mFdr)]

ggplot(gg, aes(ll, mFdr, group = variable, color = variable)) + geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) + theme(legend.title = element_blank())
gg[variable == 'Estimate', mFdr := 42 / 60 * mFdr]
ggplot(gg, aes(ll, mFdr, group = variable, color = variable)) + geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) + theme(legend.title = element_blank())

# Plot A/B/N/EF for correlated noise
dat_cor <- gen_data_abn(
  n = 100,
  p = 60,
  a = 6,
  b = 2,
  rho = 0.5,
  beta = c(1, -1, 0.5, -0.5, 0.5, -0.5),
  noise = 'exchangeable',
  rho.noise = 0.8)
cvfit_cor <- cv.ncvreg(dat_cor$X, dat_cor$y, penalty = 'lasso')
fit_cor <- cvfit_cor$fit
s <- coef(fit_cor)[-1, ] != 0
gg <- data.table(
  ll = log(fit_cor$lambda),
  EF = 42 / 60 * mfdr(fit_cor)$EF,
  A = colSums(s[dat$varType == 'A', ]) * 1.0,
  B = colSums(s[dat$varType == 'B', ]) * 1.0,
  N = colSums(s[dat$varType == 'N', ]) * 1.0) %>%
  melt(id.vars='ll', measure.vars = c('EF', 'A', 'B', 'N'), value.name='Number')
ggplot(gg, aes(ll, Number, group=variable, color=variable)) +
  geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title=element_blank())

gg <- data.table(
  ll = log(fit_cor$lambda),
  Estimate = 42/60*mfdr(fit_cor)$mFDR,
  True = colSums(s[dat$varType == 'N', ]) / colSums(s)) %>%
  melt(id.vars = 'll', measure.vars = c('Estimate', 'True'), value.name = 'mFdr') %>%
  .[is.finite(mFdr)]
ggplot(gg, aes(ll, mFdr, group = variable, color = variable)) +
  geom_line() +
  xlim(max(gg$ll), min(gg$ll)) +
  xlab(expression(log(lambda))) +
  theme(legend.title = element_blank())

p <- lm(dat$X ~ dat$y) %>%
summary() %>%
sapply(function(s) s$coef[2,4])
univ <- data.table(
bh = p.adjust(p, method = 'BH'),
type = dat$varType) %>%
.[, sum(bh < 0.1), type] %>%
dcast(. ~ type, value.var = "V1")
cv <- res[cvfit$min]
mfdr <- res[max(which(mfdr(cvfit$fit)$mFDR < 0.1))]
tab <- rbind(mfdr[, 3:5], univ[, 2:4], cv[, 3:5])
tab[, method := c('Lasso (mFDR)', 'Univariate', 'Lasso (CV)')]
setcolorder(tab, 'method')
kbl(tab, 'latex', booktabs=TRUE) %>%
kable_styling(position = 'center') %>%
add_header_above(c(" " = 1, "# Selected" = 3))

# Note: Code for reproducing this simulation not yet available, sorry
CMP <- apply(cmp[,1,c(1,2,4),], 2:3, mean)
colnames(CMP) <- c("Causative (A)", "Correlated (B)", "Noise (N)")
gg <- melt(as.data.table(CMP, keep.rownames="Method"), id.var="Method", variable.name="Type")
gg[, Method := factor(Method, levels=c('LassoCV', 'Univ', 'LassoFDR'))]
ggplot(gg, aes(Method, value, fill = Type)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
  scale_fill_grey(start = 0, end = 1) +
  coord_flip() +
  theme(legend.background = element_rect(fill = "gray90")) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("Avg Number Selected") +
  xlab("")

lam <- fit$lambda
L <- length(lam)
P <- matrix(NA, L, length(dat$beta))
for (j in 1:L) {
  P[j,] <- ncvreg:::local_mfdr(fit, lam[j])$mfdr
}
gg <- melt(data.table(lam, P), id.vars='lam')
gg[, Cat := rep(dat$varType, each=L)]
col <- c(hdrm:::pal(2)[2], scales::alpha('gray', 0.75), scales::alpha('black', 0.75))
ggplot(gg, aes(lam, value, group=variable, color=factor(Cat))) + geom_line() +
  theme(legend.position="none") + xlim(rev(range(lam))) + geom_vline(xintercept=cvfit$lambda.min, lty=2) +
  scale_color_manual(values=col) +
  xlab(expression(lambda)) + ylab("mfdr")

# summary(fit, lambda=cvfit$lambda.min)
# #   Nonzero coefficients         :  29
# #   Expected nonzero coefficients:  20.37
# #   Average mfdr (29 features)   :   0.702
# #
# #      Estimate        z       mfdr Selected
# # A1   0.874102  11.2647    < 1e-04        *
# # A2  -0.774583 -10.9076    < 1e-04        *
# # A4  -0.502917  -7.1268    < 1e-04        *
# # A3   0.422238   5.4744    < 1e-04        *
# # A6  -0.351849  -4.7564 0.00059017        *
# # A5   0.309722   4.1233 0.00915535        *
# # N39 -0.200926  -2.9913 0.33482886        *

# ...
# # N42  0.037062   1.2128 0.95479163        *
# # N26 -0.024974  -1.0778 0.96101303        *
# # N24 -0.020723  -1.0491 0.96213966        *
# # N2   0.018021   0.9914 0.96422830        *
# # N17 -0.009270  -0.8914 0.96733745        *
# # N37 -0.008625  -0.8807 0.96763605        *
# # N11 -0.004770  -0.8405 0.96870108        *
# # N41 -0.004774  -0.8346 0.96885141        *
# # N34  0.003134   0.8183 0.96925552        *

attach_data(brca1)
cvfit <- cv.ncvreg(X, y, penalty='lasso')
fit <- cvfit$fit
mfdr(fit)
summary(fit, lambda=cvfit$lambda.min)
lmfdr <- local_mfdr(fit, lambda=cvfit$lambda.min)
sum(lmfdr$mfdr < 0.2)

# plot(mfdr(fit))
# plot(mfdr(fit), type = 'EF')

par(mfrow=c(1,2), mar=c(4, 4, 3, 0.5))
plot(mfdr(fit), bty='n')
plot(mfdr(fit), type = 'EF', bty='n')

