library(hdrm)
library(kableExtra)
library(selectiveInference)

# Data (running examples)
brca <- read_data(brca1)
dat <- Ex9.1()
x <- dat$X
y <- dat$y
n <- nrow(x)
p <- ncol(x)
var_type <- dat$varType

# Failure of chi^2 result
set.seed(2)
N <- 1000
R1 <- R2 <- numeric(N)
for (i in 1:N) {
  yy <- scale(rnorm(100), scale = FALSE)
  XX <- std(matrix(rnorm(100 * 100), 100, 100))
  jj <- which.max(abs(crossprod(XX, yy)))
  fit1 <- lm(yy~XX[, 1])
  fit2 <- lm(yy~XX[, jj])
  R1[i] <- crossprod(yy) - crossprod(fit1$residuals)
  R2[i] <- crossprod(yy) - crossprod(fit2$residuals)
}

# Pre-specified
qqplot(
  qchisq(ppoints(100), 1), R1, las = 1, bty = "n", pch = 16, ylim = c(0, 8),
  xlab = expression(chi^2), ylab = expression(T[R])
)
mtext('Prespecified', line = 0.5)
lines(0:8, 0:8, lty = 2, lwd = 2, col = "gray")
# mean(R1 > qchisq(.95, 1))

# Forward selection
qqplot(
  qchisq(ppoints(100), 1), R2, las = 1, bty = "n", pch = 16,
  ylim = c(0, max(R2)), xlab = expression(chi^2), ylab = expression(T[R])
)
mtext('Forward selection', line = 0.5)
lines(0:8, 0:8, lty = 2, lwd = 2, col = "gray")
# mean(R2 > qchisq(.95, 1))

# CovTest
set.seed(1)
N <- 2000
p <- numeric(N)
for (i in 1:N) {
  yy <- scale(rnorm(100), scale = FALSE)
  XX <- std(matrix(rnorm(100 * 100), 100, 100))
  fit <- lar(XX, yy, maxsteps = 2)
  p[i] <- suppressWarnings(larInf(fit, sigma = 1)$pv.covtest[1])
}
TS <- qexp(1 - p)
par(mar = c(4, 4.1, 1, 1))
qqplot(
  qexp(ppoints(length(TS)), 1), TS, las = 1, bty = "n", pch = 16,
  ylim = c(0, max(TS)), xlab = "Exp(1)", ylab = expression(T[C]))
lines(0:8, 0:8, lty = 2, lwd = 2, col = "gray")

# fit <- lar(X, y)
# larInf(fit)

# estimateSigma(X, y)

# CovTest for example data
fit <- lar(x, y)
res <- larInf(fit, sigma = estimateSigma(x, y)$sigmahat)
tab <- data.frame(
  Var = colnames(x)[res$vars],
  TS = qexp(1 - res$pv.covtest),
  p = res$pv.covtest
)
tab$p <- hdrm:::format_p(tab$p)
kbl(tab[1:10, ], 'latex', booktabs = TRUE, digits = 2, align = 'lrr') |>
  kable_styling(position = 'center')

forwardStop(res$pv.covtest)

# x_std <- std(x)
# fit <- glmnet(x_std, y, standardize = FALSE)

# lam <- 25
# b <- coef(fit, s = lam/n)[-1]
# res <- fixedLassoInf(x_std, y, b, lam)

x_std <- std(x)
fit <- glmnet(x_std, y, standardize = FALSE)
lam <- 25
b <- coef(fit, s = lam/n, exact = TRUE, x = x_std, y = y)[-1]
res <- fixedLassoInf(x_std, y, b, lam, sigma = 1)
bb <- res$vmat %*% y
ci <- cbind(bb, res$ci, res$pv)
dimnames(ci) <- list(names(res$vars), c('Estimate', 'Lower', 'Upper', 'p'))

ci_plot(ci, sort=FALSE, xlab=expression(beta), xlim=c(-4, 4))
lines(c(1,1), c(6.5, 7.5), col="gray", lty=2, lwd=2, xpd=1)
lines(c(-1,-1), c(5.5, 6.5), col="gray", lty=2, lwd=2)
lines(c(0.5, 0.5), c(4.5, 5.5), col="gray", lty=2, lwd=2)
lines(c(-0.5, -0.5), c(3.5, 4.5), col="gray", lty=2, lwd=2)
lines(c(0.5, 0.5), c(2.5, 3.5), col="gray", lty=2, lwd=2)
lines(c(-0.5, -0.5), c(1.5, 2.5), col="gray", lty=2, lwd=2)

lm(y ~ x_std[, b!=0]) |>
  ci_plot(sort=FALSE, labels=names(res$vars), xlab=expression(beta), xlim=c(-4, 4))
lines(c(1,1), c(6.5, 7.5), col="gray", lty=2, lwd=2, xpd=1)
lines(c(-1,-1), c(5.5, 6.5), col="gray", lty=2, lwd=2)
lines(c(0.5, 0.5), c(4.5, 5.5), col="gray", lty=2, lwd=2)
lines(c(-0.5, -0.5), c(3.5, 4.5), col="gray", lty=2, lwd=2)
lines(c(0.5, 0.5), c(2.5, 3.5), col="gray", lty=2, lwd=2)
lines(c(-0.5, -0.5), c(1.5, 2.5), col="gray", lty=2, lwd=2)

# covTest for BRCA1
fit <- lar(brca$X, brca$y, maxsteps = 11)
res <- larInf(fit, sigma = 0.45)
tab <- data.frame(
  Var = colnames(brca$X)[res$vars],
  TS  = qexp(1 - res$pv.covtest),
  p   = res$pv.covtest
)
tab$p <- hdrm:::format_p(tab$p)
kbl(tab[1:10, ], 'latex', booktabs = TRUE, digits = 2, align = 'lrr') |>
  kable_styling(position = 'center')

forwardStop(res$pv.covtest)

# selectiveInference for TCGA data
x_std <- std(brca$X)
fit = glmnet(x_std, brca$y, standardize = FALSE)
lam <- 115
b <- coef(fit, s = lam/length(brca$y), exact = TRUE, x = x_std, y = brca$y)[-1]
# sum(b != 0)
# sh <- estimateSigma(x_std, brca$y)$sigmahat
res <- fixedLassoInf(x_std, brca$y, b, lam, sigma = 0.45)

bb <- res$vmat %*% brca$y
ci <- cbind(bb, res$ci, res$pv)
rownames(ci) <- names(res$vars)
ci_plot(ci, sort = FALSE, mar = c(4, 5, 1, 5), xlab = expression(beta))

