# Setup
source('https://myweb.uiowa.edu/pbreheny/7110/f25/notes/fun.R')
library(logistf)
library(MASS)
library(glmnet)

# Bin
p <- seq(0.001, 0.999, len = 101)
plot(
p,
-log(p) - log(1 - p),
type = "l",
las = 1,
lwd = 3,
col = "slateblue",
bty = "n",
xlab = expression(pi),
ylab = expression(p(pi)),
ylim = c(0, 7)
)

# Binomial proportion: Illustration
n <- 20
x <- 3
theta <- seq(0.001, 0.4, len = 499)
par(mar = c(4.5, 4.5, 2, 0.5))
L <- plotL(theta, dbinom(x, n, theta))
L <- plotL(theta, dbinom(x + 2, n + 4, theta), add = TRUE, col = pal(2)[1])
plotl(theta, dbinom(x, n, theta, log = TRUE), col = pal(2)[1])
plotl(theta, dbinom(x + 2, n + 4, theta, log = TRUE), add = TRUE)
toplegend(legend = c("Standard", "Penalized"), col = pal(2), lwd = 3)
p <- x / n
V <- p * (1 - p) / n
lines(theta, -(p - theta)^2 / V, lty = 2, col = pal(2)[1])
p <- (x + 2) / (n + 4)
V <- p * (1 - p) / (n + 4)
lines(theta, -(p - theta)^2 / V, lty = 2, col = pal(2)[2])

# Binomial proportion: function
ci <- function(x, n, alpha = 0.05, lam = 1) {
  xx <- x + lam - 1
  nn <- n + 2 * lam - 2
  p <- xx / nn
  SE <- sqrt(p * (1 - p) / nn)
  cbind(p + qnorm(0.025) * SE, p + qnorm(0.975) * SE)
}
covered <- function(interval, p) {
  interval[1] <= p & interval[2] >= p
}

# Binomial proportion: sim
N <- 2000
n <- seq(10, 50, 10)
p <- 0.1
Coverage <- matrix(
  NA,
  length(n),
  2,
  dimnames = list(n, c("Standard", "Penalized"))
)
for (i in 1:length(n)) {
  x <- rbinom(N, n[i], p)
  W <- ci(x, n[i])
  Coverage[i, 1] <- mean(apply(W, 1, covered, p = p))
  W <- ci(x, n[i], lam = 3)
  Coverage[i, 2] <- mean(apply(W, 1, covered, p = p))
}
par(mar = c(4, 4, 2, 0.5))
matplot(n, Coverage, bty = "n", type = "l", lwd = 3, col = pal(2), lty = 1, las = 1, ylim = c(0.65, 1))
abline(h = 0.95, col = "gray", lty = 2)
toplegend(legend = colnames(Coverage), col = pal(2), lwd = 3)

# Complete separation
y <- rep(0:1, each = 200)
x1 <- rnorm(400)
x2 <- rep(0, 400)
x2[333] <- 1
fit <- glm(y ~ x1 + x2, family = binomial, control = glm.control(epsilon = 1e-12))
summary(fit)$coef[, c(1, 2, 4)] |>
knitr::kable("latex", booktabs = TRUE, digits = c(2, 2, 3)) |>
kableExtra::kable_styling(position = "center")

# Firth
fit <- logistf(y ~ x1 + x2, pl = FALSE)
cbind(
Estimate = coef(fit),
SE = sqrt(diag(fit$var)),
p = fit$prob
) |>
knitr::kable("latex", booktabs = TRUE, digits = c(2, 2, 3)) |>
kableExtra::kable_styling(position = "center")

# Inspect variance-covariance matrix
X <- cbind(1, x1, x2)
p <- fit$predict
W <- diag(p * (1 - p))
solve(crossprod(X, W %*% X))
fit$var
head(fit$hat.diag)

# Ridge: Seed
set.seed(11)

# Ridge: Motivation
x1 <- rnorm(20)
x2 <- rnorm(20, mean = x1, sd = .01)
y <- rnorm(20, mean = 3 + x1 + x2)
coef(lm(y ~ x1 + x2))

coef(lm.ridge(y ~ x1 + x2, lambda = 1))

# Lasso: Setup
set.seed(1)
coef.glmnet <- function(fit, s) {
b <- glmnet:::coef.glmnet(fit, s = s)
b[b@i[-1] + 1, , drop = FALSE] |> as.matrix()
}

# lasso
X <- matrix(rnorm(100 * 10), 100, 10)
y <- rnorm(100, 2 * X[, 1] + 3 * X[, 7])
fit <- glmnet(X, y)
coef(fit, s = 0.2)

# Splines
set.seed(1)
par(mar = c(4, 4, 1.5, 1), mfrow = c(1, 3))
xx <- seq(0, 1, len = 100)
x <- sort(runif(100))
y <- rnorm(100, mean = exp(x) * (x^2) * sin(10 * x), sd = .5)
fit <- list(
  smooth.spline(x, y, all.knots = TRUE, spar = 1.4),
  smooth.spline(x, y, all.knots = TRUE, spar = 1),
  smooth.spline(x, y, all.knots = TRUE, spar = 0.7)
)
splot <- function(line) {
  main <- c(
    expression(lambda * " too large"),
    expression(lambda * " just right"),
    expression(lambda * " too small")
  )
  for (i in 1:3) {
    if (line[i]) {
      plot(x, y, las = 1, pch = 16, col = "gray80", bty = "n")
      lines(predict(fit[[i]], xx), lwd = 2, col = pal(2)[2])
    } else {
      plot(x, y, las = 1, pch = 16, bty = "n")
    }
    mtext(main[i], cex = 0.7)
  }
}
splot(c(FALSE, FALSE, FALSE))
splot(c(TRUE, FALSE, FALSE))
splot(c(TRUE, FALSE, TRUE))
splot(c(TRUE, TRUE, TRUE))