# Setup
library(data.table)
library(lmtest)
library(sandwich)
source('https://myweb.uiowa.edu/pbreheny/7110/f25/notes/fun.R')

# Quasipoisson illustration
n <- 100
g <- rexp(n, 1)
x <- runif(n)
y <- rpois(n, g * exp(x))

# Poisson
fit1 <- glm(y ~ x, family = poisson())
coeftest(fit1)

# Quasipoisson
fit2 <- glm(y ~ x, family = quasipoisson())
coeftest(fit2)

# Where are these SEs coming from?
X <- model.matrix(fit1)
eta <- drop(X %*% coef(fit1))
mu <- exp(eta)
phi <- sum((y - mu)^2 / mu) / (n - 2)
vcov(fit1)
solve(crossprod(X, mu * X))
vcov(fit2)
solve(crossprod(X, mu / phi * X))

# Sandwich (same for both models)
coeftest(fit1, vcov = sandwich)
coeftest(fit2, vcov = sandwich)

# Quasipoisson simulation
contains <- function(interval, number) {
  number >= min(interval) && number <= max(interval)
}
n <- 30
if (file.exists("res.rds")) {
  res <- readRDS("res.rds")
} else {
  res <- expand.grid(
    method = c("Poisson", "Quasi", "Sandwich"),
    replicate = 1:2000,
    Cover = NA_real_,
    Width = NA_real_
  )
  pb <- progress::progress_bar$new(total = nrow(res))
  for (i in 1:nrow(res)) {
    if (i == 1 || res$replicate[i] != res$replicate[i - 1]) {
      n <- 100
      g <- rexp(n, 1)
      x <- runif(n)
      y <- rpois(n, g*exp(x))
    }
    if (res$method[i] == "Poisson") {
      fit <- glm(y ~ x, family=poisson())
      ci <- coefci(fit, 2)
    } else if (res$method[i] == "Quasi") {
      fit <- glm(y ~ x, family = quasipoisson())
      ci <- coefci(fit, 2)
    } else {
      fit <- glm(y ~ x, family=poisson())
      ci <- coefci(fit, 2, vcov = sandwich(fit))
    }
    res$Cover[i] <- contains(ci, 1)
    res$Width[i] <- ci[2] - ci[1]
    pb$tick()
  }
  saveRDS(res, "res.rds")
}

as.data.table(res) |>
  _[, .(Coverage = mean(Cover), AvgWidth = mean(Width)), method] |>
  knitr::kable("latex", booktabs = TRUE, digits = 3, col.names = c("", "Coverage", "Average SE")) |>
  kableExtra::kable_styling(position = "center")

# Connection between IPW and true likelihood

# Likelihood
l <- function(p, n1, n0, p1, p0) {
  n1 * log(p * p1) + n0 * log((1 - p) * p0) -
    (n0 + n1) * log(p * p1 + (1 - p) * p0)
}
pp <- seq(0, 0.6, len = 501)
ll <- l(pp, 20, 40, 1, 0.5)
plotL(pp, exp(ll - max(ll)))

# Pseudo
pseudo_l <- function(p, n1, n0, p1, p0) {
  n1 / p1 * log(p) + n0 / p0 * log(1 - p)
}
pll <- pseudo_l(pp, 20, 40, 1, 0.5)
plotL(pp, exp(pll - max(pll)), col = pal(2)[1], add = TRUE)
rightlegend(legend = c("True", "Pseudo"), lwd = 3, col = pal(2)[2:1])

# Score
u <- function(p, n1, n0, p1, p0) {
  n1/p - n0/(1-p) - (n0+n1)*(p1-p0)/(p*p1+(1-p)*p0)
}
pp <- seq(0.05, 0.5, len=101)
plot(pp, u(pp, 20, 40, 1, 0.5), type='l', lwd=3, col=pal(2)[2], las=1, bty='n', xlab=expression(pi), ylab='Score', ylim=c(-100, 300))
abline(h=0, lty=2, col='gray')

# Pseudo-score
pseudo_u <- function(p, n1, n0, p1, p0) {
  (n1/p1)/p - (n0/p0)/(1-p)
}
lines(pp, pseudo_u(pp, 20, 40, 1, 0.5), lwd=3, col=pal(2)[1])
toplegend(legend=c('True', 'Pseudo'), lwd=3, col=pal(2)[2:1])

# Sample data
p1 <- 1
p0 <- 0.5
n1 <- 20
n0 <- 40
p <- n1*p0 /(n1*p0 + n0*p1)

# Compare Hessian
library(numDeriv)
-grad(u, 0.2, n1=20, n0=40, p1=1, p0=0.5)
-hessian(l, 0.2, n1=20, n0=40, p1=1, p0=0.5)
-grad(u, n1*p0 /(n1*p0 + n0*p1), n1=n1, n0=n0, p1=p1, p0=p0)
n1/p^2 + n0/(1-p)^2 - (n0+n1)*(p1-p0)^2/(p*p1+(1-p)*p0)^2

# Compare information / variance (true/pseudo/obs/exp)
IT <- n1/p^2 + n0/(1-p)^2 - (n0+n1)*(p1-p0)^2/(p*p1+(1-p)*p0)^2
1/IT
IP <- n1/(p1*p^2) + n0/(p0*(1-p)^2)
1/IP
B <- (1-p1)/p1^2 * n1/p^2 + (1-p0)/p0^2 * n0/(1-p)^2  # Kalbfleish
1/IP + B/IP^2
V <- n1/(p1*p)^2 + n0/(p0*(1-p))^2  # Robust obs
V/IP^2
EIP <- 1/p + 1/(1-p)  # Robust Fisher
EV <- 1/(p1*p) + 1/(p0*(1-p))
EV/EIP^2 * (p1*p + p0*(1-p))/(n0+n1)

# Confidence intervals
SE1 <- sqrt(1/IT)
SE2 <- sqrt(1/IP)
SE3 <- sqrt(V/IP^2)
ci1 <- p + qnorm(c(.025, .975)) * SE1
ci2 <- p + qnorm(c(.025, .975)) * SE2
ci3 <- p + qnorm(c(.025, .975)) * SE3

set.seed(3)
options(show.signif.stars = FALSE)
assignInNamespace("print.coeftest", stats:::printCoefmat, "lmtest")

# Generate data (in this case, pure noise)
y_all <- rbinom(200, size = 1, prob = 0.5)

# Set up as composite likelihood
idx = 2:(length(y_all) - 1)
y <- y_all[idx]
n <- y_all[idx - 1] + y_all[idx + 1]

# Fit
fit <- glm(y ~ n, family = binomial)

coeftest(fit)
coeftest(fit, vcov = vcovHAC)