source('https://myweb.uiowa.edu/pbreheny/7110/f24/notes/fun.R')

# Neyman-Scott Illustration
col <- c("#FF4E37FF", "#00B500FF", "#008DFFFF")
ll <- function(v, Y, m) {
  n <- length(m)
  -n*log(v) - 1/(2*v)*sum((Y[,1] -m)^2 + (Y[,2] -m)^2)
}
llv <- function(s2, Y) {
  n <- nrow(Y)
  v <- apply(Y, 1, diff)/sqrt(2)
  -(n/2)*log(s2) - (1/(2*s2))*sum(v^2)
}
lik_plots <- function(n, seed=3) {
  set.seed(seed)
  m <- rnorm(n)
  Y <- matrix(rnorm(2*n, rep(m, 2)), n, 2)
  vv <- seq(0.01, 2, len=299)
  oll <- ll(vv, Y, m)
  pll <- ll(vv, Y, apply(Y, 1, mean))
  mll <- llv(vv, Y)
  plotL(vv, exp(pll-max(pll)), col=col[1], xlab=expression(sigma^2), ylab=expression("L"*(sigma^2)))
  plotL(vv, exp(mll-max(mll)), col=col[2], add=TRUE)
  plotL(vv, exp(oll-max(oll)), col=col[3], add=TRUE)
  mtext(paste0('n = ', n))
  rightlegend(legend=c('Profile', 'Marginal', 'Oracle'), lwd=3, col=col)
}
par(mar=c(4.5, 4.5, 2, 6))
lik_plots(20)

# Linear mixed modeul illustration
set.seed(NULL)
analyze <- function(x, y, s, method, u) {
  if (method == 'mle') {
    fit <- lm(y ~ x + as.factor(s))
    v <- sum(fit$residuals^2)/(n*m)
    b <- coef(summary(fit))['x',]
  } else if (method == 'marginal') {
    fit <- lme4::lmer(y ~ x + (1|s))
    v <- summary(fit)$sigma^2
    b <- coef(summary(fit))['x',]
  } else if (method == 'oracle') {
    yy <- y-u
    fit <- lm(yy ~ x)
    v <- sum(fit$residuals^2)/(n*m)
    b <- coef(summary(fit))['x',]
  } else if (method == 'naive') {
    fit <- lm(y ~ x)
    v <- summary(fit)$sigma^2
    b <- coef(summary(fit))['x',]
  } else if (method == 'diff') {
    n <- length(y)
    yy <- y[2*(1:n)] - y[2*(1:n)-1]
    xx <- x[2*(1:n)] - x[2*(1:n)-1]
    fit <- lm(yy ~ 0 + xx)
    v <- sum(fit$residuals^2)/(n)
    b <- coef(summary(fit))['xx',]
  }
  out <- data.frame(Estimate=b['Estimate'], SE=b['Std. Error'], Variance=v)
  rownames(out) <- method
  out
}
set_rownames <- function(df, rownames) {
  rownames(df) <- rownames
  df
}

# Generate data
set.seed(4)
n <- 100
m <- 2
x <- runif(n*m)
u <- rep(rnorm(n), each=m)
y <- rnorm(m*n, x+u)
s <- rep(1:n, each=m)

# Analysis
rbind(analyze(x, y, s, 'oracle', u),
      analyze(x, y, s, 'marginal'),
      analyze(x, y, s, 'diff'),
      analyze(x, y, s, 'mle'),
      analyze(x, y, s, 'naive')) |>
  set_rownames(c('Oracle', 'Marginal', 'Differencing', 'Profile', 'Naive')) |>
  knitr::kable('latex', booktabs=TRUE, digits=2) |>
  kableExtra::kable_styling(position = "center")

# LMM: What if u is correlated with x?
set.seed(4)
n <- 1000
m <- 2
u <- rep(rnorm(n), each=m)
x <- rnorm(m*n, u)
y <- rnorm(m*n, x+u)
s <- rep(1:n, each=m)
rbind(analyze(x, y, s, 'oracle', u),
      analyze(x, y, s, 'marginal'),
      analyze(x, y, s, 'diff'),
      analyze(x, y, s, 'mle'),
      analyze(x, y, s, 'naive')) |>
  set_rownames(c('Oracle', 'Marginal', 'Differencing', 'Profile', 'Naive')) |>
  knitr::kable('latex', booktabs=TRUE, digits=2) |>
  kableExtra::kable_styling(position = "center")

# Quadrature example
z <- rnorm(1000000)
mean(sqrt(abs(z)+abs(z^3)))
GH <- lme4::GHrule(20, FALSE)
sum(GH$w * sqrt(abs(GH$z)+abs(GH$z^3)))
GH <- lme4::GHrule(100, FALSE)
sum(GH$w * sqrt(abs(GH$z)+abs(GH$z^3)))

# Quadrature: Variance of median
n <- 11
m <- replicate(100000, median(rnorm(n)))
f <- function(x) {
n * choose(n-1, 5) * x^2 * pnorm(x)^5 * (1-pnorm(x))^5
}
GH20 <- lme4::GHrule(20, FALSE)
GH100 <- lme4::GHrule(100, FALSE)
data.frame(Variance=c(
var(m),
pi / (2*n),
sum(GH20$w * f(GH20$z)),
sum(GH100$w * f(GH100$z))),
row.names=c(
  'Monte Carlo ($N=100,000$)',
  'Asymptotic',
  'Gauss-Hermite ($K=20$)',
  'Gauss-Hermite ($K=100$)')) |>
knitr::kable('latex', booktabs=TRUE, digits=4, escape=FALSE) |>
kableExtra::kable_styling(position = "center")

# Logistic simulation
library(lme4)
library(survival)

if (file.exists('res.rds')) {
  res <- readRDS('res.rds')
} else {
  N <- 1000
  res <- data.frame(naive = rep(NA_real_, N), profile = rep(NA_real_, N), conditional = rep(NA_real_, N), marginal = rep(NA_real_, N))
  pb <- progress::progress_bar$new(total=N)
  for (i in 1:N) {
    # Generate data
    n <- 100
    m <- 2
    x <- rnorm(n*m)
    u <- rep(rnorm(n, sd=2), m)
    y <- rbinom(n*m, 1, binomial()$linkinv(x+u))
    s <- rep(1:n, m)
    
    # Naive
    fit <- glm(y ~ x, family='binomial')
    res$naive[i] <- coef(fit)['x']
    
    # Profile
    fit <- glm(y ~ x + as.factor(s), family='binomial')
    res$profile[i] <- coef(fit)['x']
    
    # Conditional
    fit <- clogit(y ~ x + strata(s), data.frame(y=y, x=x, s=s))
    res$conditional[i] <- coef(fit)['x']
    
    # Marginal
    fit <- glmer(y ~ x + (1|s), family='binomial') |> suppressMessages()
    res$marginal[i] <- fixef(fit)['x']
    
    pb$tick()
  }
  saveRDS(res, 'res.rds')
}

# Display
data.frame(Mean = colMeans(res), SE = sapply(res, sd), RMSE = sqrt(sapply(res - 1, crossprod)/nrow(res))) |>
  magrittr::set_rownames(c('Naive', 'Profile', 'Conditional', 'Marginal')) |>
  knitr::kable('latex', booktabs=TRUE, digits=2) |>
  kableExtra::kable_styling(position = "center")

