source('https://myweb.uiowa.edu/pbreheny/7110/f20/notes/fun.R')

# Binomial proportions: Penalty function
p <- seq(0.001, 0.999, len=101)
plot(p, -log(p) - log(1-p), type='l', las=1, lwd=3, col='slateblue', bty='n', xlab=expression(pi), ylab=expression(p(pi)))

# Binomial proportion: likelihood and PL
n <- 20
x <- 3
theta <- seq(0.001, 0.4, len=499)
L <- plotL(theta, dbinom(x, n, theta))
L <- plotL(theta, dbinom(x+2, n+4, theta), add=TRUE, col=pal(2)[1])
plotl(theta, dbinom(x, n, theta, log=TRUE), col=pal(2)[1])
plotl(theta, dbinom(x+2, n+4, theta, log=TRUE), add=TRUE)
toplegend(legend=c('Standard', 'Penalized'), col=pal(2), lwd=3)
p <- x/n
V <- p*(1-p)/n
lines(theta, -(p-theta)^2/V, lty=2, col=pal(2)[1])
p <- (x+2)/(n+4)
V <- p*(1-p)/(n+4)
lines(theta, -(p-theta)^2/V, lty=2, col=pal(2)[2])

# Binomial proportion: function
ci <- function(x, n, alpha=0.05, lam=1) {
  xx <- x + lam - 1
  nn <- n + 2*lam - 2
  p <- xx/nn
  SE <- sqrt(p*(1-p)/nn)
  cbind(p+qnorm(0.025)*SE, p + qnorm(0.975)*SE)
}
covered <- function(interval, p) {
  interval[1] <= p & interval[2] >= p
}

# Binomial proportion: sim
N <- 2000
n <- seq(10, 50, 10)
p <- 0.1
Coverage <- matrix(NA, length(n), 2, dimnames=list(n, c('Standard', 'Penalized')))
for (i in 1:length(n)) {
  x <- rbinom(N, n[i], p)
  W <- ci(x, n[i])
  Coverage[i, 1] <- mean(apply(W, 1, covered, p=p))
  W <- ci(x, n[i], lam=3)
  Coverage[i, 2] <- mean(apply(W, 1, covered, p=p))
}
matplot(n, Coverage, bty='n', type='l', lwd=3, col=pal(2), lty=1, las=1)
abline(h=0.95, col='gray', lty=2)
toplegend(legend=colnames(Coverage), col=pal(2), lwd=3)

# Complete separation
y <- rep(0:1, each=200)
x1 <- rnorm(400)
x2 <- rep(0, 400); x2[333] <- 1
fit <- glm(y ~ x1 + x2, family=binomial, control=glm.control(epsilon=1e-20))
summary(fit)$coef[,c(1,2,4)]

# Firth
library(logistf)
fit <- logistf(y ~ x1 + x2, pl=FALSE)
summary(fit)

# Test varcov
X <- cbind(1, x1, x2)
p <- fit$predict
W <- diag(p*(1-p))
solve(crossprod(X, W%*%X))
fit$var
head(fit$hat.diag)
boxplot(fit$hat.diag, las=1, bty='n')

# Ridge
library(MASS)
set.seed(11)
x1 <- rnorm(20)
x2 <- rnorm(20, mean=x1, sd=.01)
y <- rnorm(20, mean=3+x1+x2)
coef(lm(y~x1+x2))
coef(lm.ridge(y~x1+x2, lambda=1))

# Lasso
X <- matrix(rnorm(100*10), 100, 10)
y <- rnorm(100, X[,1] + X[,7], sd=0.5)

# Splines
set.seed(1)
xx <- seq(0, 1, len=100)
x <- sort(runif(100))
y <- rnorm(100, mean=exp(x)*(x^2)*sin(10*x), sd=.5)
fit <- list(smooth.spline(x, y, all.knots=TRUE, spar=1.4),
            smooth.spline(x, y, all.knots=TRUE, spar=1),
            smooth.spline(x, y, all.knots=TRUE, spar=0.7))
splot <- function(line) {
  main <- c(expression(lambda*' too large'),
            expression(lambda*' just right'),
            expression(lambda*' too small'))
  for (i in 1:3) {
    if (line[i]) {
      plot(x, y, las=1, pch=16, col='gray80', bty='n')
      lines(predict(fit[[i]], xx), lwd=2, col=pal(2)[2])
    } else {
      plot(x, y, las=1, pch=16, bty='n')
    }
    mtext(main[i])
  }
}
splot(c(FALSE, FALSE, FALSE))
splot(c(TRUE, FALSE, FALSE))
splot(c(TRUE, FALSE, TRUE))
splot(c(TRUE, TRUE, TRUE))