# Rank AFT: Setup
library(survival)
source("http://myweb.uiowa.edu/pbreheny/7210/f19/notes/fun.R")
test <- function(r, dd, xx) {
  J <- length(r)
  ind <- order(r)
  x <- xx[ind]
  d <- dd[ind]
  n <- J - (1:J) + 1
  m <- rev(cumsum(rev(x)))/n
  a <- cumsum(d/n)
  w <- sum(d*(x-m))
  v <- sum(a*x^2) - sum(d*m^2)
  w/sqrt(v)
}

# Pike Rat
Data <- read.delim("https://s3.amazonaws.com/pbreheny-data-sets/Pike1966.txt")
ind <- order(Data$Time)
Data <- Data[ind,]
y <- log(Data$Time)
d <- 1*(Data$Death)
x <- as.numeric(Data$Group)-1
bb <- seq(-0.2,0.6,len=199)
z <- sapply(bb, function(b) {test(y-x*b, d, x)})
plot(bb, z^2, type='s', bty='n', lwd=3, las=1,
     xlab=expression(beta), ylab=expression(chi^2))
abline(h=qchisq(.95,1), col='gray')

# Comparison table
B <- matrix(NA, 4, 3, dimnames=list(c('Rank-based', 'Weibull', 'Exponential', 'Lognormal'), c('Est', 'Lower', 'Upper')))
B[1,] <- c(bb[which.min(abs(z))],
           bb[which.min(abs(z+1.96))],
           bb[which.min(abs(z-1.96))])
fit <- survreg(Surv(Data$Time, d)~x)
B[2,] <- c(coef(fit)[2], confint(fit, 2))
fit <- survreg(Surv(Data$Time, d)~x, dist='exponential')
B[3,] <- c(coef(fit)[2], confint(fit, 2))
fit <- survreg(Surv(Data$Time, d)~x, dist='lognormal')
B[4,] <- c(coef(fit)[2], confint(fit, 2))
B

# Simulated data
set.seed(3)
n <- 1000
x <- rbinom(n, 1, 0.5)
lam <- exp(-x)
yy <- rweibull(n, 1.5, 1/lam)
cc <- rexp(n)
y <- log(pmin(yy, cc))
d <- yy <= cc
bb <- seq(0,2,len=199)
z <- sapply(bb, function(b) {test(y-x*b, d, x)})
plot(bb, z^2, type='s', bty='n', lwd=3, las=1,
     xlab=expression(beta), ylab=expression(chi^2))
abline(h=qchisq(.95,1), col='gray')

# Comparison table
B <- matrix(NA, 4, 3, dimnames=list(c('Rank-based', 'Weibull', 'Exponential', 'Lognormal'), c('Est', 'Lower', 'Upper')))
B[1,] <- c(bb[which.min(abs(z))],
           bb[which.min(abs(z+1.96))],
           bb[which.min(abs(z-1.96))])
fit <- survreg(Surv(exp(y), d)~x)
B[2,] <- c(coef(fit)[2], confint(fit, 2))
fit <- survreg(Surv(exp(y), d)~x, dist='exponential')
B[3,] <- c(coef(fit)[2], confint(fit, 2))
fit <- survreg(Surv(exp(y), d)~x, dist='lognormal')
B[4,] <- c(coef(fit)[2], confint(fit, 2))
B

# Simulation study
N <- 1000
n <- c(20, 40, 80, 160, 320)
cov <- len <- array(NA, dim=c(N, length(n), 2), dimnames=list(1:N, n, c("Rank", "Weibull")))
f <- function(b, y, x, d) {
  B <- length(b)
  res <- numeric(B)
  for (i in 1:B) {
    res[i] <- test(y-x*b[i], d, x)^2 - qchisq(.95, 1)
  }
  res
}
pb <- txtProgressBar(0, N, style=3)
for (i in 1:N) {
  for (j in 1:length(n)) {
    x <- rbinom(n[j], 1, 0.5)
    lam <- exp(-x)
    yy <- rweibull(n[j], 1.5, 1/lam)
    y <- log(yy)
    d <- rep(1, length(y))
    b1 <- min(y[x==1]) - max(y[x==0])
    b2 <- max(y[x==1]) - min(y[x==0])
    est <- optimize(f, interval=c(b1, b2), y=y, x=x, d=d)$minimum
    lwr <- try(uniroot(f, y=y, x=x, d=d, lower=b1, upper=est)$root, silent=TRUE)
    upr <- try(uniroot(f, y=y, x=x, d=d, lower=est, upper=b2)$root, silent=TRUE)
    if (class(lwr) != "numeric") lwr <- -Inf
    if (class(upr) != "numeric") upr <- Inf
    len[i,j,1] <- upr-lwr
    cov[i,j,1] <- (upr > 1) & (lwr < 1)
    fit <- survreg(Surv(exp(y), d)~x)
    lwr <- confint(fit)[2,1]
    upr <- confint(fit)[2,2]
    len[i,j,2] <- upr-lwr
    cov[i,j,2] <- (upr > 1) & (lwr < 1)
  }
  setTxtProgressBar(pb, i)
}
matplot(n, apply(cov, 2:3, mean), ylab="Coverage", col=pal(2), lwd=3, type="l", lty=1, ylim=c(.85, 1), las=1, bty="n")
toplegend(legend=dimnames(len)[[3]], col=pal(2), lwd=3)
L <- apply(len, 2:3, median)
plot(n, L[,1]/L[,2], ylab="Length ratio (R/W)", col=pal(2), lwd=3, type="l", lty=1, las=1, bty="n", ylim=c(0.9, max(L[,1]/L[,2])))
abline(h=1, lty=2, col="gray", lwd=3)
