# Setup
require(survival)
source("http://myweb.uiowa.edu/pbreheny/7210/f15/notes/fun.R")
PBC <- subset(pbc, !is.na(trt))
PBC <- PBC[order(PBC$time),]
GVHD <- read.delim("http://myweb.uiowa.edu/pbreheny/data/gvhd.txt")

# Slide 3
fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC)
e <- fit$y[,2]-residuals(fit)
efit <- survfit(Surv(e, fit$y[,2])~1)
lim <- c(0,2)
plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1,
     xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim)
ciband(efit, fun=function(x) -log(x))
lines(lim, lim, col='red', lwd=2)

# Slide 9
par(mar=c(5,6,0.1,0.1))
plot(log(efit$time), (-log(efit$surv) - efit$time)/efit$std.err, type='l', col='red', lwd=2, bty='n', las=1, ylim=c(-3,3),
     xlab='Residual', ylab=expression((hat(Lambda)(e)-e)/SE), xaxt='n')
at <- seq(-4, 1, len=4)
axis(1, at=at, lab=round(exp(at), 2))
abline(h=c(-2,2), col='gray')

# Slide 6
set.seed(14)
n <- 500
x <- rbinom(n, 1, 0.5)
y <- 2*x + rnorm(n)
fit <- coxph(Surv(exp(y)) ~ x)
e <- fit$y[,2]-residuals(fit)
efit <- survfit(Surv(e, fit$y[,2])~1)
lim <- c(0, 6)
plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1,
     xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim)
ciband(efit, fun=function(x) -log(x))
lines(lim, lim, col='red', lwd=2)

# Slide 7
lim <- c(0, 0.25)
plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1,
     xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim)
ciband(efit, fun=function(x) -log(x))
lines(lim, lim, col='red', lwd=2)

# Slide 8
par(mar=c(5,6,0.1,0.1))
plot(log(efit$time), (-log(efit$surv) - efit$time)/efit$std.err, type='l', col='red', lwd=2, bty='n', las=1, ylim=c(-3,3),
     xlab='Residual', ylab=expression((hat(Lambda)(e)-e)/SE), xaxt='n')
at <- seq(-6, 2, 2)
axis(1, at=at, lab=round(exp(at), 2))
abline(h=c(-2,2), col='gray')

# Slide 10
fit <- coxph(Surv(Time, Status) ~ Group, GVHD)
e <- fit$y[,2]-residuals(fit)
efit <- survfit(Surv(e, fit$y[,2])~1)
lim <- c(0, max(e))
plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1,
     xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim)
ciband(efit, fun=function(x) -log(x))
lines(lim, lim, col='red', lwd=2)

# Slide 11
efit <- survfit(Surv(e, fit$y[,2])~GVHD$Group)
lim <- c(0, max(e))
plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, col=pal(2), las=1,
     xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim)
lines(lim, lim, col='gray', lwd=2)

# Slide 15
fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC)
r <- residuals(fit)
plot(PBC$time/365.25, residuals(fit), pch=19, las=1, bty='n', col=pal(2)[fit$y[,2]+1],
     xlab='Time', ylab='Martingale residual')
toplegend(legend=c("Censored", "Died / Liver failure"), pch=19, col=pal(2))

# Slide 20
fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC)
r <- residuals(fit, type='deviance')
plot(PBC$time/365.25, r, pch=19, bty='n', las=1, col=pal(2)[fit$y[,2]+1],
     xlab='Time', ylab='Deviance residual')
toplegend(legend=c("Censored", "Died / Liver failure"), pch=19, col=pal(2))

# AR question
m <- residuals(fit, type='martingale')
r <- residuals(fit, type='deviance')
d <- fit$y[,2]==1
cor(m, r, method='spearman')
cor(m[d], r[d], method='spearman')
cor(m[!d], r[!d], method='spearman')

# Slide 22
cbind(fit$y, model.matrix(fit), r)[order(r)[1:5],]
cbind(fit$y, model.matrix(fit), r, rank(-fit$linear.predictors))[order(-r)[1:5],]

# Slide 24
plot(PBC$albumin, r, pch=19, bty='n', las=1, col='gray60',
     xlab='Albumin', ylab='Deviance residual')
lines(lowess(PBC$albumin, r), col='red', lwd=3)

# Slide 27
require(visreg)
f <- function(x) {pmin(x, 3.5)}
fit1 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC)
visreg(fit1, 'albumin', xlab='Albumin', ylab="Linear predictor")
f2 <- function(x) {cbind(x, (x-3.5)*(x-3.5 > 0))}
fit2 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f2(albumin), PBC)
visreg(fit2, 'albumin', xlab='Albumin', ylab="Linear predictor")
visreg(fit1, 'albumin', xlab='Albumin', ylab="Hazard ratio", trans=exp, ylim=c(0, 8))

# Slide 28
fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili, PBC)
fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + albumin, PBC)
fit2 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC)
fit3 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f2(albumin), PBC)
AIC(fit0)
AIC(fit1)
AIC(fit2)
AIC(fit3)

# Slide 29
fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + f(albumin), PBC)
fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC)
r0 <- residuals(fit0, type='deviance')
r1 <- residuals(fit1, type='deviance')
par(mfrow=c(1,2), mar=c(5, 5, 2, 1))
plot(PBC$bili, r0, pch=19, bty='n', las=1, col='gray60',
     xlab='Bilirubin', ylab='Deviance residual')
mtext('without bili')
lines(lowess(PBC$bili, r0), col='red', lwd=3)
plot(PBC$bili, r1, pch=19, bty='n', las=1, col='gray60',
     xlab='Bilirubin', ylab='Deviance residual')
mtext('with bili')
lines(lowess(PBC$bili, r1), col='red', lwd=3)

# Slide 30
fit <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC)
visreg(fit, 'bili', xlab='Bilirubin', ylab="Linear predictor")

# Slide 31
2^coef(fit)['log(bili)']

# Slide 33
fit <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + pspline(bili) + f(albumin), PBC)
visreg(fit, 'bili', xlab='Bilirubin', ylab="Linear predictor")

# Slide 34
fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + f(albumin), PBC)
fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC)
fit2 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC)
fit3 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + pspline(bili) + f(albumin), PBC)
AIC(fit0)
AIC(fit1)
AIC(fit2)
AIC(fit3)

# Slide 35
PBC$Stage <- factor(PBC$stage)
fit <- coxph(Surv(time, status!=0) ~ trt + Stage + hepato + log(bili) + f(albumin), PBC)
visreg(fit, 'Stage', ylab="Linear predictor")

# Slide 39
fit <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC)
D <- residuals(fit, type='dfbeta')
par(mar=c(6,6,5,1))
plot(D[,1], type='h', col=pal(2)[PBC$trt], bty='n', las=1, lwd=1.5,
     xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij]))
toplegend(legend=1:2, lwd=1.5, col=pal(2))
mtext("Treatment:", line=3)

# Slide 40
par(mar=c(6,6,5,1))
plot(D[,2], type='h', col=pal(4)[PBC$Stage], bty='n', las=1, lwd=1.5,
     xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij]))
toplegend(legend=levels(PBC$Stage), lwd=1.5, col=pal(4))
mtext("Stage:", line=3)

# Slide 41
xx <- log(PBC$bili)
xx <- (xx - min(xx)) / (diff(range(xx)))
RGB <- colorRamp(c(pal(2)[2], 'gray60', pal(2)[1]))(xx)
col <- apply(RGB, 1, function(x) rgb(x[1], x[2], x[3], max=255))
par(mar=c(6,6,5,1))
plot(D[,4], type='h', col=col, bty='n', las=1, lwd=1.5,
     xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij]))
toplegend(legend=c("High bili", "Low bili"), lwd=1.5, col=pal(2))

# Slide 41 (bili on original scale)
fit2 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC)
D2 <- residuals(fit2, type='dfbeta')
xx <- PBC$bili
xx <- (xx - min(xx)) / (diff(range(xx)))
RGB <- colorRamp(c(pal(2)[2], 'gray60', pal(2)[1]))(xx)
col <- apply(RGB, 1, function(x) rgb(x[1], x[2], x[3], max=255))
par(mar=c(6,6,5,1))
plot(D2[,4], type='h', col=col, bty='n', las=1, lwd=1.5,
     xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij]))
toplegend(legend=c("High bili", "Low bili"), lwd=1.5, col=pal(2))
