source("http://myweb.uiowa.edu/pbreheny/7210/f19/notes/fun.R")
library(survival)

PBC <- pbc
PBC$Hepato <- factor(pbc$hepato, levels=0:1, labels=c("HM: No", "HM: Yes"))

# Slide 3: Hepatomegaly (Marginal)
survdiff(Surv(time, status!=0) ~ Hepato, PBC)
fit <- survfit(Surv(time, status!=0) ~ Hepato, PBC)
Plot(fit, xscale=365.25, xlab="Time (years)")

# Slide 4: Hepatomegaly vs Stage
with(PBC, table(Hepato, stage))

# Slide 5: Stratified
par(mfrow=c(2,2), oma=c(0,0,0,6), mar=c(5,5,2,1))
for (i in 1:4) {
  fit <- survfit(Surv(time, status!=0) ~ Hepato, PBC, subset=(stage==i))
  if (i == 2) {
    Plot(fit, xscale=365.25, xlab="Time (years)", conf.int=FALSE, legend="right")
  } else {
    Plot(fit, xscale=365.25, xlab="Time (years)", conf.int=FALSE, legend="none")
  }
  if (i == 1) {
    mtext(paste0("Stage ", i), line=1)
  } else {
    Diff <- survdiff(Surv(time, status!=0) ~ Hepato, PBC, subset=(stage==i))
    p <- pchisq(Diff$chisq, 1, low=FALSE)
    mtext(paste0("Stage ", i, "; p = ", format.pval(p, digits=1, eps=1e-3)), line=1)
  }
}

# Slides 8-10: Stratified log-rank test
Diff <- survdiff(Surv(time, status != 0) ~ Hepato + strata(stage), PBC)
Tab <- cbind(Diff$obs[2,], Diff$exp[2,], Diff$obs[2,] - Diff$exp[2,])
Tab <- rbind(Tab, apply(Tab, 2, sum))
Tab

# Slide 12: GVHD and LAF
Data <- read.delim("https://s3.amazonaws.com/pbreheny-data-sets/gvhd.txt")
survdiff(Surv(Time, Status) ~ Group, Data)
survdiff(Surv(Time, Status) ~ Group + strata(LAF), Data)

# Slide 13: Stratifying on Age
cAge <- cut(Data$Age, breaks=quantile(Data$Age, 0:3/3), include.lowest=TRUE)
a <- levels(cAge)
par(mfrow=c(1,3), mar=c(5, 5, 4, 1), oma=c(0,0,3,0))
for (i in 1:3) {
  fit <- survfit(Surv(Time, Status) ~ Group, Data, subset=(cAge==a[i]))
  if (i == 3) {
    Plot(fit, xmax=80)
  } else {
    Plot(fit, xmax=80, legend="none")
  }
  Diff <- survdiff(Surv(Time, Status) ~ Group, Data, subset=(cAge==a[i]))
  p <- pchisq(Diff$chisq, 1, low=FALSE)
  mtext(paste0("Age", a[i], "; p = ", format.pval(p, digits=1, eps=1e-3)), line=2)
}

# Slide 14: Age-stratified log-rank
survdiff(Surv(Time, Status) ~ Group, Data)
survdiff(Surv(Time, Status) ~ Group + strata(cAge), Data)
