library(hdrm)
library(hqreg)


# Multinomial -------------------------------------------------------------

attachData(Ramaswamy2001)

# Slide 8
cvfit <- cv.glmnet(X, y, family="multinomial")

# Slide 9
plot(cvfit, xlim=rev(range(log(cvfit$lambda))), xlab=expression(log(lambda)), las=1)

# Slide 10
coef(cvfit)[[1]][5466+1]

# Slide 11
summaryTable <- function(cvfit, X, y) {
  fit <- cvfit$glmnet.fit
  lambda <- cvfit$lambda.min
  pred <- factor(predict(fit, X, type="class", s=lambda), levels=levels(y))
  tab <- table(pred,y)
  tab <- round(100*t(t(table(pred,y))/as.numeric(table(y))))
  rownames(tab) <- colnames(tab) <- abbreviate(levels(y), minlength=2)
  tab <- rbind(tab, table(y))
  tab <- cbind(tab, c(table(pred), length(y)))
  print(mean(pred==y))
  return(tab)
}
summaryTable(cvfit, X.test, y.test)


# Robust regression -------------------------------------------------------

# This takes a while to run
# Feel free to reduce N or the number of sigma values to get it to run faster
# Unfortunately no way to get rid of CV fold updates from hqreg; I should contact maintainer
n <- p <- 100
N <- 100
sig <- seq(1, 10, 1)
J <- length(sig)
MSE <- array(NA, dim=c(N, J, 2), dimnames=list(1:N, sig, c("Least squares", "Huber")))
pb <- txtProgressBar(1, N, style=3)
for (i in 1:N) {
  for (j in 1:J) {
    X <- matrix(rnorm(n*p), n, p)
    messy <- rbinom(n, 1, prob=0.1)
    b <- rep(1:0, c(2, p-2))
    y <- X%*%b + (1-messy)*rnorm(n) + messy*rnorm(n, sd=sig[j])

    cvfit.ls <- cv.glmnet(X, y)
    cvfit.hq <- cv.hqreg(X, y)

    b.ls <- coef(cvfit.ls, s=cvfit.ls$lambda.min)[-1]
    b.hq <- predict(cvfit.hq, type="coef", lambda="lambda.min")[-1]

    MSE[i,j,1] <- crossprod(b-b.ls)
    MSE[i,j,2] <- crossprod(b-b.hq)
  }
  setTxtProgressBar(pb, i)
}
mse <- apply(MSE, 2:3, mean)
matplot(sig, mse, type="l", col=hdrm:::pal(2), lwd=3, lty=1, las=1, bty="n",
        xlab=expression(sigma), ylab="MSE")
hdrm:::toplegend(legend=dimnames(MSE)[[3]], col=hdrm:::pal(2), lwd=3)


# Cox regression ----------------------------------------------------------

# Slide 26
attachData(Shedden2008)
ZZ <- cbind(Age=as.numeric(Z$Age),
            Sex=Z$Sex=="Male",
            Chemo=Z$AdjChemo=="Yes")
XX <- cbind(ZZ, std(X))
w <- rep(0:1, c(ncol(ZZ), ncol(X)))
cvfit.las1 <- cv.glmnet(XX, S, family="cox", penalty.factor=w, lambda.min=0.35)
cvfit.las2 <- cv.ncvsurv(XX, S, penalty="lasso", penalty.factor=w, lambda.min=0.35, trace=TRUE, seed=3)
cvfit.mcp <- cv.ncvsurv(XX, S, penalty.factor=w, lambda.min=0.5, trace=TRUE, seed=3)

# Slides 27-28
plot(cvfit.las1, xlim=rev(range(log(cvfit.las1$lambda))), las=1)
plot(cvfit.las2)
plot(cvfit.mcp)

# Slide 29
b <- coef(cvfit.las2)
b["205308_at"]
exp(b["205308_at"])
b <- coef(cvfit.mcp)
b["205308_at"]
exp(b["205308_at"])
