library(hdrm)
attachData(Golub1999)

# Note: I used ncvreg for lasso models here just to keep the fold assignments
# the same between MCP and lasso; the general code for fitting lasso-penalized
# logistic models is:

cvfit <- cv.glmnet(X, y, family="binomial")

cvfit.lasso <- cv.ncvreg(X, y, family="binomial", penalty="lasso", nfolds=length(y), lambda.min=0.01)
cvfit.mcp <- cv.ncvreg(X, y, family="binomial", nfolds=length(y), gamma=4)


# Deviance ----------------------------------------------------------------

plot(cvfit.lasso)
plot(cvfit.mcp)


# Rsq ---------------------------------------------------------------------

plot(cvfit.lasso, type="rsq")
plot(cvfit.mcp, type="rsq")
summary(cvfit.lasso)
summary(cvfit.mcp)

# Misclassification error -------------------------------------------------

plot(cvfit.lasso, type="pred")
plot(cvfit.mcp, type="pred")

cvfit <- cv.glmnet(X, y, family="binomial", type.measure="class") # glmnet version
plot(cvfit, xlim=rev(range(log(cvfit$lambda))), las=1)


# AUC ---------------------------------------------------------------------

cvfit.auc <- cv.glmnet(X, y, family="binomial", type.measure="auc", nfolds=5)
plot(cvfit.auc, xlim=rev(range(log(cvfit.auc$lambda))), las=1, xlab=expression(log(lambda)))
max(cvfit.auc$cvm)

# ROC curve
library(ROCR)
cvfit.lasso <- cv.ncvreg(X, y, family="binomial", penalty="lasso", nfolds=length(y), returnY=TRUE, lambda.min=0.01)
yy <- 1*(y=="AML")
pred <- prediction(cvfit.lasso$Y[,cvfit.lasso$min], yy)
perf <- performance(pred, "tpr", "fpr")
plot(perf@x.values[[1]], perf@y.values[[1]], type='s', xlab=perf@x.name, ylab=perf@y.name, bty='n', las=1, lwd=3, col='red')
performance(pred, 'auc')@y.values[[1]]
