library(hdrm)
suppressPackageStartupMessages(library(grpreg))

# Geometry ----------------------------------------------------------------

# Generate data
set.seed(2)
b <- seq(-0.5,2.1,len=101)
B <- as.matrix(expand.grid(b,b))
n <- 100
x1 <- rnorm(n)
x2 <- rnorm(n, mean=x1/2, sd=.25)
X <- cbind(x1,x2)
y <- rnorm(n, mean=X%*%c(1,1))

# Non-orthogonal
L <- matrix(apply((y-tcrossprod(X,B))^2, 2, sum), ncol=length(b))
P <- matrix(sqrt(apply(B^2,1,sum)), ncol=length(b))
Q <- L/n + 0*P
l <- min(Q)
u <- max(Q)
levels <- c(.999*l + .001*u, .993*l + .007*u, .98*l + .02*u)
contour(b, b, Q, levels=levels, drawlabels=FALSE, lwd=2, col="gray60",
        xlab=expression(beta[1]), ylab=expression(beta[2]), las=1)
lam <- exp(seq(log(.01), log(20), len=10))
Z <- matrix(NA, nrow=length(lam), ncol=2)
rownames(Z) <- lam
for (i in 1:length(lam)) {
  Q <- L/n + lam[i]*P
  ind <- arrayInd(which.min(Q),dim(Q))
  ## points(b[ind[1]], b[ind[2]], pch=19)
  Z[i,] <- c(b[ind[1]], b[ind[2]])
}
z <- coef(lm(y~0+X))
Z <- rbind(z, Z, c(0,0))
fit <- xspline(Z[,1], Z[,2], c(0,rep(1,length(lam)),0), draw=FALSE)
lines(fit$x, fit$y, lwd=3, col="blue")

# Orthogonal
XX <- grpreg:::orthogonalize(std(X), c(1,1))
L <- matrix(apply((y-tcrossprod(XX,B))^2, 2, sum), ncol=length(b))
P <- matrix(sqrt(apply(B^2,1,sum)), ncol=length(b))
Q <- L/n + 0*P
l <- min(Q)
u <- max(Q)
levels <- c(.999*l + .001*u, .993*l + .007*u, .985*l + .015*u)
contour(b, b, Q, levels=levels, drawlabels=FALSE, lwd=2, col="gray60", xlab=expression(beta[1]), ylab=expression(beta[2]), las=1)
z <- coef(lm(y~0+XX))
lines(c(0,z[1]),c(0,z[2]), lwd=3, col="blue")


# Genetic association study data ------------------------------------------

attachData('glc-amd')
Gene <- group

# Expand X
XX <- matrix(0, nrow(X), 3*ncol(X))
for (j in 1:ncol(X)) {
  XX[,(j-1)*3 + 1] <- X[,j] == 0
  XX[,(j-1)*3 + 2] <- X[,j] == 1
  XX[,(j-1)*3 + 3] <- X[,j] == 2
}
Locus <- rep(1:ncol(X), each=3)

seed <- 5
cvfit1 <- cv.grpreg(X, y, family="binomial", seed=seed, lambda.min=0.2)
cvfit2 <- cv.grpreg(X, y, group=Gene, family="binomial", seed=seed, lambda.min=0.2)
cvfit3 <- cv.grpreg(XX, y, family="binomial", seed=seed, lambda.min=0.2)
cvfit4 <- cv.grpreg(XX, y, group=Locus, family="binomial", seed=seed, lambda.min=0.2)

ylim <- c(0, 0.065)
par(mfrow=c(1,2), mar=c(5, 5, 5, 1))
plot(cvfit1, type="rsq", ylim=ylim)
mtext("Ungrouped (ordinary lasso)", line=2.5)
plot(cvfit2, type="rsq", ylim=ylim)
mtext("Group lasso", line=2.5)

par(mfrow=c(1,2), mar=c(5, 5, 5, 1))
plot(cvfit1, type="pred", ylim=c(0.35, 0.55))
mtext("Ungrouped (ordinary lasso)", line=2.5)
plot(cvfit2, type="pred", ylim=c(0.35, 0.55))
mtext("Group lasso", line=2.5)

summary(cvfit1)
summary(cvfit2)
b1 <- coef(cvfit1, lambda=cvfit1$lambda.min)
length(table(Gene[b[-1]!=0]))
predict(cvfit2, lambda=cvfit2$lambda.min, type="ngroups")
min(summary(cvfit1)$pe)
min(summary(cvfit2)$pe)
max(summary(cvfit1)$r.sq)
max(summary(cvfit2)$r.sq)

par(mfrow=c(1,2), mar=c(5, 5, 5, 1))
ylim <- c(0, 0.065)
plot(cvfit3, type="rsq", ylim=ylim)
mtext("Ungrouped (ordinary lasso)", line=2.5)
plot(cvfit4, type="rsq", ylim=ylim)
mtext("Group lasso", line=2.5)

par(mfrow=c(1,2), mar=c(5, 5, 5, 1))
plot(cvfit3, type="pred", ylim=c(0.35, 0.55))
mtext("Ungrouped (ordinary lasso)", line=2.5)
plot(cvfit4, type="pred", ylim=c(0.35, 0.55))
mtext("Group lasso", line=2.5)

summary(cvfit3)
summary(cvfit4)
min(summary(cvfit3)$pe)
min(summary(cvfit4)$pe)
max(summary(cvfit3)$r.sq)
max(summary(cvfit4)$r.sq)
