diarrhea <- read.delim("https://raw.githubusercontent.com/IowaBiostat/data-sets/main/diarrhea/diarrhea.txt")
boxplot(diarrhea$Stool~diarrhea$Group, main = "Effect of Bismuth Salicylate", 
        xlab = "Stool Output (mL/kg)", ylab = "Group", horizontal = T, col = c("lightblue", "pink"))

diarrhea$logStool <- log(diarrhea$Stool)

boxplot(diarrhea$logStool~diarrhea$Group, main = "Effect of Bismuth Salicylate", 
        xlab = "Log(Stool Output)", ylab = "Group", horizontal = T, col = c("lightblue", "pink"))

library(ggplot2)

#Original
ggplot(diarrhea, aes(x = Stool)) + geom_histogram(fill = "tomato3", color = "black", bins = 20) + facet_wrap(~Group)

#Log-transformed
ggplot(diarrhea, aes(x = log(Stool))) + geom_histogram(fill = "tomato3", color = "black", bins = 20) + facet_wrap(~Group)

# Original
t.test(diarrhea$Stool ~ diarrhea$Group, var.equal = TRUE)

# log-transformed
t.test(diarrhea$logStool ~ diarrhea$Group, var.equal = TRUE)

by(diarrhea$logStool, diarrhea$Group, mean)

estimate <- 5.2124-4.8706 # difference in mean(log_stool)
exp(estimate) # exponentiate to get estimate of diff in stool output

logCI <- t.test(diarrhea$logStool~diarrhea$Group,var.equal=TRUE)$conf.int
exp(logCI)

log_stool_treat <- diarrhea$logStool[diarrhea$Group == "Treatment"]

(x_bar <- mean(log_stool_treat))
(s <- sd(log_stool_treat))
(n <- length(log_stool_treat)) # length() gives us n
(SE <- s/sqrt(n))
(df <- n - 1)
(t_df <- qt(0.025, df))

(log_CI <- x_bar + c(t_df, -t_df) * SE) # CI for LOG_stool, still need to exponentiate it

(CI <- exp(log_CI))

# Rank-Sum Test
wilcox.test(diarrhea$Stool~diarrhea$Group)

# Signed Rank Test
oatbran <- read.delim('https://raw.githubusercontent.com/IowaBiostat/data-sets/main/oatbran/oatbran.txt')
wilcox.test(oatbran$CornFlakes, oatbran$OatBran, paired=TRUE)

# For comparison, in case you don't remember:
t.test(oatbran$CornFlakes, oatbran$OatBran, paired=TRUE)

nhanes <- read.delim('https://raw.githubusercontent.com/IowaBiostat/data-sets/main/nhanes-am/nhanes-am.txt')
plot(nhanes$Weight~nhanes$Height)

## #Spearman's Rank Correlation
## cor.test(nhanes$Height, nhanes$Weight, method = "spearman")
## 
## #For comparison, here's what we got using Pearson's correlation:
## cor.test(nhanes$Height, nhanes$Weight, method = "pearson")

set.seed(58008)
icecream <- as.data.frame(matrix(c(rep("A", 40), rep("B", 60)), nrow = 100))
colnames(icecream) <- "group"
var1 <- rlnorm(40, meanlog = 0, sdlog = 1)
var2 <- rgamma(60, shape = 1, rate = 0.5)

icecream$cups <- c(var1, var2)

ggplot(icecream, aes(x = cups)) + geom_histogram(fill = "pink", color = "black", bins = 20) + facet_wrap(~group)

icecream$logcups <- log(icecream$cups) # transform
ggplot(icecream, aes(x = log(cups))) + geom_histogram(fill = "pink", color = "black", bins = 20) + facet_wrap(~group) # visualize

by(icecream$cups, icecream$group, sd)
table(icecream$group)

t.test(icecream$logcups ~ icecream$group, var.equal = TRUE)

ci <- t.test(icecream$logcups ~ icecream$group, var.equal = TRUE)$conf.int
exp(ci)

fibrosis <- read.delim('https://raw.githubusercontent.com/IowaBiostat/data-sets/main/cystic-fibrosis/cystic-fibrosis.txt')

hist(fibrosis$Drug)
hist(fibrosis$Placebo)

wilcox.test(fibrosis$Drug, fibrosis$Placebo, paired=TRUE)

mean(fibrosis$Drug)
mean(fibrosis$Placebo)

nhanes_woman <- read.delim('https://raw.githubusercontent.com/IowaBiostat/data-sets/main/nhanes-aw/nhanes-aw.txt')

plot(nhanes_woman$Weight~nhanes_woman$Height)

cor.test(nhanes_woman$Height, nhanes_woman$Weight, method = "spearman")
cor.test(nhanes_woman$Height, nhanes_woman$Weight, method = "pearson")
