# Load the tips data set from GitHub. read.delim() defaults to a
# tab-separated file with a header row.
tips <- read.delim(
  file = "https://raw.githubusercontent.com/IowaBiostat/data-sets/main/tips/tips.txt"
)

# Tip expressed as a percentage of the total bill, stored as a new column.
tips[["tip_perc"]] <- with(tips, 100 * (Tip / TotBill))
summary(tips[["tip_perc"]])

# Scatterplot of tip (y-axis) against total bill (x-axis).
plot(tips$TotBill, tips$Tip)

# Simple linear regression of tip on total bill. Printing the fit shows the
# call together with the estimated intercept and slope.
model <- lm(formula = Tip ~ TotBill, data = tips)
model

# The 'data =' argument can be omitted by referring to the columns directly
# with the $ operator. The estimated coefficients are the same; only the
# coefficient labels differ (they carry the 'tips$' prefix).

model <- lm(formula = tips$Tip ~ tips$TotBill)

# Correlation between tip and total bill (cor() defaults to Pearson).
tip_bill_corr <- with(tips, cor(Tip, TotBill))
tip_bill_corr

# The regression slope equals the correlation scaled by a ratio of standard
# deviations: the sd of the y variable (Tip) is the numerator and the sd of
# the x variable (TotBill) is the denominator.
with(tips, tip_bill_corr * sd(Tip) / sd(TotBill))

# Scatterplot of the data with the fitted regression line drawn on top.
plot(tips$TotBill, tips$Tip)
abline(reg = model, lwd = 2, col = "red")

# Correlation is symmetric: swapping the two variables gives the same value.
with(tips, cor(Tip, TotBill))
with(tips, cor(TotBill, Tip))

# Fit the regression in both directions so the two fitted lines can be
# compared: unlike the correlation, the regression line is NOT symmetric in
# x and y.
model <- lm(Tip ~ TotBill, data = tips) # original model y ~ x
model_inverted <- lm(TotBill ~ Tip, data = tips) # switching to x ~ y

# Split the plotting window into 1 row x 2 columns. par() returns the previous
# values of the settings it changes, so keep them in order to restore the
# layout that was active before this section (instead of assuming it was 1 x 1).
old_par <- par(mfrow = c(1, 2))

# Original model: Tip on the y-axis, Total Bill on the x-axis
plot(tips$TotBill, tips$Tip,
     xlab = "Total Bill",
     ylab = "Tips")
abline(model,
       col = "red",
       lwd = 2)

# Inverted model - note that the axes are swapped
plot(tips$Tip, tips$TotBill,
     xlab = "Tips",
     ylab = "Total Bill")
abline(model_inverted,
       col = "blue",
       lwd = 2)

# Return the plots window to the layout it had before this section
par(old_par)

# Correlation between the total bill and the tip rate (tip as % of the bill).
with(tips, cor(TotBill, tip_perc))

# Regress the tip rate on the total bill and print the fitted coefficients.
model2 <- lm(formula = tip_perc ~ TotBill, data = tips)
model2

# Scatterplot of tip rate against total bill with the fitted line overlaid.
with(tips, plot(TotBill, tip_perc, xlab = "Total Bill", ylab = "Tip Rate"))
abline(reg = model2, lwd = 2, col = "red")

# One-way table of the Sex column: counts first, then proportions.
gender_tab <- table(tips[["Sex"]])
gender_tab # number of men/women
prop.table(gender_tab) # proportion of men and women

# Two-way table with Sex in the rows and Time in the columns.
gender_vs_time <- table(tips[["Sex"]], tips[["Time"]])
# margin = 2 makes each column sum to 1, i.e. the proportion of
# female/male given time of day.
prop.table(gender_vs_time, margin = 2)

# Stacked bar chart: one bar per Time value, segments coloured by table row.
barplot(
  height = gender_vs_time,
  main = "Distribution of Bill Payment by Gender and Time of Day",
  ylab = "Frequency",
  col = c("cyan4", "coral3")
)
# barplot() assigns the colours to the table rows in order, while the legend
# below lists the labels in the opposite order with the colours reversed to
# match. NOTE(review): this is only correct if the first table row is the
# female level and the second the male level - confirm against the data.
legend(
  "topleft",
  title = "Gender",
  legend = c("Male", "Female"),
  fill = c("coral3", "cyan4"),
  cex = 0.8
)

# For each grouping variable: numeric summary of the tip rate within each
# group, followed by side-by-side boxplots of the same comparison.

# Tip rate by whether the Smoker column flags the party
by(tips$tip_perc, tips$Smoker, summary)
boxplot(tip_perc ~ Smoker, data = tips,
        ylab = "Tip Rate",
        xlab = "Smoker in Party?")

# Tip rate by time of day
by(tips$tip_perc, tips$Time, summary)
boxplot(tip_perc ~ Time, data = tips,
        ylab = "Tip Rate",
        xlab = "Time")

# Tip rate by day
by(tips$tip_perc, tips$Day, summary)
boxplot(tip_perc ~ Day, data = tips,
        ylab = "Tip Rate",
        xlab = "Day")

# r * sd(Tip): the predicted change in Tip associated with a
# one-standard-deviation increase in TotBill.
with(tips, cor(TotBill, Tip) * sd(Tip))

# Prediction in the other direction (Tip is the predictor here):
# express a 2-unit change in Tip in standard-deviation units, ...
Zx <- 2 / sd(tips[["Tip"]])
# ... scale by the correlation to get the predicted change in TotBill in
# standard-deviation units, ...
Zy <- Zx * with(tips, cor(Tip, TotBill))
# ... and convert back to the original units of TotBill.
Zy * sd(tips[["TotBill"]])

# Predicted Tip for a bill that is 10 units above the mean total bill,
# computed three ways.

# By hand: standardize the 10-unit offset in TotBill, scale it by the
# correlation, then convert back to Tip units around the mean Tip.
Zx <- 10 / sd(tips[["TotBill"]])
Zy <- Zx * with(tips, cor(TotBill, Tip))
y <- mean(tips[["Tip"]]) + Zy * sd(tips[["Tip"]])
y

# Using the model: intercept + slope * (mean bill + 10)
coef(model)[1] + coef(model)[2] * (mean(tips[["TotBill"]]) + 10)

# Using the raw numbers (presumably the rounded intercept and slope printed
# for `model` - verify against the model output):
0.9203 + 0.1050 * (mean(tips[["TotBill"]]) + 10)
