# echo=FALSE
# Read the tips data from a remote URL into the `tips` data frame.
# read.delim() parses a tab-separated text file with a header row.
tips <- read.delim(
  file = "http://myweb.uiowa.edu/pbreheny/data/tips.txt"
)


# 
# Add an `index` column that numbers the rows of `tips`.
# One way is to look up the total number of observations in the Environment
# pane and type it in. This is hard-coded, so it is only correct while `tips`
# has exactly 244 rows:
tips$index <- 1:244

# A more robust way is to ask R for the row count with 'nrow'. seq_len(n)
# gives 1, 2, ..., n just like 1:n, but returns an empty sequence (instead of
# c(1, 0)) when the data frame has no rows.
tips$index <- seq_len(nrow(tips))


# 
# Tip as a proportion of the total bill, one value per row, followed by its
# five-number summary and mean.
tips[["Tip_Prop"]] <- with(tips, Tip / TotBill)
summary(tips[["Tip_Prop"]])

# The same quantity on a percentage scale (proportion times 100).
tips[["Tip_Perc"]] <- 100 * tips[["Tip_Prop"]]
summary(tips[["Tip_Perc"]])


# fig.width = 5
# Scatterplot of tip (vertical axis) against total bill (horizontal axis).
# No labels are supplied, so the axes are labelled with the expressions
# passed in.
plot(tips$TotBill, tips$Tip)


# fig.width = 5
# Keep only the rows where Smoker equals "No" in a separate data frame, so
# the plotting call below can refer to it directly.
nonsmokers <- tips[tips[["Smoker"]] == "No", ]

# Tip against total bill for the non-smoker rows, with explicit axis labels.
plot(
  x = nonsmokers$TotBill,
  y = nonsmokers$Tip,
  xlab = "Total Bill Non-Smokers",
  ylab = "Tip Non-Smokers"
)


# 
# Fit a simple linear regression of Tip on TotBill, looking both columns up
# in `tips`, and print the fitted model.
model <- lm(formula = Tip ~ TotBill, data = tips)
model

# The 'data =' argument is optional: the formula can instead reference the
# columns directly with the $ operator. The estimated coefficients are the
# same as above.
model <- lm(formula = tips$Tip ~ tips$TotBill)


# 
# Correlation between tip and total bill (cor() defaults to Pearson's r),
# stored for reuse and then displayed.
tip_bill_corr <- with(tips, cor(x = Tip, y = TotBill))
tip_bill_corr


# 
# Correlation scaled by the ratio of standard deviations: r * sd(y) / sd(x).
# The y variable (Tip) goes in the numerator and the x variable (TotBill) in
# the denominator; this equals the least-squares slope of Tip on TotBill.
with(tips, tip_bill_corr * sd(Tip) / sd(TotBill))


# fig.width = 5
# Scatterplot of tip against total bill, then overlay the fitted line from
# `model` in red with double line width.
plot(x = tips$TotBill, y = tips$Tip)
abline(reg = model, col = "red", lwd = 2)


# 
# Correlation is symmetric: swapping the two variables gives the same value.
with(tips, cor(Tip, TotBill))
with(tips, cor(TotBill, Tip))


# 
# Fit both regressions: Tip on TotBill, and the inverted TotBill on Tip.
model <- lm(Tip ~ TotBill, data = tips)
model_inverted <- lm(TotBill ~ Tip, data = tips)

# Lay out the next two plots side by side (1 row, 2 columns). par() returns
# the settings it replaces, so keep them in order to restore the exact
# previous layout afterwards instead of assuming it was c(1, 1).
old_par <- par(mfrow = c(1, 2))

# Original model: Tip on the vertical axis, TotBill on the horizontal axis
plot(tips$TotBill, tips$Tip,
     xlab = "Total Bill",
     ylab = "Tips")
abline(model,
       col = "red",
       lwd = 2)

# Inverted model - note that the axes are swapped
plot(tips$Tip, tips$TotBill,
     xlab = "Tips",
     ylab = "Total Bill")
abline(model_inverted,
       col = "blue",
       lwd = 2)

# Return the plots window to the layout that was active before this section
par(old_par)


# fig.width = 5
# Part A: new column holding Tip divided by Size for every row.
tips[["Tip_per_person"]] <- with(tips, Tip / Size)

# Part B: subset of rows whose Time is "Night".
night_owls <- tips[tips[["Time"]] == "Night", ]

# Part C: tip per person against total bill for the night subset, with a
# title and axis labels.
plot(
  night_owls$TotBill, night_owls$Tip_per_person,
  main = "People Dining at Night Scatterplot",
  xlab = "Total Bill Amount",
  ylab = "Tip per Person"
)

# Part D: regress tip per person on total bill within the night subset and
# draw the fitted line on the plot from Part C.
night_model <- lm(formula = night_owls$Tip_per_person ~ night_owls$TotBill)
abline(reg = night_model)

# Part E: correlation times the SD of Tip, i.e. the change in predicted Tip
# that goes with a one-SD increase in TotBill.
with(tips, cor(TotBill, Tip) * sd(Tip))

# Part F: express a 2-unit change in Tip in SD units, carry it across with
# the correlation, then convert back to TotBill units.
Zx <- 2 / sd(tips[["Tip"]])
Zy <- Zx * with(tips, cor(Tip, TotBill))
Zy * sd(tips[["TotBill"]])

# Part G (Three different methods): predicted Tip for a TotBill that is
# 10 units above the mean TotBill.
  # By hand: standardize the 10-unit offset, multiply by the correlation,
  # and convert back to Tip units around the mean Tip.
Zx <- 10 / sd(tips[["TotBill"]])
Zy <- Zx * with(tips, cor(TotBill, Tip))
(y <- mean(tips[["Tip"]]) + Zy * sd(tips[["Tip"]]))

  # Using the model: intercept + slope * x, taken from the fitted `model`.
coef(model)[1] + coef(model)[2] * (mean(tips[["TotBill"]]) + 10)

  # Using the raw numbers: the same formula with the intercept and slope
  # typed in by hand (presumably rounded from the printed model output).
0.9203 + 0.1050 * (mean(tips[["TotBill"]]) + 10)