## -----------------------------------------------------------------------------
# Read the tips data set (a tab-delimited text file) straight from GitHub
tips <- read.delim(
  file = "https://raw.githubusercontent.com/IowaBiostat/data-sets/main/tips/tips.txt"
)


## -----------------------------------------------------------------------------
#| eval: false
# dataset$new_variable


## -----------------------------------------------------------------------------
# Percent tipped on each bill, computed two equivalent ways
## (a) spelling out every column with the $ operator
tips$tip_perc <- (tips$Tip / tips$TotBill) * 100

## (b) with() evaluates the expression inside the data set, so the columns
##     can be named without repeating the tips$ prefix
tips$tip_perc <- with(tips, (Tip / TotBill) * 100)

# quick numeric summary of the new variable
with(tips, summary(tip_perc))


## -----------------------------------------------------------------------------
#| eval: false
# ggplot(data = dataset,           # step 1: set your data
#        aes(x = x_var, y = y_var) # step 2: define aesthetics
#        ) +
#   geom_point()                 # step 3: add geometries


## -----------------------------------------------------------------------------
library(ggplot2)
# Scatterplot: total bill on the x-axis, tip on the y-axis
ggplot(data = tips, mapping = aes(x = TotBill, y = Tip)) +
  geom_point()


## -----------------------------------------------------------------------------
#| eval: false
# # syntax
# lm(y_variable ~ x_variable, data = dataset)


## -----------------------------------------------------------------------------
# Simple linear regression with Tip as the response and TotBill as the
# predictor; printing the fit shows the estimated intercept and slope
my_model <- lm(formula = Tip ~ TotBill, data = tips)
print(my_model)


## -----------------------------------------------------------------------------
# Same scatterplot with the least-squares line drawn on top
# (se = FALSE leaves out the confidence band)
ggplot(data = tips, mapping = aes(x = TotBill, y = Tip)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm")


## -----------------------------------------------------------------------------
# Correlation between tip and total bill:
tip_bill_corr <- with(tips, cor(Tip, TotBill))
print(tip_bill_corr)

# Rescale the correlation by sd(y) / sd(x); in simple linear regression
# this is the slope of Tip on TotBill
with(tips, tip_bill_corr * sd(Tip) / sd(TotBill))


## -----------------------------------------------------------------------------
#| code-fold: true
#| message: false

# Side-by-side comparison of the two regression directions:
# Tip regressed on TotBill (left) and TotBill regressed on Tip (right).
#
# par(mfrow = ...) only controls base graphics. ggplot2 draws with grid, so
# the panels are arranged with a 1 x 2 grid layout instead. This also avoids
# leaving a modified par() setting behind for later base plots.
tip_on_bill <- ggplot(tips, aes(x = TotBill, y = Tip)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

bill_on_tip <- ggplot(tips, aes(x = Tip, y = TotBill)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Start a fresh page and split it into one row with two columns
grid::grid.newpage()
grid::pushViewport(
  grid::viewport(layout = grid::grid.layout(nrow = 1, ncol = 2))
)

# Passing vp makes print() draw into that layout cell instead of a new page
print(tip_on_bill, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1))
print(bill_on_tip, vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2))

# Leave the layout viewport so later plots start from the top level
grid::popViewport()


## -----------------------------------------------------------------------------
# Fit the regression in both directions to compare the coefficients
## Tip as the response, TotBill as the predictor
lm(formula = Tip ~ TotBill, data = tips)

## TotBill as the response, Tip as the predictor
lm(formula = TotBill ~ Tip, data = tips)


## -----------------------------------------------------------------------------
# Tip percentage against total bill, with the least-squares line overlaid
ggplot(data = tips, mapping = aes(x = TotBill, y = tip_perc)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  labs(
    title = "Tip Rate Decreases as Bill Increases",
    x = "Total Bill",
    y = "Tip Rate"
  )


## -----------------------------------------------------------------------------
# Correlation between total bill and tip percentage, followed by the
# regression of tip percentage on total bill
with(tips, cor(TotBill, tip_perc))
model2 <- lm(formula = tip_perc ~ TotBill, data = tips)
print(model2)


## -----------------------------------------------------------------------------
# One-way frequency table of the Sex column
gender_tab <- table(tips[["Sex"]])
print(gender_tab)  # counts in each category

# the same table expressed as proportions of the total
print(prop.table(gender_tab))


## -----------------------------------------------------------------------------
# Two-way table with Sex in the rows and Time in the columns
gender_vs_time <- table(tips[["Sex"]], tips[["Time"]])
# margin = 2 scales each column to sum to 1: proportion of each sex given
# the time of day
prop.table(gender_vs_time, margin = 2)


## -----------------------------------------------------------------------------
# Bar chart of counts for each Time, with bars filled by Sex
ggplot(data = tips, mapping = aes(x = Time, fill = Sex)) +
  geom_bar()


## -----------------------------------------------------------------------------
# Numeric summary of tip percentage within each Smoker group
by(data = tips$tip_perc, INDICES = tips$Smoker, FUN = summary)


# Boxplots of tip percentage, one box per Smoker group
ggplot(data = tips, mapping = aes(x = Smoker, y = tip_perc)) +
  geom_boxplot() +
  labs(
    x = "Smoker in Party?",
    y = "Percent Tipped",
    title = "Do smokers tip differently than nonsmokers?"
  )


## -----------------------------------------------------------------------------
# Numeric summary of tip percentage within each Time group
by(data = tips$tip_perc, INDICES = tips$Time, FUN = summary)

# Boxplots of tip percentage, one box per Time group
ggplot(data = tips, mapping = aes(x = Time, y = tip_perc)) +
  geom_boxplot() +
  labs(
    title = "Does tipping behavior change at lunch versus dinner?",
    x = "Time",
    y = "Tip Rate"
  )


## -----------------------------------------------------------------------------
# Numeric summary of tip percentage within each Day group
by(data = tips$tip_perc, INDICES = tips$Day, FUN = summary)

# Boxplots of tip percentage, one box per Day group
ggplot(data = tips, mapping = aes(x = Day, y = tip_perc)) +
  geom_boxplot() +
  labs(
    title = "Does tipping behavior differ by days of the week?",
    x = "Day",
    y = "Tip Rate"
  )


## -----------------------------------------------------------------------------
# Correlation times sd(Tip): the predicted change in Tip for a
# one-standard-deviation increase in TotBill
with(tips, cor(TotBill, Tip) * sd(Tip))


## -----------------------------------------------------------------------------
# Predicted change in TotBill for a tip that is 2 units higher,
# worked out on the standardized (z-score) scale
Zx <- with(tips, 2 / sd(Tip))             # change in Tip, in SD units
Zy <- Zx * with(tips, cor(Tip, TotBill))  # predicted change in TotBill, in SD units
with(tips, Zy * sd(TotBill))              # converted back to the TotBill scale


## -----------------------------------------------------------------------------
# By hand:
## predicted Tip for a bill that is 10 above the mean TotBill
Zx <- with(tips, 10 / sd(TotBill))        # distance above the mean bill, in SD units
Zy <- Zx * with(tips, cor(TotBill, Tip))  # predicted distance from the mean tip, in SD units
y <- with(tips, mean(Tip) + Zy * sd(Tip))
y

# Using the model:
# if you have questions about this code, ask your instructor
model <- lm(formula = Tip ~ TotBill, data = tips)
## intercept + slope * (mean bill + 10)
coef(model)[1] + coef(model)[2] * (mean(tips$TotBill) + 10)

# Using the raw numbers:
## presumably the intercept and slope rounded from the fitted model above
0.9203 + 0.1050 * (mean(tips$TotBill) + 10)

