library(magrittr)
library(kableExtra)

# Boxplot of systolic blood pressure readings grouped by measurement time.
# `bp` holds 30 readings: the first 15 are labelled "before" and the last 15
# "after" (see the `rep(..., each = 15)` below).
bp <- data.frame(
  blpr = c(
    118, 144, 134, 110, 119, 128, 132, 136, 125, 160, 190, 140, 150, 160, 220,
    100, 70, 90, 75, 110, 115, 108, 50, 111, 110, 60, 80, 70, 40, 60
  ),
  time = rep(c("before", "after"), each = 15)
)

# One horizontal box per level of `time`.
boxplot(
  blpr ~ time,
  data = bp,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  horizontal = TRUE,
  main = "Systolic Blood Pressures",
  col = "orchid"
)

# Simulated data: 300 draws from a gamma distribution with shape 8 and rate 1,
# seeded so the draws are reproducible.
set.seed(3516)
dat <- rgamma(n = 300, shape = 8, rate = 1)

# NOTE(review): the histogram below plots a second, independent draw of 300
# values rather than `dat` -- confirm whether `hist(dat, ...)` was intended.
hist(
  rgamma(300, 8, 1),
  breaks = seq(0, 20, by = 2), # bins of width 2 covering 0 to 20
  col = "skyblue1",
  main = "Sleeping Habits of College Students",
  xlab = "Average Time Slept (Hours)",
  xaxt = "n", # suppress the default x axis; it is redrawn just below
  ylim = c(0, 100)
)

# Custom x axis with ticks every 4 units, plus y-axis ticks every 10 units
# (the default y axis is not suppressed, so these are drawn on top of it).
axis(side = 1, at = seq(0, 20, by = 4))
axis(side = 2, at = seq(0, 100, by = 10))

# Simulate 20 paired observations: `classicaltime` is normal with mean 50 and
# sd 10, and `testscores` is `classicaltime` plus normal noise (mean 30, sd 9).
set.seed(17)
classicaltime <- rnorm(20, 50, 10)
testscores <- classicaltime + rnorm(20, 30, 9)

# Simple linear regression of test scores on listening time.
mod <- lm(testscores ~ classicaltime)

# Print the summary statistics, one per line. Each label after the first starts
# with "\n" because cat() does not append a newline on its own.
cat(
  "Mean Time Listening to Classical Music:",
  signif(mean(classicaltime), digits = 4)
)
cat("\nMean Test Scores:", signif(mean(testscores), digits = 4))
cat(
  "\nStd Dev of Time Listening to Classical Music:",
  signif(sd(classicaltime), digits = 3)
)
cat("\nStd Dev of Test Scores:", signif(sd(testscores), digits = 3))
cat("\nCorrelation:", signif(cor(classicaltime, testscores), digits = 4))
# The second coefficient is the slope (the first is the intercept).
# digits = 6 is signif()'s default, written out here for clarity.
cat("\nRegression Line Slope:", signif(coef(mod)[[2]], digits = 6))

# Two-way table of disease status (rows) by test result (columns). The final
# row and the final column hold the marginal totals, written as explicit sums
# so their origin is visible.
tab4 <- data.frame(
  Disease = c("Present", "Absent", "Total"),
  Positive = c(2970, 11000, 2970 + 11000),
  Negative = c(30, 539000, 30 + 539000),
  Total = c(2970 + 30, 11000 + 539000, 13970 + 539030)
)
# Render `tab4` as a styled table. The extra header row places "Test Result"
# over two columns and leaves the first and last columns with a blank header.
tab4 %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  add_header_above(header = c(" " = 1, "Test Result" = 2, " " = 1))

# Values for the binomial example: n trials, x successes, and success
# probability p on each trial.
n <- 3
x <- 2
p <- 1 / 3

# The binomial formula written out by hand with factorials:
factorial(n) / (factorial(x) * factorial(n - x)) * p^x * (1 - p)^(n - x)

# The same probability, letting 'choose' supply the binomial coefficient:
choose(n, x) * p^x * (1 - p)^(n - x)

# And once more through the built-in binomial density:
dbinom(x, size = n, prob = p)

# Probability of exactly 5 successes in 10 trials with success probability 1/3,
# computed by hand from the binomial formula:
factorial(10) / (factorial(5) * factorial(10 - 5)) *
  (1 / 3)^5 *
  (1 - (1 / 3))^(10 - 5)

# The same probability from the dbinom function:
dbinom(x = 5, size = 10, prob = 1 / 3)

# Lower tail: the probability of picking 1 or fewer potatoes (0 or 1 potatoes)
pbinom(1, size = 10, prob = 1 / 3)

# Upper tail: the probability of picking 5 or more potatoes
# (5, 6, 7, 8, 9, or 10 potatoes).
# pbinom(q, ...) returns P(X <= q), so P(X >= 5) = 1 - P(X <= 4); that is why
# the first argument is 4 rather than 5.
1 - pbinom(4, size = 10, prob = 1 / 3)

# Alternatively, lower.tail = FALSE returns P(X > 4) directly.
# (FALSE is spelled out: `F` is an ordinary variable that can be reassigned.)
pbinom(4, size = 10, prob = 1 / 3, lower.tail = FALSE)

# Total of the extremes (p-value): lower tail plus upper tail
pbinom(1, size = 10, prob = 1 / 3) + (1 - pbinom(4, size = 10, prob = 1 / 3))

# The probability of picking 5 or more potatoes (5, 6, 7, 8, 9, or 10 potatoes).
# dbinom is vectorized over its first argument, so the six point probabilities
# are computed in one call and summed. The outer parentheses print the result.
(five_or_more <- sum(dbinom(5:10, size = 10, prob = 1 / 3)))

# The probability of picking 0 or 1 potatoes
(zero_or_one <- sum(dbinom(0:1, size = 10, prob = 1 / 3)))

# The p-value is the sum of the two extremes: picking one or fewer, and picking
# 5 or more. This matches the total obtained from the pbinom tail
# probabilities.
(p_val <- zero_or_one + five_or_more)

# Exact binomial test of 5 successes in 10 trials against p = 1/3; its
# two-sided p-value should agree with `p_val`.
binom.test(x = 5, n = 10, p = 1 / 3)

# Recall that pbinom returns the probability of getting less than or equal to
# its first argument. To get the probability of 2 or more, we therefore pass 1
# (not 2) and subtract the result from 1.
1 - pbinom(1, size = 10, prob = 1 / 6)

# Extract only the p-value from the exact binomial test of 5 successes in 10
# trials against p = 1/6.
binom.test(x = 5, n = 10, p = 1 / 6)[["p.value"]]
