Skip to content

Instantly share code, notes, and snippets.

@saiemgilani
Last active March 20, 2020 03:56
Show Gist options
  • Save saiemgilani/a2aef1c2f2789055d0a84b546363b154 to your computer and use it in GitHub Desktop.
Save saiemgilani/a2aef1c2f2789055d0a84b546363b154 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(cfbscrapR)
library(ggimage)
library(gt)
library(png)
## Pull season data from cfbscrapR
# Play Level Data
# One cfb_pbp_data() request per week (1 through 15) with EPA/WPA columns
# requested. Each weekly result is coerced to a plain data.frame and all weeks
# are stacked with a single bind_rows() call, rather than re-copying the
# accumulated data frame on every loop iteration.
pbp_2019 <- bind_rows(
  lapply(1:15, function(week) {
    data.frame(
      cfb_pbp_data(year = 2019, season_type = "both", week = week, epa_wpa = TRUE)
    )
  })
)
# Game Level Information
games_19 <- cfb_game_info(2019)
# Attach the game-level columns to every play (plays keep all their rows;
# `game_id` on the play side matches `id` on the game side), then keep every
# column except the quarter-by-quarter line scores.
drops <- c("home_line_scores", "away_line_scores")
plays19 <- pbp_2019 %>%
  left_join(games_19, by = c("game_id" = "id"))
plays19 <- plays19[, setdiff(names(plays19), drops)]
# Keep only plays where both teams have a conference recorded (the author's
# way of eliminating FCS games) and add a garbage-time flag.
# `garbage` is 1 when the absolute score difference exceeds the cut-off for
# the current period (43 / 37 / 27 / 22 for periods 1-4) and 0 otherwise.
# Rush/pass filtering is not done here; later steps filter on rush/pass.
pbp_2019 <- plays19 %>%
  filter(!(is.na(home_conference) | is.na(away_conference))) %>%
  mutate(
    abs_diff = abs(score_diff),
    garbage = ifelse(
      (period == 1 & abs_diff > 43) |
        (period == 2 & abs_diff > 37) |
        (period == 3 & abs_diff > 27) |
        (period == 4 & abs_diff > 22),
      1, 0
    )
  )
## Cleaning the data and feature engineering
# Renames the columns used by the rest of the script and derives per-play
# indicator columns (1/0) for play type, down-and-distance situation, run
# outcome buckets, EPA success/explosiveness and short-yardage attempts.

# EPA values at or above which a rush / pass is flagged as explosive.
explosive_rush_epa <- 1.7917221
explosive_pass_epa <- 2.4486338

pbp_2019 <- pbp_2019 %>%
  rename(
    adjusted_yardline = adj_yd_line,
    offense = offense_play,
    defense = defense_play
  ) %>%
  mutate(
    # Red-zone play: adjusted yard line of 20 or less.
    rz_play = ifelse(adjusted_yardline <= 20, 1, 0),
    # Scoring opportunity: adjusted yard line of 40 or less, or a touchdown
    # play. The touchdown test uses %in% on the two play types; comparing
    # against a single "A | B" string could never match.
    so_play = ifelse(
      adjusted_yardline <= 40 |
        play_type %in% c("Passing Touchdown", "Rushing Touchdown"),
      1, 0
    ),
    # The summary tables read this flag under the name `scoring_opp`.
    scoring_opp = so_play,
    # Pass play: pass-type plays plus safeties / fumble recoveries whose text
    # mentions a sack or pass.
    pass = if_else(
      play_type == "Pass Reception" | play_type == "Passing Touchdown" |
        play_type == "Sack" | play_type == "Pass Interception Return" |
        play_type == "Pass Incompletion" | play_type == "Sack Touchdown" |
        (play_type == "Safety" & str_detect(play_text, "sacked")) |
        (play_type == "Fumble Recovery (Own)" & str_detect(play_text, "pass")) |
        (play_type == "Fumble Recovery (Opponent)" & str_detect(play_text, "pass")),
      1, 0
    ),
    # Rush play: rush-type plays plus safeties / fumble recoveries whose text
    # mentions a run.
    rush = ifelse(
      play_type == "Rush" | play_type == "Rushing Touchdown" |
        (play_type == "Safety" & str_detect(play_text, "run")) |
        (play_type == "Fumble Recovery (Opponent)" & str_detect(play_text, "run")) |
        (play_type == "Fumble Recovery (Own)" & str_detect(play_text, "run")),
      1, 0
    ),
    # Standard down: 1st down, 2nd and under 8, 3rd/4th and under 5.
    std.down = ifelse(
      down == 1 |
        (down == 2 & distance < 8) |
        (down == 3 & distance < 5) |
        (down == 4 & distance < 5),
      1, 0
    ),
    # Passing down: the complement of std.down on downs 2-4. The bounds are
    # inclusive (>=) so 2nd-and-8 and 3rd/4th-and-5 are not left out of both
    # buckets.
    pass.down = ifelse(
      (down == 2 & distance >= 8) |
        (down == 3 & distance >= 5) |
        (down == 4 & distance >= 5),
      1, 0
    ),
    # NOTE: the fallback is the literal string "NA", not a missing value.
    rush_pass = if_else(
      rush == 1, "rush",
      if_else(pass == 1, "pass", "NA")
    ),
    # Run outcome buckets by yards gained.
    stuffed_run = ifelse(rush == 1 & yards_gained <= 0, 1, 0),
    stopped_run = ifelse(rush == 1 & yards_gained <= 2, 1, 0),
    opp_rate_run = ifelse(rush == 1 & yards_gained >= 4, 1, 0),
    # EPA success: a rush or pass with non-negative EPA.
    epa_success = ifelse((rush == 1 | pass == 1) & EPA >= 0, 1, 0),
    epa_explosive = if_else(
      rush == 1 & EPA >= explosive_rush_epa, 1,
      if_else(pass == 1 & EPA >= explosive_pass_epa, 1, 0)
    ),
    epa_success_pass = ifelse(pass == 1 & EPA >= 0, 1, 0),
    epa_success_rush = ifelse(rush == 1 & EPA >= 0, 1, 0),
    epa_explosive_pass = if_else(pass == 1 & EPA >= explosive_pass_epa, 1, 0),
    epa_explosive_rush = if_else(rush == 1 & EPA >= explosive_rush_epa, 1, 0),
    # Short yardage: 2 or fewer to go; success means gaining the distance.
    short_rush_attempt = ifelse(distance <= 2 & rush == 1, 1, 0),
    short_rush_success = ifelse(distance <= 2 & rush == 1 & yards_gained >= distance, 1, 0),
    short_pass_attempt = ifelse(distance <= 2 & pass == 1, 1, 0),
    short_pass_success = ifelse(distance <= 2 & pass == 1 & yards_gained >= distance, 1, 0),
    year = 2019
  )
## Extract player names
# RB names
# For rushing plays, pull the chunk of play text that ends in "run " or in
# "<digits> Yd Run", then remove that trailing marker so only the leading
# text remains. Non-rushing plays get NA.
pbp_2019 <- mutate(
  pbp_2019,
  rush_player = ifelse(
    rush == 1,
    str_extract(play_text, "(.{0,25} )run |(.{0,25} )\\d{0,2} Yd Run"),
    NA
  ),
  rush_player = str_remove(rush_player, " run | \\d+ Yd Run")
)
# QB names
# For pass plays, pull the text around "pass from ... (", "... pass ",
# "... sacked" or "... incomplete " and remove the matched keyword.
# Passing touchdowns are re-extracted from everything after "from".
# Finally the "from " prefix, any parenthesised text and a " ," are removed.
pbp_2019 <- mutate(
  pbp_2019,
  pass_player = ifelse(
    pass == 1,
    str_extract(
      play_text,
      "pass from (.*?) \\(|(.{0,30} )pass |(.{0,30} )sacked|(.{0,30} )incomplete "
    ),
    NA
  ),
  pass_player = str_remove(pass_player, "pass | sacked| incomplete"),
  pass_player = if_else(
    play_type == "Passing Touchdown",
    str_extract(play_text, "from(.+)"),
    pass_player
  ),
  pass_player = pass_player %>%
    str_remove("from ") %>%
    str_remove("\\(.+\\)") %>%
    str_remove(" \\,")
)
## Receiver names
# For pass plays, start from everything after "to ". Plays whose text contains
# "Yd pass" instead take the text up to the last digit. Sacks have no
# receiver, so they are set to NA. The "to " prefix, anything from the first
# comma on, a trailing "for ..." clause and a 1-2 digit number are then removed.
pbp_2019 <- mutate(
  pbp_2019,
  receiver_player = ifelse(pass == 1, str_extract(play_text, "to (.+)"), NA),
  receiver_player = if_else(
    str_detect(play_text, "Yd pass"),
    str_extract(play_text, "(.+)\\d"),
    receiver_player
  ),
  receiver_player = ifelse(play_type == "Sack", NA, receiver_player),
  receiver_player = receiver_player %>%
    str_remove("to ") %>%
    str_remove("\\,.+") %>%
    str_remove("for (.+)") %>%
    str_remove("( \\d{1,2})")
)
## Team EPA summaries: box score (per offense/defense pairing) and season level
# All four tables below use the same metric definitions, so those definitions
# live in one helper instead of four hand-copied summarize()/mutate() pairs.

# Mean of `x` over the rows where `keep` is TRUE, ignoring missing values.
mean_where <- function(x, keep) {
  mean(x[keep], na.rm = TRUE)
}

# Summarise rush/pass plays into one row per combination of the `by` columns.
#
# pbp:         play-by-play data frame carrying EPA plus the indicator columns
#              read below (rush, pass, rz_play, scoring_opp, std.down,
#              pass.down, stuffed_run, stopped_run, opp_rate_run, epa_success,
#              epa_explosive, short_rush_attempt, short_pass_attempt).
# by:          character vector of grouping column names.
# standardize: when TRUE, every metric gets a `<metric>_z` column (z-score of
#              the metric across the rows of the result) and a `<metric>_p`
#              column (pnorm of that z-score). When FALSE, both are filled
#              with NA placeholders.
#
# Returns a tibble with the `by` columns first, then metric / metric_z /
# metric_p triples in definition order, then `plays` (rush/pass plays per row).
# Every mean uses na.rm = TRUE so a single missing EPA cannot turn a whole
# metric into NA. z-scores are stored as plain numeric vectors, because
# scale() returns a one-column matrix.
team_epa_stats <- function(pbp, by, standardize = TRUE) {
  stats <- pbp %>%
    group_by(!!!syms(by)) %>%
    filter(rush == 1 | pass == 1) %>%
    summarize(
      # Overall EPA per play
      avg_epa = mean(EPA, na.rm = TRUE),
      avg_epa_rush = mean_where(EPA, rush == 1),
      avg_epa_pass = mean_where(EPA, pass == 1),
      avg_rz_epa = mean_where(EPA, rz_play == 1),
      # EPA success rate
      epa_sr = mean(epa_success, na.rm = TRUE),
      epa_sr_rush = mean_where(epa_success, rush == 1),
      epa_sr_pass = mean_where(epa_success, pass == 1),
      short_rush_epa_sr = mean_where(epa_success, short_rush_attempt == 1),
      short_pass_epa_sr = mean_where(epa_success, short_pass_attempt == 1),
      # Mean EPA on stuffed / stopped / opportunity runs
      stuffed_run_rate_epa = mean_where(EPA, stuffed_run == 1 & rush == 1),
      stopped_run_rate_epa = mean_where(EPA, stopped_run == 1 & rush == 1),
      opport_run_rate_epa = mean_where(EPA, opp_rate_run == 1 & rush == 1),
      # EPA explosive rate
      epa_er = mean(epa_explosive, na.rm = TRUE),
      epa_er_rush = mean_where(epa_explosive, rush == 1),
      epa_er_pass = mean_where(epa_explosive, pass == 1),
      # Standard downs
      std_down_epa = mean_where(EPA, std.down == 1),
      std_pass_epa = mean_where(EPA, std.down == 1 & pass == 1),
      std_rush_epa = mean_where(EPA, std.down == 1 & rush == 1),
      std_down_epa_sr = mean_where(epa_success, std.down == 1),
      std_pass_epa_sr = mean_where(epa_success, std.down == 1 & pass == 1),
      std_rush_epa_sr = mean_where(epa_success, std.down == 1 & rush == 1),
      # Passing downs
      pass_down_epa = mean_where(EPA, pass.down == 1),
      pass_down_pass_epa = mean_where(EPA, pass.down == 1 & pass == 1),
      pass_down_rush_epa = mean_where(EPA, pass.down == 1 & rush == 1),
      pass_down_epa_sr = mean_where(epa_success, pass.down == 1),
      pass_down_pass_epa_sr = mean_where(epa_success, pass.down == 1 & pass == 1),
      pass_down_rush_epa_sr = mean_where(epa_success, pass.down == 1 & rush == 1),
      # Scoring opportunities
      scoring_opp_epa = mean_where(EPA, scoring_opp == 1),
      scoring_opp_pass_epa = mean_where(EPA, scoring_opp == 1 & pass == 1),
      scoring_opp_rush_epa = mean_where(EPA, scoring_opp == 1 & rush == 1),
      scoring_opp_epa_sr = mean_where(epa_success, scoring_opp == 1),
      scoring_opp_pass_epa_sr = mean_where(epa_success, scoring_opp == 1 & pass == 1),
      scoring_opp_rush_epa_sr = mean_where(epa_success, scoring_opp == 1 & rush == 1),
      # Red zone
      rz_epa = mean_where(EPA, rz_play == 1),
      rz_pass_epa = mean_where(EPA, rz_play == 1 & pass == 1),
      rz_rush_epa = mean_where(EPA, rz_play == 1 & rush == 1),
      rz_epa_sr = mean_where(epa_success, rz_play == 1),
      rz_pass_epa_sr = mean_where(epa_success, rz_play == 1 & pass == 1),
      rz_rush_epa_sr = mean_where(epa_success, rz_play == 1 & rush == 1),
      plays = n()
    ) %>%
    ungroup()

  # Everything that is neither a grouping column nor the play count is a metric.
  metrics <- setdiff(names(stats), c(by, "plays"))
  z_cols <- paste0(metrics, "_z")
  p_cols <- paste0(metrics, "_p")
  for (k in seq_along(metrics)) {
    if (standardize) {
      z <- as.numeric(scale(stats[[metrics[k]]]))
      stats[[z_cols[k]]] <- z
      stats[[p_cols[k]]] <- pnorm(z)
    } else {
      stats[[z_cols[k]]] <- rep(NA, nrow(stats))
      stats[[p_cols[k]]] <- rep(NA, nrow(stats))
    }
  }

  # Interleave as metric, metric_z, metric_p and keep `plays` last.
  stats[c(by, as.vector(rbind(metrics, z_cols, p_cols)), "plays")]
}

## box score stats
# NOTE(review): rows are keyed by offense/defense pairing only, so two games
# between the same teams would be pooled into one row - confirm whether
# game_id should be part of the key.
box_score_stats_off <- team_epa_stats(pbp_2019, by = c("offense", "defense"))
box_score_stats_def <- team_epa_stats(pbp_2019, by = c("defense", "offense"))

## new all season stats - offense
season_stats_offense <- team_epa_stats(pbp_2019, by = "offense")

## season stats - defense
# The z/p columns are created as NA placeholders here; the mutate() that
# follows fills them in.
season_stats_defense <- team_epa_stats(pbp_2019, by = "defense", standardize = FALSE)
# Standardise each defensive season metric across all teams and turn the
# z-score into a normal-CDF percentile.
#
# For every metric name `m` in `metrics` this writes two columns:
#   <m>_z : z-score of column `m` (centred and scaled with scale())
#   <m>_p : pnorm() of that z-score
#
# @param team_stats Data frame holding one column per metric.
# @param metrics    Character vector of metric column names, in output order.
# @return `team_stats` with the `_z` / `_p` columns filled in. Columns that
#   already exist are overwritten in place; new ones are appended in
#   z-then-p order per metric.
add_z_and_percentile <- function(team_stats, metrics) {
  missing_metrics <- setdiff(metrics, names(team_stats))
  if (length(missing_metrics) > 0) {
    stop("Missing metric column(s): ",
         paste(missing_metrics, collapse = ", "),
         call. = FALSE)
  }
  for (metric in metrics) {
    # scale() returns a one-column matrix carrying "scaled:center" and
    # "scaled:scale" attributes; as.numeric() drops both so the stored
    # columns are plain numeric vectors rather than matrix columns.
    z_score <- as.numeric(scale(team_stats[[metric]]))
    team_stats[[paste0(metric, "_z")]] <- z_score
    team_stats[[paste0(metric, "_p")]] <- pnorm(z_score)
  }
  team_stats
}

# Metrics to standardise, listed in the order their _z/_p pairs are written.
defense_metrics <- c(
  "avg_epa", "avg_epa_rush", "avg_epa_pass",
  "avg_rz_epa", "scoring_opp_epa",
  "std_down_epa", "pass_down_epa",
  "short_rush_epa_sr", "short_pass_epa_sr",
  "stuffed_run_rate_epa", "stopped_run_rate_epa", "opport_run_rate_epa",
  "epa_sr", "epa_sr_rush", "epa_sr_pass",
  "epa_er", "epa_er_pass", "epa_er_rush",
  "std_pass_epa", "std_rush_epa",
  "std_down_epa_sr", "std_pass_epa_sr", "std_rush_epa_sr",
  "pass_down_pass_epa", "pass_down_rush_epa",
  "pass_down_epa_sr", "pass_down_pass_epa_sr", "pass_down_rush_epa_sr",
  "scoring_opp_pass_epa", "scoring_opp_rush_epa",
  "scoring_opp_epa_sr", "scoring_opp_pass_epa_sr", "scoring_opp_rush_epa_sr",
  "rz_epa", "rz_pass_epa", "rz_rush_epa",
  "rz_epa_sr", "rz_pass_epa_sr", "rz_rush_epa_sr"
)

season_stats_defense <- add_z_and_percentile(season_stats_defense, defense_metrics)
## Flag explosive plays: rushes with EPA >= 1.7917221 or passes with
## EPA >= 2.4486338 get epa_explosive = 1, every other play 0.
# The pipeline result must be assigned back; left unassigned, the new column is
# thrown away and the whole play-by-play frame is auto-printed instead.
# NOTE(review): if `epa_explosive` is already created upstream, this recomputes
# it with the thresholds below -- confirm they match.
pbp_2019 <- pbp_2019 %>%
  mutate(epa_explosive = if_else(rush == 1 & EPA >= 1.7917221, 1,
                                 if_else(pass == 1 & EPA >= 2.4486338, 1, 0)))
# Key both box-score tables on `team`: the offensive table's `offense` column
# and the defensive table's `defense` column each become `team`.
box_score_stats_off <- rename(box_score_stats_off, team = offense)
box_score_stats_def <- rename(box_score_stats_def, team = defense)

# Attach the defensive rows to the offensive rows. Rows match on `team` and on
# the offensive table's `defense` equalling the defensive table's `offense`;
# overlapping non-key columns receive the _Offense / _Defense suffixes.
box_score_stats <- left_join(
  box_score_stats_off,
  box_score_stats_def,
  by = c("team" = "team", "defense" = "offense"),
  suffix = c("_Offense", "_Defense")
)
# The remaining `defense` key is exposed as `opponent`.
box_score_stats <- rename(box_score_stats, opponent = defense)
# Key both season tables on `team`, then put offense and defense side by side.
season_stats_offense <- rename(season_stats_offense, team = offense)
season_stats_defense <- rename(season_stats_defense, team = defense)

# Columns present in both tables get the _Offense / _Defense suffixes.
season_stats <- left_join(
  season_stats_offense,
  season_stats_defense,
  by = "team",
  suffix = c("_Offense", "_Defense")
)
## write csvs
# Each table is written to the working directory without row names, in the
# order listed; tables and file names are paired by position.
invisible(Map(
  function(stats_table, path) write.csv(stats_table, file = path, row.names = FALSE),
  list(box_score_stats, season_stats, season_stats_offense, season_stats_defense, pbp_2019),
  c("box_score_stats.csv", "season_stats.csv", "season_stats_off.csv",
    "season_stats_def.csv", "pbp_2019.csv")
))

# Reload the two per-side season tables from the files just written, keeping
# text columns as character.
season_stats_offense <- read.csv("./season_stats_off.csv", header = TRUE, stringsAsFactors = FALSE)
season_stats_defense <- read.csv("./season_stats_def.csv", header = TRUE, stringsAsFactors = FALSE)
## Add logos
# Logo lookup table: keep only the school name and logo columns, with the
# school column renamed to `team` so it can be joined to the stats tables.
# stringsAsFactors = FALSE keeps both columns as character on R < 4.0,
# consistent with the other read.csv() calls in this script.
teams_logo <- read.csv("https://raw.githubusercontent.com/saiemgilani/NCAA_FB_EPA/master/logos.csv",
                       stringsAsFactors = FALSE) %>%
  select(school, logo) %>%
  rename(team = school)

# Background image for the charts: read the PNG, rebuild it as a matrix of
# colour strings with the 4th plane scaled to 10% and used as alpha, and wrap
# it in a raster grob.
# NOTE(review): indexing m[, , 4] assumes the PNG has an alpha plane -- confirm
# for any replacement image.
m <- png::readPNG("Tomahawk_Nation_Full.png")
img <- matrix(rgb(m[, , 1], m[, , 2], m[, , 3], m[, , 4] * 0.1), nrow = dim(m)[1]) # 0.1 is the alpha multiplier
rast <- grid::rasterGrob(img, interpolate = TRUE)
# Teams that appear in the charts below
selected_teams <- c(
  "Florida State", "Florida",
  "Clemson", "Miami", "Louisville",
  "NC State", "Boston College", "Syracuse",
  "Notre Dame", "Virginia Tech", "Virginia",
  "Boise State", "Duke", "Wake Forest",
  "North Carolina", "Pittsburgh",
  "Georgia Tech", "West Virginia", "Memphis"
)

# Offense: keep only the selected teams, then attach each team's logo
season_stats_offense <- season_stats_offense %>%
  filter(team %in% selected_teams) %>%
  left_join(teams_logo, by = "team")

# Defense: same subset and logo lookup
season_stats_defense <- season_stats_defense %>%
  filter(team %in% selected_teams) %>%
  left_join(teams_logo, by = "team")
#################################################################
#
# OFFENSE - PERCENTILES
#
#################################################################
# Offense: EPA success-rate percentile (x) against EPA-per-attempt percentile (y).
# Each team is drawn as its logo, the `rast` image grob is stretched across the
# panel, and dashed blue lines mark the mean of the plotted values on each axis.
season_stats_offense %>%
  ggplot(aes(x = 100 * epa_sr_p, y = 100 * avg_epa_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_offense$epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_offense$avg_epa_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "EPA success rate percentile", y = "EPA per attempt percentile",
       title = "2019 NCAA Team Offensive Efficiency",
       subtitle = "Offensive EPA Success Rate and EPA per Attempt",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_off_epa_sr_avg_epa.png", height = 9 / 1.2, width = 16 / 1.2)
# Offense: EPA success-rate percentile (x) against EPA explosion-rate percentile (y).
# Each team is drawn as its logo, the `rast` image grob is stretched across the
# panel, and dashed blue lines mark the mean of the plotted values on each axis.
season_stats_offense %>%
  ggplot(aes(x = 100 * epa_sr_p, y = 100 * epa_er_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_offense$epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_offense$epa_er_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "EPA success rate percentile", y = "EPA explosion rate percentile",
       title = "2019 NCAA Team Offensive Efficiency",
       subtitle = "Offensive EPA Success Rate and Explosion Rate percentiles",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# Saves the last plot (the one built above).
ggsave("perc_off_epa_sr_er.png", height = 9 / 1.2, width = 16 / 1.2)
# Offense: rushing explosion-rate percentile (x) against passing explosion-rate
# percentile (y). Each team is drawn as its logo, the `rast` image grob is
# stretched across the panel, and dashed blue lines mark the mean of the plotted
# values on each axis.
season_stats_offense %>%
  ggplot(aes(x = 100 * epa_er_rush_p, y = 100 * epa_er_pass_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_offense$epa_er_rush_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_offense$epa_er_pass_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "Offensive Rushing EPA Explosion rate percentile", y = "Offensive Passing EPA Explosion rate percentile",
       title = "2019 NCAA Team Offensive Efficiency",
       subtitle = "Offensive Rushing and Passing EPA Explosion Rate percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_off_epa_er_rush_pass.png", height = 9 / 1.2, width = 16 / 1.2)
# Offense: standard-down success-rate percentile (x) against passing-down
# success-rate percentile (y). Each team is drawn as its logo, the `rast` image
# grob is stretched across the panel, and dashed blue lines mark the mean of the
# plotted values on each axis.
season_stats_offense %>%
  ggplot(aes(x = 100 * std_down_epa_sr_p, y = 100 * pass_down_epa_sr_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_offense$std_down_epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_offense$pass_down_epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "Standard Down EPA success rate percentile", y = "Passing Down EPA success rate percentile",
       title = "2019 NCAA Team Offensive Efficiency",
       subtitle = "Offensive EPA Success Rate for Standard and Passing Downs percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_off_epa_sr_std_pass_down.png", height = 9 / 1.2, width = 16 / 1.2)
# Offense: rushing success-rate percentile (x) against passing success-rate
# percentile (y). Each team is drawn as its logo, the `rast` image grob is
# stretched across the panel, and dashed blue lines mark the mean of the plotted
# values on each axis.
season_stats_offense %>%
  ggplot(aes(x = 100 * epa_sr_rush_p, y = 100 * epa_sr_pass_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_offense$epa_sr_rush_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_offense$epa_sr_pass_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "Offensive Rushing EPA success rate percentile", y = "Offensive Passing EPA success rate percentile",
       title = "2019 NCAA Team Offensive Efficiency",
       subtitle = "Offensive Rushing and Passing EPA Success Rate percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_off_epa_sr_rush_pass.png", height = 9 / 1.2, width = 16 / 1.2)
#################################################################
#
# DEFENSE - PERCENTILES
#
#################################################################
# Defense: EPA success-rate percentile (x) against EPA-per-attempt percentile (y).
# Each team is drawn as its logo, the `rast` image grob is stretched across the
# panel, and dashed blue lines mark the mean of the plotted values on each axis.
season_stats_defense %>%
  ggplot(aes(x = 100 * epa_sr_p, y = 100 * avg_epa_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_defense$epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_defense$avg_epa_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "EPA success rate percentile", y = "EPA per attempt percentile",
       title = "2019 NCAA Team Defensive Efficiency",
       subtitle = "Defensive EPA Success Rate and EPA per Attempt percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_def_epa_sr_avg_epa.png", height = 9 / 1.2, width = 16 / 1.2)
# Defense: EPA success-rate percentile (x) against EPA explosion-rate percentile (y).
# Each team is drawn as its logo, the `rast` image grob is stretched across the
# panel, and dashed blue lines mark the mean of the plotted values on each axis.
season_stats_defense %>%
  ggplot(aes(x = 100 * epa_sr_p, y = 100 * epa_er_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_defense$epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_defense$epa_er_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "EPA success rate percentile", y = "EPA explosion rate percentile",
       title = "2019 NCAA Team Defensive Efficiency",
       subtitle = "Defensive EPA Success Rate and Explosion Rate percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# Saves the last plot (the one built above).
ggsave("perc_def_epa_sr_er.png", height = 9 / 1.2, width = 16 / 1.2)
# Defense: rushing explosion-rate percentile (x) against passing explosion-rate
# percentile (y). Each team is drawn as its logo, the `rast` image grob is
# stretched across the panel, and dashed blue lines mark the mean of the plotted
# values on each axis.
season_stats_defense %>%
  ggplot(aes(x = 100 * epa_er_rush_p, y = 100 * epa_er_pass_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_defense$epa_er_rush_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_defense$epa_er_pass_p),
             linetype = "dashed",
             color = "blue") +
  # Both axes plot the *_p percentile columns, so the labels say "percentile".
  labs(x = "EPA rush explosion rate percentile", y = "EPA pass explosion rate percentile",
       title = "2019 NCAA Team Defensive Efficiency",
       subtitle = "Defensive EPA Rush and Pass Explosion Rate percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# Saves the last plot (the one built above).
ggsave("perc_def_epa_er_pass_rush.png", height = 9 / 1.2, width = 16 / 1.2)
# Defense: standard-down success-rate percentile (x) against passing-down
# success-rate percentile (y). Each team is drawn as its logo, the `rast` image
# grob is stretched across the panel, and dashed blue lines mark the mean of the
# plotted values on each axis.
season_stats_defense %>%
  ggplot(aes(x = 100 * std_down_epa_sr_p, y = 100 * pass_down_epa_sr_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_defense$std_down_epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_defense$pass_down_epa_sr_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "Standard Down EPA success rate percentile", y = "Passing Down EPA success rate percentile",
       title = "2019 NCAA Team Defensive Efficiency",
       subtitle = "Defensive EPA Success Rate for Standard and Passing Downs percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_def_epa_sr_std_pass_down.png", height = 9 / 1.2, width = 16 / 1.2)
# Defense: rushing success-rate percentile (x) against passing success-rate
# percentile (y). Each team is drawn as its logo, the `rast` image grob is
# stretched across the panel, and dashed blue lines mark the mean of the plotted
# values on each axis.
season_stats_defense %>%
  ggplot(aes(x = 100 * epa_sr_rush_p, y = 100 * epa_sr_pass_p)) +
  # `logo` is looked up in the plot data; `df$col` inside aes() is discouraged
  geom_image(aes(image = logo), size = .03, by = "width", asp = 1.8) +
  annotation_custom(rast, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
  geom_vline(xintercept = mean(100 * season_stats_defense$epa_sr_rush_p),
             linetype = "dashed",
             color = "blue") +
  geom_hline(yintercept = mean(100 * season_stats_defense$epa_sr_pass_p),
             linetype = "dashed",
             color = "blue") +
  labs(x = "Defensive Rushing EPA success rate percentile", y = "Defensive Passing EPA success rate percentile",
       title = "2019 NCAA Team Defensive Efficiency",
       subtitle = "Defensive Rushing and Passing EPA Success Rate percentile",
       caption = "Figure: @SaiemGilani | Data: @CFB_data with #cfbscrapR") +
  theme_bw() +
  theme(axis.title = element_text(size = 11),
        axis.text = element_text(size = 10),
        plot.title = element_text(size = 12),
        plot.subtitle = element_text(size = 10),
        plot.caption = element_text(size = 10),
        plot.margin = unit(c(2, 2, 2, 2), "cm"))
# ggsave() is a side-effecting call, not a plot layer: it runs on its own line
# and saves the last plot. Adding it with `+` fails on newer ggplot2 releases,
# where ggsave() returns the file name.
ggsave("perc_def_epa_sr_rush_pass.png", height = 9 / 1.2, width = 16 / 1.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment