Last active
March 27, 2020 16:10
-
-
Save saiemgilani/ae9f9f8fb5fdb153d236675e89ebecfd to your computer and use it in GitHub Desktop.
cfbscrapR_tutorial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################## | |
# If this is your first time working with R, | |
# tidyverse, devtools, remotes, ggimage, cfbscrapR, or GT: | |
# Uncomment (highlight and Ctrl-Shift-C) | |
# and install by running (highlight and Ctrl-Enter) the below six lines as necessary
# install.packages("tidyverse") | |
# install.packages("devtools") | |
# install.packages("remotes") | |
# install.packages("ggimage") | |
# devtools::install_github("meysubb/cfbscrapR") | |
# remotes::install_github("rstudio/gt") | |
# set your working directory (in other words, your working folder location on your computer) | |
# using something like: | |
# setwd("C:/Users/username/Documents/NCAA_FB_EPA") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(ggimage) | |
library(cfbscrapR) | |
library(gt) | |
# Download the 2019 play-by-play data one week at a time and stack the weekly
# results into a single data frame, `pbp_2019`.
# This takes a minute or two to run. You only need to run it once; after it
# has completed, feel free to comment it out.
# If you need to exit the session and want to store the data,
# see the write.csv comment below.
weeks <- 1:15
weekly_pbp <- lapply(weeks, function(wk) {
  # One request per week; the week number is stored in a `week` column.
  week_pbp <- cfb_pbp_data(
    year = 2019, season_type = "both", week = wk, epa_wpa = TRUE
  ) %>%
    mutate(week = wk)
  data.frame(week_pbp)
})
# Bind once at the end instead of re-copying a growing data frame each week.
pbp_2019 <- bind_rows(weekly_pbp)
# If you would like to save to csv, run the line below
# write.csv(pbp_2019, "pbp_2019.csv", row.names = FALSE)
# warning, if full year pbp, ~91mb
head(pbp_2019)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Peek at the main descriptive columns for the first few plays.
head(
  select(
    pbp_2019,
    offense_play, defense_play, down, distance, play_type, yards_gained
  )
)
# Column names and types for the whole data set.
glimpse(pbp_2019)
# Distinct play types, then how often each one occurs (most frequent first).
levels(factor(pbp_2019$play_type))
count(pbp_2019, play_type, sort = TRUE)
# Keep only the rows flagged as a rush or a pass.
plays <- filter(pbp_2019, rush == 1 | pass == 1)
# Teams to keep in the summaries and plots below.
selected_teams <- c(
  "Florida State", "Florida", "Clemson", "Miami", "Louisville",
  "NC State", "Boston College", "Syracuse", "Notre Dame",
  "Virginia Tech", "Virginia", "Boise State", "Duke", "Wake Forest",
  "North Carolina", "Pittsburgh", "Georgia Tech", "West Virginia",
  "Memphis"
)
# Per-team offensive summary: mean EPA on pass plays, mean EPA on rush plays,
# and the number of rush/pass plays run. Only selected teams with more than
# 300 plays are kept.
# NOTE(review): mean() is called without na.rm, so a single NA in EPA makes a
# team's average NA -- confirm whether EPA can be missing in this data.
offense <- plays %>%
  group_by(offense_play) %>%
  summarise(
    epa.pass.off = mean(EPA[pass == 1]),
    epa.rush.off = mean(EPA[rush == 1]),
    num.plays = n()
  ) %>%
  filter(num.plays > 300) %>%
  filter(offense_play %in% selected_teams)
# Per-team defensive summary: the same statistics, grouped by the defending
# team instead of the team with the ball.
defense <- plays %>%
  group_by(defense_play) %>%
  summarise(
    epa.pass.def = mean(EPA[pass == 1]),
    epa.rush.def = mean(EPA[rush == 1]),
    num.plays = n()
  ) %>%
  filter(num.plays > 300) %>%
  filter(defense_play %in% selected_teams)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the logo lookup table and keep its `school` and `logo` columns.
# stringsAsFactors = FALSE keeps both columns as character on R < 4.0, where
# read.csv defaulted to factors; the later joins on `school` then compare
# character with character and `logo` reaches geom_image as plain text.
cfblogos <- read.csv(
  "https://raw.githubusercontent.com/saiemgilani/NCAA_FB_EPA/master/logos.csv",
  stringsAsFactors = FALSE
) %>%
  select(school, logo)
# Offensive Team Pass/Rush EPA
# Attach each team's defensive summary, then its logo, to the offensive
# summary (one row per team in `offense`).
team_epa <- left_join(offense, defense, by = c("offense_play" = "defense_play"))
team.epa <- left_join(team_epa, cfblogos, by = c("offense_play" = "school"))
head(team.epa)
# Logo scatter of rush vs. pass EPA per play; the dashed lines mark the mean
# of each axis across the plotted teams.
off_epa_plot <- ggplot(team.epa, aes(x = epa.rush.off, y = epa.pass.off)) +
  # Map the logo through aes() so each image stays tied to its own row rather
  # than relying on an external vector lining up with the layer data.
  geom_image(aes(image = logo), asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.epa$epa.rush.off),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.epa$epa.pass.off),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Offensive Rush EPA/Play", y = "Offensive Pass EPA/Play",
    title = "2019 NCAA Team Offensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# Display the plot, then save that exact object instead of whatever
# last_plot() happens to hold.
off_epa_plot
ggsave("team_off_epa_logos.png", plot = off_epa_plot)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Defensive Team Pass/Rush EPA
# Attach each team's offensive summary, then its logo, to the defensive
# summary (one row per team in `defense`).
team_depa <- left_join(defense, offense, by = c("defense_play" = "offense_play"))
team.depa <- left_join(team_depa, cfblogos, by = c("defense_play" = "school"))
head(team.depa)
# Logo scatter of rush vs. pass EPA per play allowed; the dashed lines mark
# the mean of each axis across the plotted teams.
def_epa_plot <- ggplot(team.depa, aes(x = epa.rush.def, y = epa.pass.def)) +
  # Map the logo through aes() so each image stays tied to its own row rather
  # than relying on an external vector lining up with the layer data.
  geom_image(aes(image = logo), asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.depa$epa.rush.def),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.depa$epa.pass.def),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Defensive Rush EPA/Play", y = "Defensive Pass EPA/Play",
    title = "2019 NCAA Team Defensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# Display the plot, then save that exact object instead of whatever
# last_plot() happens to hold.
def_epa_plot
ggsave("team_def_epa_logos.png", plot = def_epa_plot)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################## | |
# If this is your first time working with R, | |
# tidyverse, devtools, remotes, ggimage, cfbscrapR, or GT: | |
# Uncomment (highlight and Ctrl-Shift-C) | |
# and install by running (highlight and Ctrl-Enter) the below six lines as necessary
# install.packages("tidyverse") | |
# install.packages("devtools") | |
# install.packages("remotes") | |
# install.packages("ggimage") | |
# devtools::install_github("meysubb/cfbscrapR") | |
# remotes::install_github("rstudio/gt") | |
# set your working directory (in other words, your working folder location on your computer) | |
# using something like: | |
# setwd("C:/Users/username/Documents/NCAA_FB_EPA") | |
library(tidyverse) | |
library(ggimage) | |
library(cfbscrapR) | |
library(gt) | |
# Download the 2019 play-by-play data one week at a time and stack the weekly
# results into a single data frame, `pbp_2019`.
# This takes a minute or two to run. You only need to run it once; after it
# has completed, feel free to comment it out.
# If you need to exit the session and want to store the data,
# see the write.csv comment below.
weeks <- 1:15
weekly_pbp <- lapply(weeks, function(wk) {
  # One request per week; the week number is stored in a `week` column.
  week_pbp <- cfb_pbp_data(
    year = 2019, season_type = "both", week = wk, epa_wpa = TRUE
  ) %>%
    mutate(week = wk)
  data.frame(week_pbp)
})
# Bind once at the end instead of re-copying a growing data frame each week.
pbp_2019 <- bind_rows(weekly_pbp)
# If you would like to save to csv, run the line below
# write.csv(pbp_2019, "pbp_2019.csv", row.names = FALSE)
# warning, if full year pbp, ~91mb
head(pbp_2019)
# Peek at the main descriptive columns for the first few plays.
head(
  select(
    pbp_2019,
    offense_play, defense_play, down, distance, play_type, yards_gained
  )
)
# Column names and types for the whole data set.
glimpse(pbp_2019)
# Distinct play types, then how often each one occurs (most frequent first).
levels(factor(pbp_2019$play_type))
count(pbp_2019, play_type, sort = TRUE)
# Keep only the rows flagged as a rush or a pass.
plays <- filter(pbp_2019, rush == 1 | pass == 1)
# Teams to keep in the summaries and plots below.
selected_teams <- c(
  "Florida State", "Florida", "Clemson", "Miami", "Louisville",
  "NC State", "Boston College", "Syracuse", "Notre Dame",
  "Virginia Tech", "Virginia", "Boise State", "Duke", "Wake Forest",
  "North Carolina", "Pittsburgh", "Georgia Tech", "West Virginia",
  "Memphis"
)
# Per-team offensive summary: mean EPA on pass plays, mean EPA on rush plays,
# and the number of rush/pass plays run. Only selected teams with more than
# 300 plays are kept.
# NOTE(review): mean() is called without na.rm, so a single NA in EPA makes a
# team's average NA -- confirm whether EPA can be missing in this data.
offense <- plays %>%
  group_by(offense_play) %>%
  summarise(
    epa.pass.off = mean(EPA[pass == 1]),
    epa.rush.off = mean(EPA[rush == 1]),
    num.plays = n()
  ) %>%
  filter(num.plays > 300) %>%
  filter(offense_play %in% selected_teams)
# Per-team defensive summary: the same statistics, grouped by the defending
# team instead of the team with the ball.
defense <- plays %>%
  group_by(defense_play) %>%
  summarise(
    epa.pass.def = mean(EPA[pass == 1]),
    epa.rush.def = mean(EPA[rush == 1]),
    num.plays = n()
  ) %>%
  filter(num.plays > 300) %>%
  filter(defense_play %in% selected_teams)
# Read the logo lookup table and keep its `school` and `logo` columns.
# stringsAsFactors = FALSE keeps both columns as character on R < 4.0, where
# read.csv defaulted to factors; the later joins on `school` then compare
# character with character and `logo` reaches geom_image as plain text.
cfblogos <- read.csv(
  "https://raw.githubusercontent.com/saiemgilani/NCAA_FB_EPA/master/logos.csv",
  stringsAsFactors = FALSE
) %>%
  select(school, logo)
# Offensive Team Pass/Rush EPA
# Attach each team's defensive summary, then its logo, to the offensive
# summary (one row per team in `offense`).
team_epa <- left_join(offense, defense, by = c("offense_play" = "defense_play"))
team.epa <- left_join(team_epa, cfblogos, by = c("offense_play" = "school"))
head(team.epa)
# Logo scatter of rush vs. pass EPA per play; the dashed lines mark the mean
# of each axis across the plotted teams.
off_epa_plot <- ggplot(team.epa, aes(x = epa.rush.off, y = epa.pass.off)) +
  # Map the logo through aes() so each image stays tied to its own row rather
  # than relying on an external vector lining up with the layer data.
  geom_image(aes(image = logo), asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.epa$epa.rush.off),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.epa$epa.pass.off),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Offensive Rush EPA/Play", y = "Offensive Pass EPA/Play",
    title = "2019 NCAA Team Offensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# Display the plot, then save that exact object instead of whatever
# last_plot() happens to hold.
off_epa_plot
ggsave("team_off_epa_logos.png", plot = off_epa_plot)
# Defensive Team Pass/Rush EPA
# Attach each team's offensive summary, then its logo, to the defensive
# summary (one row per team in `defense`).
team_depa <- left_join(defense, offense, by = c("defense_play" = "offense_play"))
team.depa <- left_join(team_depa, cfblogos, by = c("defense_play" = "school"))
head(team.depa)
# Logo scatter of rush vs. pass EPA per play allowed; the dashed lines mark
# the mean of each axis across the plotted teams.
def_epa_plot <- ggplot(team.depa, aes(x = epa.rush.def, y = epa.pass.def)) +
  # Map the logo through aes() so each image stays tied to its own row rather
  # than relying on an external vector lining up with the layer data.
  geom_image(aes(image = logo), asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.depa$epa.rush.def),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.depa$epa.pass.def),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Defensive Rush EPA/Play", y = "Defensive Pass EPA/Play",
    title = "2019 NCAA Team Defensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# Display the plot, then save that exact object instead of whatever
# last_plot() happens to hold.
def_epa_plot
ggsave("team_def_epa_logos.png", plot = def_epa_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment