Skip to content

Instantly share code, notes, and snippets.

@saiemgilani
Last active March 27, 2020 16:10
Show Gist options
  • Save saiemgilani/ae9f9f8fb5fdb153d236675e89ebecfd to your computer and use it in GitHub Desktop.
Save saiemgilani/ae9f9f8fb5fdb153d236675e89ebecfd to your computer and use it in GitHub Desktop.
cfbscrapR_tutorial
##################################
# If this is your first time working with R,
# tidyverse, devtools, remotes, ggimage, cfbscrapR, or GT:
# Uncomment (highlight and Ctrl-Shift-C)
# and install by running (highlight and Ctrl-Enter) the below six lines as necessary
# install.packages("tidyverse")
# install.packages("devtools")
# install.packages("remotes")
# install.packages("ggimage")
# devtools::install_github("meysubb/cfbscrapR")
# remotes::install_github("rstudio/gt")
# set your working directory (in other words, your working folder location on your computer)
# using something like:
# setwd("C:/Users/username/Documents/NCAA_FB_EPA")
library(tidyverse)
library(ggimage)
library(cfbscrapR)
library(gt)
# Download the 2019 play-by-play data one week at a time (weeks 1-15) with
# cfb_pbp_data() and stack the weekly tables into a single data frame.
# This will take a minute or two to run.
# You only need to run it once; once completed, feel free to comment it out.
# If you need to exit the session and want to store the data,
# see the write.csv comment below.
#
# The weekly tables are collected in a list and bound once at the end, rather
# than growing `pbp_2019` inside a loop, which re-copies every previously
# bound row on each iteration.
pbp_2019 <- lapply(1:15, function(wk) {
  weekly <- cfb_pbp_data(
    year = 2019, season_type = "both", week = wk, epa_wpa = TRUE
  ) %>%
    mutate(week = wk)
  # Convert to a plain data.frame, as the original loop did, before binding
  data.frame(weekly)
}) %>%
  bind_rows()
# If you would like to save the data to a csv file, run the line below
# write.csv(pbp_2019, "pbp_2019.csv", row.names = FALSE)
# warning: a full year of play-by-play data is ~91 MB
# Quick inspection of the combined play-by-play table.
# First rows, all columns
pbp_2019 %>% head()
# First rows of a handful of play-description columns
head(
  select(
    pbp_2019,
    offense_play, defense_play, down, distance, play_type, yards_gained
  )
)
# Column-by-column overview
glimpse(pbp_2019)
# Distinct play types, in factor-level order
pbp_2019 %>%
  pull(play_type) %>%
  factor() %>%
  levels()
# Number of rows per play type, most frequent first
count(pbp_2019, play_type, sort = TRUE)
# Keep only the rows flagged as a rush or a pass
plays <- filter(pbp_2019, rush == 1 | pass == 1)
# Teams to keep in the offensive and defensive summaries
selected_teams <- c(
  "Florida State", "Florida",
  "Clemson", "Miami", "Louisville",
  "NC State", "Boston College", "Syracuse",
  "Notre Dame", "Virginia Tech", "Virginia",
  "Boise State", "Duke", "Wake Forest",
  "North Carolina", "Pittsburgh",
  "Georgia Tech", "West Virginia", "Memphis"
)
# Per-offense summary: mean EPA on pass plays, mean EPA on rush plays, and the
# number of rush/pass plays run, one row per offensive team.
# na.rm = TRUE keeps a single missing EPA value (or a missing pass/rush flag,
# which indexes to NA) from turning a team's whole average into NA.
# Only selected teams with more than 300 plays are kept.
offense <- plays %>%
  group_by(offense_play) %>%
  summarise(
    epa.pass.off = mean(EPA[pass == 1], na.rm = TRUE),
    epa.rush.off = mean(EPA[rush == 1], na.rm = TRUE),
    num.plays = n()
  ) %>%
  filter(
    num.plays > 300,
    offense_play %in% selected_teams
  )
# Per-defense summary: mean EPA on pass plays faced, mean EPA on rush plays
# faced, and the number of rush/pass plays faced, one row per defensive team.
# na.rm = TRUE keeps a single missing EPA value (or a missing pass/rush flag,
# which indexes to NA) from turning a team's whole average into NA.
# Only selected teams with more than 300 plays are kept.
defense <- plays %>%
  group_by(defense_play) %>%
  summarise(
    epa.pass.def = mean(EPA[pass == 1], na.rm = TRUE),
    epa.rush.def = mean(EPA[rush == 1], na.rm = TRUE),
    num.plays = n()
  ) %>%
  filter(
    num.plays > 300,
    defense_play %in% selected_teams
  )
# Team logo lookup table: keep only the `school` and `logo` columns.
# stringsAsFactors = FALSE is set explicitly so both columns are read as
# character on every R version (the default was TRUE before R 4.0.0); they are
# later used as a join key against character team names and as the image
# source for geom_image().
cfblogos <- read.csv(
  "https://raw.githubusercontent.com/saiemgilani/NCAA_FB_EPA/master/logos.csv",
  stringsAsFactors = FALSE
) %>%
  select(school, logo)
# Offensive team pass/rush EPA
# Attach each team's defensive summary, then its logo, to its offensive summary
team_epa <- offense %>%
  left_join(defense, by = c("offense_play" = "defense_play"))
team.epa <- left_join(team_epa, cfblogos, by = c("offense_play" = "school"))
head(team.epa)
# Rush EPA/play (x) against pass EPA/play (y), one logo per team;
# the dashed blue lines mark the mean of each axis across the plotted teams
ggplot(team.epa, aes(x = epa.rush.off, y = epa.pass.off)) +
  geom_image(image = team.epa$logo, asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.epa$epa.rush.off),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.epa$epa.pass.off),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Offensive Rush EPA/Play",
    y = "Offensive Pass EPA/Play",
    title = "2019 NCAA Team Offensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# No plot is passed, so ggsave() writes the last plot created
ggsave("team_off_epa_logos.png")
# Defensive team pass/rush EPA
# Attach each team's offensive summary, then its logo, to its defensive summary
team_depa <- defense %>%
  left_join(offense, by = c("defense_play" = "offense_play"))
team.depa <- left_join(team_depa, cfblogos, by = c("defense_play" = "school"))
head(team.depa)
# Rush EPA/play allowed (x) against pass EPA/play allowed (y), one logo per
# team; the dashed blue lines mark the mean of each axis across plotted teams
ggplot(team.depa, aes(x = epa.rush.def, y = epa.pass.def)) +
  geom_image(image = team.depa$logo, asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.depa$epa.rush.def),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.depa$epa.pass.def),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Defensive Rush EPA/Play",
    y = "Defensive Pass EPA/Play",
    title = "2019 NCAA Team Defensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# No plot is passed, so ggsave() writes the last plot created
ggsave("team_def_epa_logos.png")
##################################
# If this is your first time working with R,
# tidyverse, devtools, remotes, ggimage, cfbscrapR, or GT:
# Uncomment (highlight and Ctrl-Shift-C)
# and install by running (highlight and Ctrl-Enter) the below six lines as necessary
# install.packages("tidyverse")
# install.packages("devtools")
# install.packages("remotes")
# install.packages("ggimage")
# devtools::install_github("meysubb/cfbscrapR")
# remotes::install_github("rstudio/gt")
# set your working directory (in other words, your working folder location on your computer)
# using something like:
# setwd("C:/Users/username/Documents/NCAA_FB_EPA")
library(tidyverse)
library(ggimage)
library(cfbscrapR)
library(gt)
# Download the 2019 play-by-play data one week at a time (weeks 1-15) with
# cfb_pbp_data() and stack the weekly tables into a single data frame.
# This will take a minute or two to run.
# You only need to run it once; once completed, feel free to comment it out.
# If you need to exit the session and want to store the data,
# see the write.csv comment below.
#
# The weekly tables are collected in a list and bound once at the end, rather
# than growing `pbp_2019` inside a loop, which re-copies every previously
# bound row on each iteration.
pbp_2019 <- lapply(1:15, function(wk) {
  weekly <- cfb_pbp_data(
    year = 2019, season_type = "both", week = wk, epa_wpa = TRUE
  ) %>%
    mutate(week = wk)
  # Convert to a plain data.frame, as the original loop did, before binding
  data.frame(weekly)
}) %>%
  bind_rows()
# If you would like to save the data to a csv file, run the line below
# write.csv(pbp_2019, "pbp_2019.csv", row.names = FALSE)
# warning: a full year of play-by-play data is ~91 MB
# Quick inspection of the combined play-by-play table.
# First rows, all columns
pbp_2019 %>% head()
# First rows of a handful of play-description columns
head(
  select(
    pbp_2019,
    offense_play, defense_play, down, distance, play_type, yards_gained
  )
)
# Column-by-column overview
glimpse(pbp_2019)
# Distinct play types, in factor-level order
pbp_2019 %>%
  pull(play_type) %>%
  factor() %>%
  levels()
# Number of rows per play type, most frequent first
count(pbp_2019, play_type, sort = TRUE)
# Keep only the rows flagged as a rush or a pass
plays <- filter(pbp_2019, rush == 1 | pass == 1)
# Teams to keep in the offensive and defensive summaries
selected_teams <- c(
  "Florida State", "Florida",
  "Clemson", "Miami", "Louisville",
  "NC State", "Boston College", "Syracuse",
  "Notre Dame", "Virginia Tech", "Virginia",
  "Boise State", "Duke", "Wake Forest",
  "North Carolina", "Pittsburgh",
  "Georgia Tech", "West Virginia", "Memphis"
)
# Per-offense summary: mean EPA on pass plays, mean EPA on rush plays, and the
# number of rush/pass plays run, one row per offensive team.
# na.rm = TRUE keeps a single missing EPA value (or a missing pass/rush flag,
# which indexes to NA) from turning a team's whole average into NA.
# Only selected teams with more than 300 plays are kept.
offense <- plays %>%
  group_by(offense_play) %>%
  summarise(
    epa.pass.off = mean(EPA[pass == 1], na.rm = TRUE),
    epa.rush.off = mean(EPA[rush == 1], na.rm = TRUE),
    num.plays = n()
  ) %>%
  filter(
    num.plays > 300,
    offense_play %in% selected_teams
  )
# Per-defense summary: mean EPA on pass plays faced, mean EPA on rush plays
# faced, and the number of rush/pass plays faced, one row per defensive team.
# na.rm = TRUE keeps a single missing EPA value (or a missing pass/rush flag,
# which indexes to NA) from turning a team's whole average into NA.
# Only selected teams with more than 300 plays are kept.
defense <- plays %>%
  group_by(defense_play) %>%
  summarise(
    epa.pass.def = mean(EPA[pass == 1], na.rm = TRUE),
    epa.rush.def = mean(EPA[rush == 1], na.rm = TRUE),
    num.plays = n()
  ) %>%
  filter(
    num.plays > 300,
    defense_play %in% selected_teams
  )
# Team logo lookup table: keep only the `school` and `logo` columns.
# stringsAsFactors = FALSE is set explicitly so both columns are read as
# character on every R version (the default was TRUE before R 4.0.0); they are
# later used as a join key against character team names and as the image
# source for geom_image().
cfblogos <- read.csv(
  "https://raw.githubusercontent.com/saiemgilani/NCAA_FB_EPA/master/logos.csv",
  stringsAsFactors = FALSE
) %>%
  select(school, logo)
# Offensive team pass/rush EPA
# Attach each team's defensive summary, then its logo, to its offensive summary
team_epa <- offense %>%
  left_join(defense, by = c("offense_play" = "defense_play"))
team.epa <- left_join(team_epa, cfblogos, by = c("offense_play" = "school"))
head(team.epa)
# Rush EPA/play (x) against pass EPA/play (y), one logo per team;
# the dashed blue lines mark the mean of each axis across the plotted teams
ggplot(team.epa, aes(x = epa.rush.off, y = epa.pass.off)) +
  geom_image(image = team.epa$logo, asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.epa$epa.rush.off),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.epa$epa.pass.off),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Offensive Rush EPA/Play",
    y = "Offensive Pass EPA/Play",
    title = "2019 NCAA Team Offensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# No plot is passed, so ggsave() writes the last plot created
ggsave("team_off_epa_logos.png")
# Defensive team pass/rush EPA
# Attach each team's offensive summary, then its logo, to its defensive summary
team_depa <- defense %>%
  left_join(offense, by = c("defense_play" = "offense_play"))
team.depa <- left_join(team_depa, cfblogos, by = c("defense_play" = "school"))
head(team.depa)
# Rush EPA/play allowed (x) against pass EPA/play allowed (y), one logo per
# team; the dashed blue lines mark the mean of each axis across plotted teams
ggplot(team.depa, aes(x = epa.rush.def, y = epa.pass.def)) +
  geom_image(image = team.depa$logo, asp = 16 / 9) +
  geom_vline(
    xintercept = mean(team.depa$epa.rush.def),
    linetype = "dashed", color = "blue"
  ) +
  geom_hline(
    yintercept = mean(team.depa$epa.pass.def),
    linetype = "dashed", color = "blue"
  ) +
  labs(
    x = "Defensive Rush EPA/Play",
    y = "Defensive Pass EPA/Play",
    title = "2019 NCAA Team Defensive Efficiency",
    caption = "Data: @CFB_data with #cfbscrapR"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10)
  )
# No plot is passed, so ggsave() writes the last plot created
ggsave("team_def_epa_logos.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment