library(NFLSimulatoR)
library(knitr)
library(foreach)
library(doParallel)
library(dplyr)
The play-by-play data used for the strategy simulations comes from nflscrapR
and is read into R using the NFLSimulatoR
function
download_nflscrapR_data()
. The prep_pbp_data()
function will then clean and prepare the data for use in the
sample_drives()
function. Furthermore, we wanted our
results to represent “normal” NFL drives where score differential and
time remaining would not influence play calling. This was accomplished
by filtering the play-by-play data to require a score differential of
less than 28 points and greater than 2 minutes remaining in a half.
<- dplyr::bind_cols(
df ::load_pbp(2018),
nflfastR::load_pbp(2019))
nflfastR
<- df %>%
pbp_data prep_pbp_data(.) %>%
filter(abs(score_differential) < 28, half_seconds_remaining/60 > 2)
<- pbp_data %>%
pbp_data_18 filter(.,substr(game_date,0,4) == 2018)
<- pbp_data %>%
pbp_data_19 filter(.,substr(game_date,0,4) == 2019)
With the sample_drives()
function, drives can be
simulated according to a chosen proportion of pass plays by selecting
the passes_rushes
strategy. To improve performance when
running thousands of simulated drives, the foreach
and
doParallel
packages are used to run the simulations on
multiple cores. The following simulates drives for a pass proportion of
0 to 1 (by .1) and stores the results in a data frame
results
.
# Pass Proportion 2019
<- NULL
drives <- NULL
results_pass_19 <- NULL
df_drives registerDoParallel(cores = 4)
<- seq(0,1, by = .1)
prop <- foreach (i= 1:11, .combine = rbind, .packages = c("NFLSimulatoR", "progress","dplyr", "tidyverse")) %dopar% {
results_pass_19 set.seed(i)
<- sample_drives(n_sims = 10,
drives from_yard_line = 25,
play_by_play_data = pbp_data_19,
strategy = "passes_rushes",
single_drive = T,
progress = F,
prop_passes = prop[i])
<- drives %>%
df_drives #add additional identifiers below as needed i.e. year, etc
mutate(proportion = prop[i],year = 19)
}
We can also compare various fourth down strategies using the
sample_drives()
function. Simply pass “fourth_downs” to the
strategy
argument and a vector storing selected strategies
to the fourth_down_strategy
argument.
# 4th down strategies
<- NULL
drives <- NULL
results_fourths_1 <- NULL
df_drives registerDoParallel(cores = 4)
<- c("always_go_for_it","empirical","exp_pts","never_go_for_it", "yds_less_than")
strats <- foreach (i = 3:4, .combine = rbind, .packages = c("NFLSimulatoR", "progress","dplyr", "tidyverse")) %dopar% {
results_fourths_1 set.seed(i)
<- sample_drives(n_sims = 10000,
drives from_yard_line = 25,
play_by_play_data = pbp_data,
strategy = "fourth_downs",
fourth_down_strategy = strats[i],
single_drive = T,
progress = F
)<- drives %>%
df_drives #add additional identifiers below as needed i.e. year, etc
mutate(Scenario = strats[i])
}
To further analyze passing vs. rushing we can run the simulations
based on a team’s ability to pass the football. This is accomplished by
dividing the play-by-play data into groups three groups (low, mid, high)
based on a team’s respective Passer Rating (RTG) relative to the league
average over the last three seasons (2017-2019). This file can be
downloaded from Google
Drive here. The six datasets, three for 2018 and 2019 respectively,
are stored in the list object RTG_list
.
# RTG Data
# Team RTG read in (2017-2019)
<- read.csv("path/to/file/given/above/Team_Passing_Offense.csv")
RTG
#Store Tercile Cutoffs (2017-2019)
<- quantile(RTG$Rate,probs = c(0:3/3))
cutoffs
# Passer Rate Terciles
<- list()
RTG_list <- c("2018","2019")
years <- c("Low","Mid","High")
terciles for (j in 1:2){
<- list()
list_year for (i in 1:3){
<- RTG %>%
teams filter(.,Year == years[j],
>= cutoffs[i] & Rate < cutoffs[(i+1)] ) %>%
Rate select(.,Team)
paste(terciles[i],years[j],sep = "_")]] <- pbp_data %>%
list_year[[filter(.,substr(game_date,0,4) == years[j],
%in% as.matrix(teams))
posteam
}<- append(RTG_list,list_year)
RTG_list }
Using the same structure as the pass vs. rush simulations above, we can simulate drives using each of the 6 subsets of data.
# Passer Rating - RTG
<- NULL
drives <- NULL
df_drives <- NULL
RTG_thirds_sims registerDoParallel(cores = 4)
<- seq(0,1, by = .1)
prop <- foreach (j = 1:6, .combine = rbind ) %:%
RTG_thirds_sims foreach (i= 1:11, .combine = rbind, .packages = c("NFLSimulatoR", "progress","dplyr", "tidyverse")) %dopar% {
set.seed(i)
<- sample_drives(n_sims = 10,
drives from_yard_line = 25,
play_by_play_data = RTG_list[[j]],
strategy = "passes_rushes",
single_drive = T,
progress = F,
prop_passes = prop[i])
<- drives %>%
df_drives #add additional identifiers below as needed i.e. year, etc
mutate(proportion = prop[i],
RTG = names(RTG_list[j]),
year = substr(RTG, nchar(RTG)-1, nchar(RTG)))
}