library(outbreaks) # for data
library(trending) # for trend fitting
library(dplyr, warn.conflicts = FALSE) # for data manipulation
# load data
data(covid19_england_nhscalls_2020)
# define a model
model <- glm_nb_model(count ~ day + weekday)
# select 6 weeks of data (from a period when the prevalence was decreasing)
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
pathways_recent <-
covid19_england_nhscalls_2020 %>%
filter(date >= first_date, date <= last_date) %>%
group_by(date, day, weekday) %>%
summarise(count = sum(count), .groups = "drop")
# split data for fitting and prediction
dat <-
pathways_recent %>%
group_by(date <= first_date + 6*7) %>%
group_split()
fitting_data <- dat[[2]]
pred_data <- select(dat[[1]], date, day, weekday)
fitted_model <- fit(model, fitting_data)
# default
fitted_model %>%
predict(pred_data) %>%
glimpse()
#> Rows: 14
#> Columns: 8
#> $ date <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 8750, 7309, 7152, 9534, 7663, 7169, 7202, 6932, 5656, 5563, 7…
#> $ upper_pi <dbl> 17091, 14588, 14101, 19191, 15076, 14380, 14191, 13642, 11453…
# without prediction intervals
fitted_model %>%
predict(pred_data, add_pi = FALSE) %>%
glimpse()
#> Rows: 14
#> Columns: 6
#> $ date <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
# without uncertainty
fitted_model %>%
predict(pred_data, uncertainty = FALSE) %>%
glimpse()
#> Rows: 14
#> Columns: 8
#> $ date <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 9070, 7401, 7198, 9615, 7675, 7489, 7136, 6847, 5702, 5393, 7…
#> $ upper_pi <dbl> 17184, 14557, 14030, 18996, 15255, 14584, 14255, 13568, 11433…
# non-bootstraped (parametric) prediction intervals
fitted_model %>%
predict(pred_data, simulate_pi = FALSE) %>%
glimpse()
#> Rows: 14
#> Columns: 8
#> $ date <date> 2020-05-15, 2020-05-16, 2020-05-17, 2020-05-18, 2020-05-19, …
#> $ day <int> 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
#> $ weekday <fct> rest_of_week, weekend, weekend, monday, rest_of_week, rest_of…
#> $ estimate <dbl> 12682.379, 10624.988, 10261.987, 13839.821, 11036.028, 10658.…
#> $ lower_ci <dbl> 11389.734, 9298.983, 8955.560, 11749.030, 9782.389, 9416.365,…
#> $ upper_ci <dbl> 14121.729, 12140.078, 11758.995, 16302.677, 12450.323, 12065.…
#> $ lower_pi <dbl> 8107, 6618, 6373, 8363, 6962, 6701, 6450, 6208, 5079, 4889, 6…
#> $ upper_pi <dbl> 18870, 16223, 15714, 21784, 16638, 16124, 15626, 15145, 12992…