library(sspm)
#> Loading required package: sf
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
#> Loading required package: mgcv
#> Loading required package: nlme
#> This is mgcv 1.8-40. For overview type 'help("mgcv-package")'.
library(mgcv)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:nlme':
#>
#> collapse
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
The following example shows the typical sspm
workflow.
We will use simulated data.
sfa_boundaries#> Simple feature collection with 4 features and 1 field
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: -64.18658 ymin: 46.00004 xmax: -46.6269 ymax: 60.84333
#> Geodetic CRS: WGS 84
#> sfa geometry
#> 1 4 MULTIPOLYGON (((-59.36453 5...
#> 2 5 MULTIPOLYGON (((-55 53.75, ...
#> 3 6 MULTIPOLYGON (((-49.9269 49...
#> 4 7 MULTIPOLYGON (((-54.48779 4...
borealis_simulated#> # A tibble: 1,800 × 8
#> year_f sfa weight_per_km2 temp_at_bottom lon_dec lat_dec row uniqueID
#> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <int> <chr>
#> 1 1995 7 14.7 1.31 -62.8 59.2 1 y1995s7r1
#> 2 1995 7 366. 0 -56.7 55.2 2 y1995s7r2
#> 3 1995 7 108. 0.982 -60.6 56.9 3 y1995s7r3
#> 4 1995 7 62.3 0 -58.0 56.4 4 y1995s7r4
#> 5 1995 7 0 0.698 -59.0 57.1 5 y1995s7r5
#> 6 1995 7 0 1.45 -58.8 56.3 6 y1995s7r6
#> 7 1995 7 236. 1.05 -51.1 50.7 7 y1995s7r7
#> 8 1995 7 68.2 0 -54.1 54.5 8 y1995s7r8
#> 9 1995 7 93.6 1.86 -54.9 53.7 9 y1995s7r9
#> 10 1995 7 25.4 1.74 -54.7 50.5 10 y1995s7r10
#> # … with 1,790 more rows
predator_simulated#> # A tibble: 10,200 × 7
#> year_f sfa weight_per_km2 lon_dec lat_dec row uniqueID
#> <fct> <chr> <dbl> <dbl> <dbl> <int> <chr>
#> 1 1995 7 9.90 -61.0 60.0 1 y1995s7r1
#> 2 1995 7 128. -60.0 57.6 2 y1995s7r2
#> 3 1995 7 358. -58.0 55.6 3 y1995s7r3
#> 4 1995 7 0 -58.0 55.9 4 y1995s7r4
#> 5 1995 7 103. -60.5 57.2 5 y1995s7r5
#> 6 1995 7 57.3 -53.7 51.8 6 y1995s7r6
#> 7 1995 7 58.6 -53.6 50.7 7 y1995s7r7
#> 8 1995 7 0 -50.2 50.1 8 y1995s7r8
#> 9 1995 7 0 -54.6 54.0 9 y1995s7r9
#> 10 1995 7 131. -52.8 51.0 10 y1995s7r10
#> # … with 10,190 more rows
catch_simulated#> # A tibble: 2,020 × 7
#> year_f sfa catch lon_dec lat_dec row uniqueID
#> <fct> <chr> <dbl> <dbl> <dbl> <int> <chr>
#> 1 1991 4 2527. -62.2 59.5 1 y1991s4r1
#> 2 1991 4 4194. -61.0 58.0 2 y1991s4r2
#> 3 1991 4 7438. -62.4 60.3 3 y1991s4r3
#> 4 1991 4 0 -58.2 55.2 4 y1991s4r4
#> 5 1991 4 3837. -57.8 55.5 5 y1991s4r5
#> 6 1991 4 3196. -58.6 56.3 6 y1991s4r6
#> 7 1991 4 3214. -57.9 55.7 7 y1991s4r7
#> 8 1991 4 0 -56.4 54.4 8 y1991s4r8
#> 9 1991 4 539. -58.9 55.6 9 y1991s4r9
#> 10 1991 4 4234. -55.2 53.2 10 y1991s4r10
#> # … with 2,010 more rows
sspm
workflow is to create a
sspm_boundary
from an sf
object, providing the
boundary
that delineates the boundary regions. The object
can then be plotted with spm_plot
(as can most
sspm
objects).<- spm_as_boundary(boundaries = sfa_boundaries,
bounds boundary = "sfa")
plot(bounds)
data.frame
,
tibble
or sf
object into a
sspm_data
object, with a few other pieces of relevant
information, such as the name, dataset type (biomass, predictor or
catch, depending on the type of information contained), time column and
coordinates column (i not sf
) and unique row identifier.
Here we wrap the borealis dataset that contains the biomass
information.<-
biomass_dataset spm_as_dataset(borealis_simulated, name = "borealis",
density = "weight_per_km2",
time = "year_f",
coords = c('lon_dec','lat_dec'),
uniqueID = "uniqueID")
#> ℹ Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> ! Warning: sspm is assuming WGS 84 CRS is to be used for casting
biomass_dataset#>
#> ‒‒ Dataset borealis ‒‒
#> → [1800 rows, 9 columns]
#> → Density : weight_per_km2
#> → Time : year_f
<-
predator_dataset spm_as_dataset(predator_simulated, name = "all_predators",
density = "weight_per_km2",
time = "year_f",
coords = c("lon_dec", "lat_dec"),
uniqueID = "uniqueID")
#> ℹ Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> ! Warning: sspm is assuming WGS 84 CRS is to be used for casting
predator_dataset#>
#> ‒‒ Dataset all_predators ‒‒
#> → [10200 rows, 8 columns]
#> → Density : weight_per_km2
#> → Time : year_f
sspm
workflow relies on the discretization of the
boundary objects, the default method being voronoi tesselation.<- bounds %>%
bounds_voronoi spm_discretize(method = "tesselate_voronoi",
with = biomass_dataset,
nb_samples = 30)
#> ℹ Discretizing using method tesselate_voronoi
bounds_voronoi#>
#> ‒‒ Boundaries (Discrete) ‒‒
#> → [4 rows, 3 columns]
#> ★ Points — [120 features, 10 columns]
#> ★ Patches — [92 features, 4 columns]
#> → Column : sfa
#> → Area : area_sfa
The other available method is triangulate_delaunay
for
delaunay triangulation. Here the a
argument is used to set
the size of the mesh (see RTriangle::triangulate
for more
details).
## Not run
<- bounds %>%
bounds_delaunay spm_discretize(method = "triangulate_delaunay", a = 1, q = 30)
bounds_delaunay
plot(bounds_voronoi)
## Not run
plot(bounds_delaunay)
spm_patches()
and spm_points()
.spm_patches(bounds_voronoi)
#> Simple feature collection with 92 features and 3 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: -64.18658 ymin: 46.00004 xmax: -46.6269 ymax: 60.84488
#> Geodetic CRS: WGS 84
#> # A tibble: 92 × 4
#> sfa patch_id patch_area geometry
#> * <fct> <fct> [km^2] <POLYGON [°]>
#> 1 4 P1 5867. ((-62.56544 59.47292, -62.51885 60.2501, -62.42353…
#> 2 4 P2 6294. ((-61.15825 60.5856, -61.18083 60.61861, -61.16696…
#> 3 4 P3 1594. ((-61.98352 59.91756, -62.14032 59.90335, -62.3916…
#> 4 4 P4 1882. ((-61.60726 58.17744, -61.55968 58.21747, -61.5430…
#> 5 4 P5 1890. ((-61.54308 58.72455, -60.98005 58.90554, -60.9787…
#> 6 4 P6 2149. ((-62.81971 59.18792, -62.54312 59.42411, -62.5654…
#> 7 4 P7 2461. ((-61.55968 58.21747, -60.90082 58.30673, -60.7969…
#> 8 4 P8 1580. ((-60.85729 60.08226, -60.87677 60.07412, -61.2755…
#> 9 4 P9 7244. ((-60.30623 57.88979, -60.09616 58.23633, -60.1909…
#> 10 4 P10 1570. ((-59.93021 58.87649, -60.19091 58.62062, -60.0961…
#> # … with 82 more rows
spm_points(bounds_voronoi)
#> Simple feature collection with 120 features and 9 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -62.95014 ymin: 46.05739 xmax: -47.39935 ymax: 60.81292
#> Geodetic CRS: WGS 84
#> # A tibble: 120 × 10
#> # Groups: sfa [4]
#> year_f weight_per_km2 temp_at_bottom lon_dec lat_dec row uniqueID
#> * <fct> [kg/km^2] <dbl> <dbl> <dbl> <int> <chr>
#> 1 1995 7117. 3.88 -61.7 60.0 21 y1995s6r21
#> 2 1995 0 2.33 -59.2 57.3 23 y1995s6r23
#> 3 1995 0 1.03 -54.0 46.1 40 y1995s6r40
#> 4 1996 553. 2.97 -59.5 56.0 65 y1996s7r65
#> 5 1996 6538. 4.42 -61.3 60.8 102 y1996s5r102
#> 6 1996 2427. 2.17 -55.4 54.4 105 y1996s5r105
#> 7 1996 1945. 0 -48.4 47.2 138 y1996s4r138
#> 8 1997 0 0.649 -55.3 54.7 144 y1997s7r144
#> 9 1997 1430. 0 -54.4 50.2 149 y1997s7r149
#> 10 1997 0 5.18 -60.9 59.6 182 y1997s4r182
#> # … with 110 more rows, and 3 more variables: geometry <POINT [°]>, sfa <fct>,
#> # area_sfa [km^2]
sspm
model, by using spatial-temporal
smoothers, by passing each dataset through spm_smooth
. Here
we first smooth weight_per_km2
as well as
temp_at_bottom
. Note that the boundary column
sfa
can be used in the formula as the data will be first
joined to the provided boundaries.<- biomass_dataset %>%
biomass_smooth spm_smooth(weight_per_km2 ~ sfa + smooth_time(by = sfa) +
smooth_space() +
smooth_space_time(),
boundaries = bounds_voronoi,
family=tw)%>%
spm_smooth(temp_at_bottom ~ smooth_time(by=sfa, xt = NULL) +
smooth_space() +
smooth_space_time(xt = NULL),
family=gaussian)
#> ℹ Fitting formula: weight_per_km2 ~ sfa + smooth_time(by = sfa) + smooth_space() + smooth_space_time() for dataset 'borealis'
#> ℹ Note: response variable temp_at_bottom is NOT a biomass density variable
#> ℹ Fitting formula: temp_at_bottom ~ smooth_time(by = sfa, xt = NULL) + smooth_space() + smooth_space_time(xt = NULL) for dataset 'borealis'
biomass_smooth#>
#> ‒‒ Dataset borealis (Mapped) ‒‒
#> → [1801 rows, 12 columns]
#> → Density : weight_per_km2
#> → Time : year_f
#> → Smoothed data : [2208 rows, 8 columns]
#> ★ Smoothed vars: temp_at_bottom — weight_per_km2
plot(biomass_smooth, var = "weight_per_km2", log = FALSE)
#> Registered S3 method overwritten by 'ggforce':
#> method from
#> scale_type.units units
You can also make a spatial plot
plot(biomass_smooth, var = "weight_per_km2", use_sf = TRUE)
weight_per_km2
variable in the
predator data.<- predator_dataset %>%
predator_smooth spm_smooth(weight_per_km2 ~ smooth_time() + smooth_space(),
boundaries = bounds_voronoi,
drop.unused.levels = F, family=tw, method= "fREML")
#> ℹ Fitting formula: weight_per_km2 ~ smooth_time() + smooth_space() for dataset 'all_predators'
predator_smooth#>
#> ‒‒ Dataset all_predators (Mapped) ‒‒
#> → [10201 rows, 11 columns]
#> → Density : weight_per_km2
#> → Time : year_f
#> → Smoothed data : [3680 rows, 7 columns]
#> ★ Smoothed vars: weight_per_km2
<-
catch_dataset spm_as_dataset(catch_simulated, name = "catch_data",
biomass = "catch",
time = "year_f",
uniqueID = "uniqueID",
coords = c("lon_dec", "lat_dec"))
#> ℹ Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> ! Warning: sspm is assuming WGS 84 CRS is to be used for casting
catch_dataset#>
#> ‒‒ Dataset catch_data ‒‒
#> → [2020 rows, 8 columns]
#> → Biomass : catch
#> → Time : year_f
spm_aggregate
functions. Here we use
spm_aggregate_catch
:<-
biomass_smooth_w_catch spm_aggregate_catch(biomass = biomass_smooth,
catch = catch_dataset,
biomass_variable = "weight_per_km2",
catch_variable = "catch",
fill = mean)
#> ℹ Offsetting biomass with catch data using columns: weight_per_km2, catch
biomass_smooth_w_catch#>
#> ‒‒ Dataset borealis (Mapped) ‒‒
#> → [1801 rows, 12 columns]
#> → Density : weight_per_km2
#> → Time : year_f
#> → Smoothed data : [2208 rows, 13 columns]
#> ★ Smoothed vars: temp_at_bottom — weight_per_km2
#> ★ Vars with catch: weight_per_km2_borealis_with_catch
sspm
model object, using one dataset of type biomass, one dataset of type
predictor and (optionnaly) a dataset of type catch.<- sspm(biomass = biomass_smooth_w_catch,
sspm_model predictors = predator_smooth)
#> ℹ Joining smoothed data from all datasets
sspm_model#>
#> ‒‒ Model (2 datasets) ‒‒
#> → Smoothed data : [2208 rows, 14 columns]
#> ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#> ★ Vars with catch: weight_per_km2_borealis_with_catch
spm_split
.<- sspm_model %>%
sspm_model spm_split(year_f %in% c(1990:2017))
sspm_model#>
#> ‒‒ Model (2 datasets) ‒‒
#> → Smoothed data : [2208 rows, 15 columns] / [2116 train, 92 test]
#> ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#> ★ Vars with catch: weight_per_km2_borealis_with_catch
spm_lag
.<- sspm_model %>%
sspm_model spm_lag(vars = c("weight_per_km2_borealis_with_catch",
"weight_per_km2_all_predators"),
n = 1)
sspm_model#>
#> ‒‒ Model (2 datasets) ‒‒
#> → Smoothed data : [2208 rows, 17 columns] / [2116 train, 92 test]
#> ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#> ★ Vars with catch: weight_per_km2_borealis_with_catch — weight_per_km2_borealis_with_catch_lag_1
#> ★ lagged vars: weight_per_km2_all_predators_lag_1 — weight_per_km2_borealis_with_catch_lag_1
spm
.<- sspm_model %>%
sspm_model_fit spm(log_productivity ~ sfa +
+
weight_per_km2_all_predators_lag_1 smooth_space(by = weight_per_km2_borealis_with_catch) +
smooth_space(),
family = mgcv::scat)
#> ℹ Fitting SPM formula: log_productivity ~ sfa + weight_per_km2_all_predators_lag_1 + smooth_space(by = weight_per_km2_borealis_with_catch) + smooth_space()
#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation
#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation
#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation
sspm_model_fit#>
#> ‒‒ Model fit ‒‒
#> → Smoothed data : [2208 rows, 17 columns] / [2116 train, 92 test]
#> → Fit summary :
#>
#> Family: Scaled t(Inf,0.245)
#> Link function: identity
#>
#> Formula:
#> log_productivity ~ sfa + weight_per_km2_all_predators_lag_1 +
#> s(patch_id, k = 30, bs = "mrf", xt = list(penalty = pen_mat_space),
#> by = weight_per_km2_borealis_with_catch) + s(patch_id,
#> k = 30, bs = "mrf", xt = list(penalty = pen_mat_space))
#>
#> Parametric coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -6.489e-01 6.973e-02 -9.306 < 2e-16 ***
#> sfa5 -5.092e-02 6.242e-02 -0.816 0.414781
#> sfa6 5.858e-02 8.000e-02 0.732 0.464106
#> sfa7 -3.388e-02 8.774e-02 -0.386 0.699443
#> weight_per_km2_all_predators_lag_1 -3.894e-05 1.126e-05 -3.460 0.000552 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Approximate significance of smooth terms:
#> edf Ref.df F p-value
#> s(patch_id):weight_per_km2_borealis_with_catch 15.90 30 28.879 < 2e-16 ***
#> s(patch_id) 6.18 29 0.414 0.000398 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> R-sq.(adj) = 0.547 Deviance explained = 55.3%
#> -REML = 2914 Scale est. = 1 n = 2024
plot(sspm_model_fit, train_test = TRUE, scales = "free")
#> Warning: Removed 92 rows containing missing values (geom_point).
<- predict(sspm_model_fit)
preds head(preds)
#> Simple feature collection with 6 features and 6 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: -64.18658 ymin: 59.47292 xmax: -62.26589 ymax: 60.84335
#> Geodetic CRS: WGS 84
#> pred_log pred patch_id year_f sfa patch_area
#> 1 0.20905078 1.2325076 P1 1995 4 5866.769 [km^2]
#> 2 -0.01257562 0.9875031 P1 1996 4 5866.769 [km^2]
#> 3 -0.07280029 0.9297865 P1 1997 4 5866.769 [km^2]
#> 4 -0.07996789 0.9231460 P1 1998 4 5866.769 [km^2]
#> 5 -0.11092437 0.8950064 P1 1999 4 5866.769 [km^2]
#> 6 -0.12435432 0.8830669 P1 2000 4 5866.769 [km^2]
#> geometry
#> 1 POLYGON ((-62.56544 59.4729...
#> 2 POLYGON ((-62.56544 59.4729...
#> 3 POLYGON ((-62.56544 59.4729...
#> 4 POLYGON ((-62.56544 59.4729...
#> 5 POLYGON ((-62.56544 59.4729...
#> 6 POLYGON ((-62.56544 59.4729...
We can also get the predictions for biomass by passing the biomass variable name.
<- predict(sspm_model_fit, biomass = "weight_per_km2_borealis")
biomass_preds head(biomass_preds)
#> Simple feature collection with 6 features and 8 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: -64.18658 ymin: 59.47292 xmax: -62.26589 ymax: 60.84335
#> Geodetic CRS: WGS 84
#> # A tibble: 6 × 9
#> year_f patch_id sfa patch_area biomass_with_catch biomass biomass_density…
#> <dbl> <fct> <fct> [km^2] [kg] [kg] [kg/km^2]
#> 1 1995 P1 4 5867. NA NA NA
#> 2 1996 P1 4 5867. 21584010. 21579593. 3679.
#> 3 1997 P1 4 5867. 15522657. 15520115. 2646.
#> 4 1998 P1 4 5867. 14264679. 14263299. 2431.
#> 5 1999 P1 4 5867. 13831937. 13826776. 2358.
#> 6 2000 P1 4 5867. 13028456. 13024039. 2221.
#> # … with 2 more variables: biomass_density [kg/km^2], geometry <POLYGON [°]>
We can also predict the biomass one step ahead.
<- predict(sspm_model_fit, biomass = "weight_per_km2_borealis",
biomass_one_step next_ts = TRUE)
#> ! Not all vars are lagged vars: weight_per_km2_borealis_with_catch
head(biomass_one_step)
#> Simple feature collection with 6 features and 5 fields
#> Geometry type: POLYGON
#> Dimension: XY
#> Bounding box: xmin: -64.18658 ymin: 58.17744 xmax: -60.85729 ymax: 60.84488
#> Geodetic CRS: WGS 84
#> # A tibble: 6 × 6
#> patch_id year_f sfa biomass patch_area geometry
#> <fct> <dbl> <fct> [kg] [km^2] <POLYGON [°]>
#> 1 P1 2019 4 NA 5867. ((-62.56544 59.47292, -62.51885 60.2…
#> 2 P2 2019 4 NA 6294. ((-61.15825 60.5856, -61.18083 60.61…
#> 3 P3 2019 4 NA 1594. ((-61.98352 59.91756, -62.14032 59.9…
#> 4 P4 2019 4 NA 1882. ((-61.60726 58.17744, -61.55968 58.2…
#> 5 P5 2019 4 NA 1890. ((-61.54308 58.72455, -60.98005 58.9…
#> 6 P6 2019 4 NA 2149. ((-62.81971 59.18792, -62.54312 59.4…
plot(sspm_model_fit, log = T, scales = 'free')
#> Warning: Removed 1 row(s) containing missing values (geom_path).
#> Warning: Removed 92 rows containing missing values (geom_point).
plot(sspm_model_fit, log = T, use_sf = TRUE)
plot(sspm_model_fit, biomass = "weight_per_km2_borealis", scales = "free")
#> Warning: Removed 1 row(s) containing missing values (geom_path).
#> Warning: Removed 92 rows containing missing values (geom_point).
plot(sspm_model_fit, biomass = "weight_per_km2_borealis", use_sf = TRUE)
plot(sspm_model_fit, biomass = "weight_per_km2_borealis",
next_ts = TRUE, aggregate = TRUE, scales = "free", interval = T)
#> ! Not all vars are lagged vars: weight_per_km2_borealis_with_catch
#> Warning in matrix(qrc[-1], nrow(pm$X), ncol(pm$X) - 1, byrow = TRUE): non-empty
#> data for zero-extent matrix
#> Warning: Removed 2 row(s) containing missing values (geom_path).
#> Warning: Removed 8 rows containing missing values (geom_point).
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf