An example with simulated data

library(sspm)
#> Loading required package: sf
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
#> Loading required package: mgcv
#> Loading required package: nlme
#> This is mgcv 1.8-40. For overview type 'help("mgcv-package")'.
library(mgcv)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:nlme':
#> 
#>     collapse
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

The following example shows the typical sspm workflow. We will use simulated data.

sfa_boundaries
#> Simple feature collection with 4 features and 1 field
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -64.18658 ymin: 46.00004 xmax: -46.6269 ymax: 60.84333
#> Geodetic CRS:  WGS 84
#>   sfa                       geometry
#> 1   4 MULTIPOLYGON (((-59.36453 5...
#> 2   5 MULTIPOLYGON (((-55 53.75, ...
#> 3   6 MULTIPOLYGON (((-49.9269 49...
#> 4   7 MULTIPOLYGON (((-54.48779 4...
borealis_simulated
#> # A tibble: 1,800 × 8
#>    year_f sfa   weight_per_km2 temp_at_bottom lon_dec lat_dec   row uniqueID  
#>    <fct>  <chr>          <dbl>          <dbl>   <dbl>   <dbl> <int> <chr>     
#>  1 1995   7               14.7          1.31    -62.8    59.2     1 y1995s7r1 
#>  2 1995   7              366.           0       -56.7    55.2     2 y1995s7r2 
#>  3 1995   7              108.           0.982   -60.6    56.9     3 y1995s7r3 
#>  4 1995   7               62.3          0       -58.0    56.4     4 y1995s7r4 
#>  5 1995   7                0            0.698   -59.0    57.1     5 y1995s7r5 
#>  6 1995   7                0            1.45    -58.8    56.3     6 y1995s7r6 
#>  7 1995   7              236.           1.05    -51.1    50.7     7 y1995s7r7 
#>  8 1995   7               68.2          0       -54.1    54.5     8 y1995s7r8 
#>  9 1995   7               93.6          1.86    -54.9    53.7     9 y1995s7r9 
#> 10 1995   7               25.4          1.74    -54.7    50.5    10 y1995s7r10
#> # … with 1,790 more rows
predator_simulated
#> # A tibble: 10,200 × 7
#>    year_f sfa   weight_per_km2 lon_dec lat_dec   row uniqueID  
#>    <fct>  <chr>          <dbl>   <dbl>   <dbl> <int> <chr>     
#>  1 1995   7               9.90   -61.0    60.0     1 y1995s7r1 
#>  2 1995   7             128.     -60.0    57.6     2 y1995s7r2 
#>  3 1995   7             358.     -58.0    55.6     3 y1995s7r3 
#>  4 1995   7               0      -58.0    55.9     4 y1995s7r4 
#>  5 1995   7             103.     -60.5    57.2     5 y1995s7r5 
#>  6 1995   7              57.3    -53.7    51.8     6 y1995s7r6 
#>  7 1995   7              58.6    -53.6    50.7     7 y1995s7r7 
#>  8 1995   7               0      -50.2    50.1     8 y1995s7r8 
#>  9 1995   7               0      -54.6    54.0     9 y1995s7r9 
#> 10 1995   7             131.     -52.8    51.0    10 y1995s7r10
#> # … with 10,190 more rows
catch_simulated
#> # A tibble: 2,020 × 7
#>    year_f sfa   catch lon_dec lat_dec   row uniqueID  
#>    <fct>  <chr> <dbl>   <dbl>   <dbl> <int> <chr>     
#>  1 1991   4     2527.   -62.2    59.5     1 y1991s4r1 
#>  2 1991   4     4194.   -61.0    58.0     2 y1991s4r2 
#>  3 1991   4     7438.   -62.4    60.3     3 y1991s4r3 
#>  4 1991   4        0    -58.2    55.2     4 y1991s4r4 
#>  5 1991   4     3837.   -57.8    55.5     5 y1991s4r5 
#>  6 1991   4     3196.   -58.6    56.3     6 y1991s4r6 
#>  7 1991   4     3214.   -57.9    55.7     7 y1991s4r7 
#>  8 1991   4        0    -56.4    54.4     8 y1991s4r8 
#>  9 1991   4      539.   -58.9    55.6     9 y1991s4r9 
#> 10 1991   4     4234.   -55.2    53.2    10 y1991s4r10
#> # … with 2,010 more rows

bounds <- spm_as_boundary(boundaries = sfa_boundaries, 
                          boundary = "sfa")

plot(bounds)

biomass_dataset <- 
  spm_as_dataset(borealis_simulated, name = "borealis",
                 density = "weight_per_km2",
                 time = "year_f",
                 coords = c('lon_dec','lat_dec'), 
                 uniqueID = "uniqueID")
#> ℹ  Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> !  Warning: sspm is assuming WGS 84 CRS is to be used for casting

biomass_dataset
#> 
#> ‒‒ Dataset borealis ‒‒
#> →  [1800 rows, 9 columns]
#> →  Density : weight_per_km2
#> →  Time : year_f

predator_dataset <- 
  spm_as_dataset(predator_simulated, name = "all_predators", 
                 density = "weight_per_km2",
                 time = "year_f",
                 coords = c("lon_dec", "lat_dec"),
                 uniqueID = "uniqueID")
#> ℹ  Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> !  Warning: sspm is assuming WGS 84 CRS is to be used for casting

predator_dataset
#> 
#> ‒‒ Dataset all_predators ‒‒
#> →  [10200 rows, 8 columns]
#> →  Density : weight_per_km2
#> →  Time : year_f

bounds_voronoi <- bounds %>% 
  spm_discretize(method = "tesselate_voronoi",
                 with = biomass_dataset, 
                 nb_samples = 30)
#> ℹ  Discretizing using method tesselate_voronoi

bounds_voronoi
#> 
#> ‒‒ Boundaries (Discrete) ‒‒
#> →  [4 rows, 3 columns]
#>   ★ Points — [120 features, 10 columns]
#>   ★ Patches — [92 features, 4 columns]
#> →  Column : sfa
#> →  Area : area_sfa

The other available method is triangulate_delaunay for delaunay triangulation. Here the a argument is used to set the size of the mesh (see RTriangle::triangulate for more details).

## Not run
bounds_delaunay <- bounds %>%
  spm_discretize(method = "triangulate_delaunay", a = 1, q = 30)
bounds_delaunay

plot(bounds_voronoi)

## Not run
plot(bounds_delaunay)

spm_patches(bounds_voronoi)
#> Simple feature collection with 92 features and 3 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -64.18658 ymin: 46.00004 xmax: -46.6269 ymax: 60.84488
#> Geodetic CRS:  WGS 84
#> # A tibble: 92 × 4
#>    sfa   patch_id patch_area                                            geometry
#>  * <fct> <fct>        [km^2]                                       <POLYGON [°]>
#>  1 4     P1            5867. ((-62.56544 59.47292, -62.51885 60.2501, -62.42353…
#>  2 4     P2            6294. ((-61.15825 60.5856, -61.18083 60.61861, -61.16696…
#>  3 4     P3            1594. ((-61.98352 59.91756, -62.14032 59.90335, -62.3916…
#>  4 4     P4            1882. ((-61.60726 58.17744, -61.55968 58.21747, -61.5430…
#>  5 4     P5            1890. ((-61.54308 58.72455, -60.98005 58.90554, -60.9787…
#>  6 4     P6            2149. ((-62.81971 59.18792, -62.54312 59.42411, -62.5654…
#>  7 4     P7            2461. ((-61.55968 58.21747, -60.90082 58.30673, -60.7969…
#>  8 4     P8            1580. ((-60.85729 60.08226, -60.87677 60.07412, -61.2755…
#>  9 4     P9            7244. ((-60.30623 57.88979, -60.09616 58.23633, -60.1909…
#> 10 4     P10           1570. ((-59.93021 58.87649, -60.19091 58.62062, -60.0961…
#> # … with 82 more rows
spm_points(bounds_voronoi)
#> Simple feature collection with 120 features and 9 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -62.95014 ymin: 46.05739 xmax: -47.39935 ymax: 60.81292
#> Geodetic CRS:  WGS 84
#> # A tibble: 120 × 10
#> # Groups:   sfa [4]
#>    year_f weight_per_km2 temp_at_bottom lon_dec lat_dec   row uniqueID   
#>  * <fct>       [kg/km^2]          <dbl>   <dbl>   <dbl> <int> <chr>      
#>  1 1995            7117.          3.88    -61.7    60.0    21 y1995s6r21 
#>  2 1995               0           2.33    -59.2    57.3    23 y1995s6r23 
#>  3 1995               0           1.03    -54.0    46.1    40 y1995s6r40 
#>  4 1996             553.          2.97    -59.5    56.0    65 y1996s7r65 
#>  5 1996            6538.          4.42    -61.3    60.8   102 y1996s5r102
#>  6 1996            2427.          2.17    -55.4    54.4   105 y1996s5r105
#>  7 1996            1945.          0       -48.4    47.2   138 y1996s4r138
#>  8 1997               0           0.649   -55.3    54.7   144 y1997s7r144
#>  9 1997            1430.          0       -54.4    50.2   149 y1997s7r149
#> 10 1997               0           5.18    -60.9    59.6   182 y1997s4r182
#> # … with 110 more rows, and 3 more variables: geometry <POINT [°]>, sfa <fct>,
#> #   area_sfa [km^2]

biomass_smooth <- biomass_dataset %>%  
  spm_smooth(weight_per_km2 ~ sfa + smooth_time(by = sfa) + 
               smooth_space() + 
               smooth_space_time(),
             boundaries = bounds_voronoi, 
             family=tw)%>% 
  spm_smooth(temp_at_bottom ~ smooth_time(by=sfa, xt = NULL) +
               smooth_space() +
               smooth_space_time(xt = NULL),
             family=gaussian)
#> ℹ  Fitting formula: weight_per_km2 ~ sfa + smooth_time(by = sfa) + smooth_space() + smooth_space_time() for dataset 'borealis'
#> ℹ  Note:  response variable temp_at_bottom is NOT a biomass density variable
#> ℹ  Fitting formula: temp_at_bottom ~ smooth_time(by = sfa, xt = NULL) + smooth_space() + smooth_space_time(xt = NULL) for dataset 'borealis'

biomass_smooth
#> 
#> ‒‒ Dataset borealis (Mapped) ‒‒
#> →  [1801 rows, 12 columns]
#> →  Density : weight_per_km2
#> →  Time : year_f
#> →  Smoothed data : [2208 rows, 8 columns]
#>    ★ Smoothed vars: temp_at_bottom — weight_per_km2

plot(biomass_smooth, var = "weight_per_km2", log = FALSE)
#> Registered S3 method overwritten by 'ggforce':
#>   method           from 
#>   scale_type.units units

plot(biomass_smooth, var = "weight_per_km2", use_sf = TRUE)

predator_smooth <- predator_dataset %>%  
  spm_smooth(weight_per_km2 ~ smooth_time() + smooth_space(),
             boundaries = bounds_voronoi,
             drop.unused.levels = F, family=tw, method= "fREML")
#> ℹ  Fitting formula: weight_per_km2 ~ smooth_time() + smooth_space() for dataset 'all_predators'

predator_smooth
#> 
#> ‒‒ Dataset all_predators (Mapped) ‒‒
#> →  [10201 rows, 11 columns]
#> →  Density : weight_per_km2
#> →  Time : year_f
#> →  Smoothed data : [3680 rows, 7 columns]
#>    ★ Smoothed vars: weight_per_km2

catch_dataset <- 
  spm_as_dataset(catch_simulated, name = "catch_data", 
                 biomass = "catch",
                 time = "year_f", 
                 uniqueID = "uniqueID", 
                 coords = c("lon_dec", "lat_dec"))
#> ℹ  Casting data matrix into simple feature collection using columns: lon_dec, lat_dec
#> !  Warning: sspm is assuming WGS 84 CRS is to be used for casting

catch_dataset
#> 
#> ‒‒ Dataset catch_data ‒‒
#> →  [2020 rows, 8 columns]
#> →  Biomass : catch
#> →  Time : year_f

biomass_smooth_w_catch <- 
  spm_aggregate_catch(biomass = biomass_smooth, 
                      catch = catch_dataset, 
                      biomass_variable = "weight_per_km2",
                      catch_variable = "catch",
                      fill = mean)
#> ℹ  Offsetting biomass with catch data using columns: weight_per_km2, catch

biomass_smooth_w_catch
#> 
#> ‒‒ Dataset borealis (Mapped) ‒‒
#> →  [1801 rows, 12 columns]
#> →  Density : weight_per_km2
#> →  Time : year_f
#> →  Smoothed data : [2208 rows, 13 columns]
#>    ★ Smoothed vars: temp_at_bottom — weight_per_km2
#>    ★ Vars with catch: weight_per_km2_borealis_with_catch

sspm_model <- sspm(biomass = biomass_smooth_w_catch, 
                   predictors = predator_smooth)
#> ℹ  Joining smoothed data from all datasets

sspm_model
#> 
#> ‒‒ Model (2 datasets) ‒‒
#> →  Smoothed data : [2208 rows, 14 columns]
#>    ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#>    ★ Vars with catch: weight_per_km2_borealis_with_catch

sspm_model <- sspm_model %>% 
  spm_split(year_f %in% c(1990:2017))

sspm_model
#> 
#> ‒‒ Model (2 datasets) ‒‒
#> →  Smoothed data : [2208 rows, 15 columns] / [2116 train, 92 test]
#>    ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#>    ★ Vars with catch: weight_per_km2_borealis_with_catch

sspm_model <- sspm_model %>% 
  spm_lag(vars = c("weight_per_km2_borealis_with_catch", 
                   "weight_per_km2_all_predators"), 
          n = 1)

sspm_model
#> 
#> ‒‒ Model (2 datasets) ‒‒
#> →  Smoothed data : [2208 rows, 17 columns] / [2116 train, 92 test]
#>    ★ Smoothed vars: temp_at_bottom — weight_per_km2_all_predators — weight_per_km2_borealis
#>    ★ Vars with catch: weight_per_km2_borealis_with_catch — weight_per_km2_borealis_with_catch_lag_1
#>    ★ lagged vars: weight_per_km2_all_predators_lag_1 — weight_per_km2_borealis_with_catch_lag_1

sspm_model_fit <- sspm_model %>% 
  spm(log_productivity ~ sfa +
        weight_per_km2_all_predators_lag_1 +
        smooth_space(by = weight_per_km2_borealis_with_catch) +
        smooth_space(), 
      family = mgcv::scat)
#> ℹ  Fitting SPM formula: log_productivity ~ sfa + weight_per_km2_all_predators_lag_1 + smooth_space(by = weight_per_km2_borealis_with_catch) + smooth_space()
#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation

#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation

#> Warning in estimate.theta(theta, family, G$y, linkinv(eta), scale = scale1, :
#> step failure in theta estimation

sspm_model_fit
#> 
#> ‒‒ Model fit ‒‒
#> →  Smoothed data : [2208 rows, 17 columns] / [2116 train, 92 test]
#> →  Fit summary : 
#> 
#> Family: Scaled t(Inf,0.245) 
#> Link function: identity 
#> 
#> Formula:
#> log_productivity ~ sfa + weight_per_km2_all_predators_lag_1 + 
#>     s(patch_id, k = 30, bs = "mrf", xt = list(penalty = pen_mat_space), 
#>         by = weight_per_km2_borealis_with_catch) + s(patch_id, 
#>     k = 30, bs = "mrf", xt = list(penalty = pen_mat_space))
#> 
#> Parametric coefficients:
#>                                      Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)                        -6.489e-01  6.973e-02  -9.306  < 2e-16 ***
#> sfa5                               -5.092e-02  6.242e-02  -0.816 0.414781    
#> sfa6                                5.858e-02  8.000e-02   0.732 0.464106    
#> sfa7                               -3.388e-02  8.774e-02  -0.386 0.699443    
#> weight_per_km2_all_predators_lag_1 -3.894e-05  1.126e-05  -3.460 0.000552 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Approximate significance of smooth terms:
#>                                                  edf Ref.df      F  p-value    
#> s(patch_id):weight_per_km2_borealis_with_catch 15.90     30 28.879  < 2e-16 ***
#> s(patch_id)                                     6.18     29  0.414 0.000398 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> R-sq.(adj) =  0.547   Deviance explained = 55.3%
#> -REML =   2914  Scale est. = 1         n = 2024

plot(sspm_model_fit, train_test = TRUE, scales = "free")
#> Warning: Removed 92 rows containing missing values (geom_point).

preds <- predict(sspm_model_fit)
head(preds)
#> Simple feature collection with 6 features and 6 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -64.18658 ymin: 59.47292 xmax: -62.26589 ymax: 60.84335
#> Geodetic CRS:  WGS 84
#>      pred_log      pred patch_id year_f sfa      patch_area
#> 1  0.20905078 1.2325076       P1   1995   4 5866.769 [km^2]
#> 2 -0.01257562 0.9875031       P1   1996   4 5866.769 [km^2]
#> 3 -0.07280029 0.9297865       P1   1997   4 5866.769 [km^2]
#> 4 -0.07996789 0.9231460       P1   1998   4 5866.769 [km^2]
#> 5 -0.11092437 0.8950064       P1   1999   4 5866.769 [km^2]
#> 6 -0.12435432 0.8830669       P1   2000   4 5866.769 [km^2]
#>                         geometry
#> 1 POLYGON ((-62.56544 59.4729...
#> 2 POLYGON ((-62.56544 59.4729...
#> 3 POLYGON ((-62.56544 59.4729...
#> 4 POLYGON ((-62.56544 59.4729...
#> 5 POLYGON ((-62.56544 59.4729...
#> 6 POLYGON ((-62.56544 59.4729...

We can also get the predictions for biomass by passing the biomass variable name.

biomass_preds <- predict(sspm_model_fit, biomass = "weight_per_km2_borealis")
head(biomass_preds)
#> Simple feature collection with 6 features and 8 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -64.18658 ymin: 59.47292 xmax: -62.26589 ymax: 60.84335
#> Geodetic CRS:  WGS 84
#> # A tibble: 6 × 9
#>   year_f patch_id sfa   patch_area biomass_with_catch   biomass biomass_density…
#>    <dbl> <fct>    <fct>     [km^2]               [kg]      [kg]        [kg/km^2]
#> 1   1995 P1       4          5867.                NA        NA               NA 
#> 2   1996 P1       4          5867.          21584010. 21579593.            3679.
#> 3   1997 P1       4          5867.          15522657. 15520115.            2646.
#> 4   1998 P1       4          5867.          14264679. 14263299.            2431.
#> 5   1999 P1       4          5867.          13831937. 13826776.            2358.
#> 6   2000 P1       4          5867.          13028456. 13024039.            2221.
#> # … with 2 more variables: biomass_density [kg/km^2], geometry <POLYGON [°]>

biomass_one_step <- predict(sspm_model_fit, biomass = "weight_per_km2_borealis", 
                            next_ts = TRUE)
#> ! Not all vars are lagged vars: weight_per_km2_borealis_with_catch
head(biomass_one_step)
#> Simple feature collection with 6 features and 5 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -64.18658 ymin: 58.17744 xmax: -60.85729 ymax: 60.84488
#> Geodetic CRS:  WGS 84
#> # A tibble: 6 × 6
#>   patch_id year_f sfa   biomass patch_area                              geometry
#>   <fct>     <dbl> <fct>    [kg]     [km^2]                         <POLYGON [°]>
#> 1 P1         2019 4          NA      5867. ((-62.56544 59.47292, -62.51885 60.2…
#> 2 P2         2019 4          NA      6294. ((-61.15825 60.5856, -61.18083 60.61…
#> 3 P3         2019 4          NA      1594. ((-61.98352 59.91756, -62.14032 59.9…
#> 4 P4         2019 4          NA      1882. ((-61.60726 58.17744, -61.55968 58.2…
#> 5 P5         2019 4          NA      1890. ((-61.54308 58.72455, -60.98005 58.9…
#> 6 P6         2019 4          NA      2149. ((-62.81971 59.18792, -62.54312 59.4…

plot(sspm_model_fit, log = T, scales = 'free')
#> Warning: Removed 1 row(s) containing missing values (geom_path).
#> Warning: Removed 92 rows containing missing values (geom_point).

plot(sspm_model_fit, log = T, use_sf = TRUE)

plot(sspm_model_fit, biomass = "weight_per_km2_borealis",  scales = "free")
#> Warning: Removed 1 row(s) containing missing values (geom_path).
#> Warning: Removed 92 rows containing missing values (geom_point).

plot(sspm_model_fit, biomass = "weight_per_km2_borealis", use_sf = TRUE)

plot(sspm_model_fit, biomass = "weight_per_km2_borealis",
     next_ts = TRUE, aggregate = TRUE, scales = "free", interval = T)
#> ! Not all vars are lagged vars: weight_per_km2_borealis_with_catch
#> Warning in matrix(qrc[-1], nrow(pm$X), ncol(pm$X) - 1, byrow = TRUE): non-empty
#> data for zero-extent matrix
#> Warning: Removed 2 row(s) containing missing values (geom_path).
#> Warning: Removed 8 rows containing missing values (geom_point).
#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf

#> Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
#> -Inf