We’ll fit some linear and non-linear models for dominant height, and compare them. We’ll use the first 10 strata of the exemple dataset exfm16.
library(forestmangr)
library(dplyr)
library(tidyr)
data(exfm14)
<- exfm14 %>% filter(strata%in%1:10)
data_ex
data_ex#> # A tibble: 485 x 4
#> strata plot age dh
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 30 12.3
#> 2 1 2 42 16.3
#> 3 1 2 54 17.6
#> 4 1 2 66 18.9
#> 5 1 2 78 21.0
#> 6 1 2 90 20.5
#> # ... with 479 more rows
In order to fit Schumacher’s dominant height model, we can use lm_table
. Thanks to the log
and inv
functions, there is no need to create new variables:
<- lm_table(data_ex, log(dh) ~ inv(age))
mod1
mod1#> b0 b1 Rsqr Rsqr_adj Std.Error
#> 1 3.484289 -24.84688 0.5774814 0.5766066 0.1489379
To fit a non-linear model, like Chapman-Richards’ we can use the nls_table
function. This function uses Levenberg-Marquardt’s algorithm by default, in order to assure a good fit. Since this is a non-linear fit, we have to input initial values for all coefficients:
<- nls_table(data_ex, dh ~ b0 * (1 - exp( -b1 * age ) )^b2,
mod2 mod_start = c( b0=23, b1=0.03, b2 = 1.3 ) )
mod2#> b0 b1 b2
#> 1 25.07289 0.04864685 2.210243
If we wanted to fit one model of each stratum, we can use the .groups
argument:
<- lm_table(data_ex, log(dh) ~ inv(age), .groups = "strata")
mod1
mod1#> strata b0 b1 Rsqr Rsqr_adj Std.Error
#> 1 1 3.409597 -23.89531 0.6735579 0.6626765 0.1231461
#> 2 2 3.438958 -20.93546 0.5838795 0.5748334 0.1161602
#> 3 3 3.466038 -24.71091 0.5240282 0.5145088 0.1672240
#> 4 4 3.503344 -23.68113 0.6594713 0.6511657 0.1146404
#> 5 5 3.536465 -26.23788 0.5750804 0.5692596 0.1571615
#> 6 6 3.541486 -26.18396 0.5843789 0.5758968 0.1589094
#> 7 7 3.546067 -28.33781 0.7170698 0.7125789 0.1331317
#> 8 8 3.385322 -22.29504 0.5114795 0.4966758 0.1641533
#> 9 9 3.444338 -23.34891 0.5151849 0.5062068 0.1638949
#> 10 10 3.411819 -24.00458 0.5749233 0.5585742 0.1491196
<- nls_table(data_ex, dh ~ b0 * (1 - exp( -b1 * age ) )^b2,
mod2 mod_start = c( b0=23, b1=0.03, b2 = 1.3 ),
.groups = "strata" )
mod2#> strata b0 b1 b2
#> 1 1 22.38251 0.06883817 3.9495071
#> 2 2 23.61424 0.08097176 5.4171092
#> 3 3 24.09040 0.05813164 2.9428225
#> 4 4 29.73188 0.02172249 0.8875606
#> 5 5 26.74705 0.04163860 1.8799458
#> 6 6 25.63739 0.05955898 3.4005108
#> 7 7 26.68246 0.03878189 1.8419905
#> 8 8 23.35938 0.05159001 2.2394412
#> 9 9 27.31603 0.02544091 0.9842925
#> 10 10 22.95016 0.06214439 3.3861913
If the fit is not ideal, it’s possible to use a dataframe with starting values for each stratum, and use it as an input for mod_start
:
<- data.frame(strata = c(1:10),
tab_start rbind(
data.frame(b0=rep(23, 5),b1=rep(0.03,5),b2=rep(1.3,5) ),
data.frame(b0=rep(23, 5),b1=rep(0.03,5),b2=rep(.5,5) )))
tab_start#> strata b0 b1 b2
#> 1 1 23 0.03 1.3
#> 2 2 23 0.03 1.3
#> 3 3 23 0.03 1.3
#> 4 4 23 0.03 1.3
#> 5 5 23 0.03 1.3
#> 6 6 23 0.03 0.5
#> 7 7 23 0.03 0.5
#> 8 8 23 0.03 0.5
#> 9 9 23 0.03 0.5
#> 10 10 23 0.03 0.5
<- nls_table(data_ex, dh ~ b0 * (1 - exp( -b1 * age ) )^b2,
mod2 mod_start = tab_start,
.groups = "strata" )
mod2#> strata b0 b1 b2
#> 1 1 22.38251 0.06883817 3.9495071
#> 2 2 23.61424 0.08097176 5.4171092
#> 3 3 24.09040 0.05813164 2.9428225
#> 4 4 29.73188 0.02172249 0.8875606
#> 5 5 26.74705 0.04163860 1.8799458
#> 6 6 25.63740 0.05955884 3.4004949
#> 7 7 26.68250 0.03878163 1.8419741
#> 8 8 23.35905 0.05159620 2.2398949
#> 9 9 27.31667 0.02543790 0.9841984
#> 10 10 22.95016 0.06214456 3.3862119
Now we’re going to fit some other models. These are:
Schumacher: \[ Ln(DH) = \beta_0 + \beta_1 * \frac{1}{age} \]
Chapman-Richards: \[ DH = \beta_0 * (1 - exp^{-\beta_1 * age})^{\beta_2} \]
Bayley-Clutter: \[ Ln(DH) = \beta_0 + \beta_1 * \begin{pmatrix} \frac{1}{age} \end{pmatrix} ^{\beta_2} \]
Curtis: \[ DH = \beta_0 + \beta_1 * \frac{1}{age} \]
We’ll fit these models and add their estimated values to the original data using the merge_est
output and naming each estimated variable with the est_name
argument:
<- data_ex %>%
data_ex_est lm_table(log(dh) ~ inv(age), .groups = "strata",
output = "merge_est", est.name = "Schumacher") %>%
nls_table(dh ~ b0 * (1 - exp( -b1 * age ) )^b2,
mod_start = c( b0=23, b1=0.03, b2 = 1.3 ),.groups="strata",
output ="merge_est",est.name="Chapman-Richards") %>%
nls_table(log(dh) ~ b0 + b1 * ( inv(age)^b2 ) ,
mod_start = c( b0=3, b1=-130, b2 = 1.5),.groups = "strata",
output ="merge_est",est.name = "Bailey-Clutter") %>%
lm_table(dh ~ inv(age), .groups = "strata",
output = "merge_est", est.name = "Curtis")
head(data_ex_est)
#> strata plot age dh Schumacher Chapman-Richards Bailey-Clutter Curtis
#> 1 1 2 30 12.29841 13.64110 13.10199 12.58661 13.61765
#> 2 1 2 42 16.32000 17.12709 17.86289 18.23334 17.58237
#> 3 1 2 54 17.58000 19.43531 20.31013 20.32246 19.78499
#> 4 1 2 66 18.94000 21.06362 21.45677 21.20276 21.18665
#> 5 1 2 78 21.04000 22.27015 21.97365 21.62661 22.15704
#> 6 1 2 90 20.48000 23.19865 22.20283 21.85316 22.86866
Ps: The lm_table
function checks if the model has log in the y variable, and if it does, it removes it automatically when estimating variables. Because of that, there’s no need to calculate the exponential for the estimated variables.
In order to compare these models, we’ll calculate the root mean square error and bias for all models. To do this, we’ll gather all estimated variables in a single column using tidyr::gather
, group by model, and use the rmse_per
and bias_per
functions:
%>%
data_ex_est gather(Model, Value,
`Chapman-Richards`, `Bailey-Clutter`, Curtis) %>%
Schumacher, group_by(Model) %>%
summarise(
RMSE = rmse_per(y = dh, yhat = Value),
BIAS = bias_per(y = dh, yhat = Value) )
#> Warning: attributes are not identical across measure variables;
#> they will be dropped
#> # A tibble: 4 x 3
#> Model RMSE BIAS
#> <chr> <dbl> <dbl>
#> 1 Bailey-Clutter 14.8 1.03e+ 0
#> 2 Chapman-Richards 14.7 -7.37e- 4
#> 3 Curtis 14.8 2.94e-15
#> 4 Schumacher 15.0 1.03e+ 0
Another way of comparing and evaluating these models is using residual graphical analysis. The function resid_plot
can help us with that:
resid_plot(data_ex_est, "dh", "Schumacher", "Chapman-Richards", "Bailey-Clutter", "Curtis")
There are other types of plots avaiable, such as histogram:
resid_plot(data_ex_est, "dh", "Schumacher","Chapman-Richards","Bailey-Clutter", "Curtis",
type = "histogram_curve")
And versus:
resid_plot(data_ex_est, "dh", "Schumacher", "Chapman-Richards", "Bailey-Clutter", "Curtis",
type = "versus")