Multiple Models

Rarely do we ever work with one model. Instead, you will typically want to evaluate how the posterior of a parameter changes with changes to data or model. It would be nice to scale up the functionality of ‘postpack’ to allow simple or complex post-processing tasks from posterior samples from similar models using the same consistent workflow. This vignette illustrates how this can be acheived using ‘postpack’.

I have only recently began working with the ‘postpack’ functions in this regard efficiently, so the ideas are not fully fleshed out yet. In future versions of ‘postpack’, more functionality may be added in this regard.

The most significant trick to use is to store multiple mcmc.list objects as elements of a larger list object. Suppose you have two mcmc.list objects from two highly similar models, named cjs and cjs_no_rho (see vignette("example-mcmclists") or ?cjs for more details).

Load these objects into your session:

library(postpack)
data(cjs)
data(cjs_no_rho)

And create a list object with them, where each element is an mcmc.list object:

post_list = list(cjs, cjs_no_rho)

Be sure to assign element names, which allows tracking which output is from which model later on:

names(post_list) = c("est_rho", "no_rho")

From here, the world is wide open to you. Anything you would do with one mcmc.list object, you can do with two (or any number) through the use of the base R apply() family of functions. For example, extract the dimensions of the saved output for each model:

sapply(post_list, post_dim)

##           est_rho no_rho
## burn        11000  11000
## post_burn   50000  50000
## thin          200    200
## chains          2      2
## saved         500    500
## params         21     21

Notice the two have identical dimensions. You can see that each model has the same parameters saved:

sapply(post_list, get_params, type = "base_index")

##       est_rho    no_rho    
##  [1,] "B0"       "B0"      
##  [2,] "sig_B0"   "sig_B0"  
##  [3,] "B1"       "B1"      
##  [4,] "sig_B1"   "sig_B1"  
##  [5,] "b0[1]"    "b0[1]"   
##  [6,] "b0[2]"    "b0[2]"   
##  [7,] "b0[3]"    "b0[3]"   
##  [8,] "b0[4]"    "b0[4]"   
##  [9,] "b0[5]"    "b0[5]"   
## [10,] "b1[1]"    "b1[1]"   
## [11,] "b1[2]"    "b1[2]"   
## [12,] "b1[3]"    "b1[3]"   
## [13,] "b1[4]"    "b1[4]"   
## [14,] "b1[5]"    "b1[5]"   
## [15,] "SIG[1,1]" "SIG[1,1]"
## [16,] "SIG[2,1]" "SIG[2,1]"
## [17,] "SIG[1,2]" "SIG[1,2]"
## [18,] "SIG[2,2]" "SIG[2,2]"
## [19,] "p[2]"     "p[2]"    
## [20,] "p[3]"     "p[3]"    
## [21,] "p[4]"     "p[4]"

You could verify that all parameters in both models converged well according to the Rhat statistic:

sapply(post_list, function(model) post_summ(model, "", Rhat = TRUE)["Rhat",])

##          est_rho no_rho
## B0         1.011  1.020
## sig_B0     1.022  1.030
## B1         1.033  1.020
## sig_B1     1.043  1.010
## b0[1]      1.000  1.004
## b0[2]      0.998  0.998
## b0[3]      1.004  1.008
## b0[4]      0.999  1.002
## b0[5]      0.999  1.008
## b1[1]      1.005  0.999
## b1[2]      1.006  1.011
## b1[3]      1.007  1.009
## b1[4]      1.000  1.013
## b1[5]      1.017  0.999
## SIG[1,1]   1.182  1.141
## SIG[2,1]   1.009    NaN
## SIG[1,2]   1.009    NaN
## SIG[2,2]   1.199  1.074
## p[2]       1.000  1.000
## p[3]       0.999  1.004
## p[4]       1.005  1.003

(The NaN values for "SIG[1,2]" and "SIG[2,1]" in the no_rho model are because those had the same value each MCMC iteration).

You could extract the summaries of the global survival coefficients from each model to see that the qualitative inference does not depend on the feature that differs between these two models:

lapply(post_list, function(model) post_summ(model, "^B", digits = 2))

## $est_rho
##         B0   B1
## mean  1.59 0.42
## sd    0.49 0.21
## 50%   1.57 0.40
## 2.5%  0.55 0.06
## 97.5% 2.59 0.86
## 
## $no_rho
##         B0    B1
## mean  1.62  0.38
## sd    0.47  0.19
## 50%   1.60  0.39
## 2.5%  0.81 -0.02
## 97.5% 2.48  0.69

Or verify that the detection probabilities are not affected either:

lapply(post_list, function(model) post_summ(model, "p", digits = 2))

## $est_rho
##       p[2] p[3] p[4]
## mean  0.74 0.70 0.63
## sd    0.02 0.03 0.04
## 50%   0.74 0.70 0.63
## 2.5%  0.69 0.64 0.55
## 97.5% 0.79 0.76 0.72
## 
## $no_rho
##       p[2] p[3] p[4]
## mean  0.74 0.70 0.64
## sd    0.03 0.03 0.05
## 50%   0.75 0.70 0.64
## 2.5%  0.69 0.64 0.56
## 97.5% 0.79 0.76 0.73

You can embed more steps in the function that is applied to each mcmc.list object. For example, the code below obtains the posterior mean correlation matrix for each model:

lapply(post_list, function(model) {
  SIG_decomp = vcov_decomp(model, "SIG")
  rho_mean = post_summ(SIG_decomp, "rho")["mean",]
  array_format(rho_mean)
})

## $est_rho
##           [,1]      [,2]
## [1,] 1.0000000 0.1847515
## [2,] 0.1847515 1.0000000
## 
## $no_rho
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1

Or more complex still, to predict the survival probability between two consecutive detection arrays for fish of different sizes in each year and model:

lapply(post_list, function(model) {
  # 2SDs below average length, average length, and 2SDs above average length
  # model was fitted with length scaled and centered
  pred_length = c(-2,0,2)  

  # extract posterior mean of random coefficients
  b0_mean = post_summ(model, "b0")["mean",]
  b1_mean = post_summ(model, "b1")["mean",]

  # predict survival each year from coefficients at each length
  pred_phi = sapply(1:5, function(y) {
    logit_phi = b0_mean[y] + b1_mean[y] * pred_length
    phi = exp(logit_phi)/(1 + exp(logit_phi))
    round(phi, 2)
  })
  
  # give dimension names
  dimnames(pred_phi) = list(c("small", "average", "large"), paste0("y", 1:5))
  
  # return the predicted survival
  return(pred_phi)
})

## $est_rho
##           y1   y2   y3   y4   y5
## small   0.66 0.81 0.67 0.70 0.60
## average 0.80 0.91 0.85 0.82 0.75
## large   0.89 0.96 0.94 0.90 0.86
## 
## $no_rho
##           y1   y2   y3   y4   y5
## small   0.66 0.83 0.66 0.70 0.59
## average 0.80 0.91 0.85 0.82 0.75
## large   0.89 0.96 0.95 0.89 0.86