The clmplus
package was created to provide actuarial
scientists the tool-box that we illustrate in our theoretical paper.
This vignette is made to make the readers able to recreate the type of
analysis we make in the data application section of our paper. We
believe it is extremely important to provide this vignette to disclose
to the public the code we used to perform our analysis. Our results can
be replicated by just running this vignette with the following seed.
This vignette is organized as follows:
In the bake-off section we provide numerical results comparing the performance
of the clmplus models with that of the apc package models on the test set.
In the rankings section we rank the clmplus models and the apc package models
based on their performance on the validation set.
During the analysis we suggest how practitioners should split the data in order to perform the studies we just described.
For the case study in the paper we want to make our analysis as close
as possible to a real-life reserving problem. In order to do so, we
decided to select 30 data sets available to the public from the R
packages clmplus, ChainLadder, apc and CASdatasets.
# Publicly available run-off triangles used in the paper's case study,
# collected from the clmplus, ChainLadder, apc and CASdatasets packages.
# The apc-package triangles are incremental, so they are cumulated with
# incr2cum(); all entries are cumulative payments triangles.
list.of.datasets <- list(
  GenIns = GenIns,
  sifa.mod = sifa.mod,
  sifa.gtpl = sifa.gtpl,
  sifa.mtpl = sifa.mtpl,
  amases.gtpl = amases.gtpl,
  amases.mod = amases.mod,
  amases.mtpl = amases.mtpl,
  bz = incr2cum(data.loss.BZ()$response),
  ta = incr2cum(data.loss.TA()$response),
  xl = incr2cum(data.loss.XL()$response),
  vnj = incr2cum(data.loss.VNJ()$response),
  abc = ABC,
  autoC = auto$CommercialAutoPaid,
  autoP = auto$PersonalAutoPaid,
  autoBI = AutoBI$AutoBIPaid,
  mclpaid = MCLpaid,
  medmal = MedMal$MedMalPaid,
  mortgage = Mortgage,
  mw08 = MW2008,
  mw14 = MW2014,
  ukmotor = UKMotor,
  usapaid = USAApaid
)
Within this section we provide the code to measure the models extrapolation accuracy. In order to do so, we need to coherently split the data set into training, validation and testing. We show an example for a 12x12 run-off triangle.
# Illustration of the train/validation/test split on a 12x12 run-off triangle:
# value 1 = training cells, 2 = validation diagonal, 3 = test diagonal,
# NA = cells left blank in the picture.
J <- 12
df <- data.frame(expand.grid(c(0:(J - 1)), c(0:(J - 1))), c(1:(J^2)))
colnames(df) <- c("origin", "dev", "value")
df$value[df$origin + df$dev == (J - 1)] <- c(3)  # test: last observed diagonal
df$value[df$origin + df$dev < (J - 2)] <- c(1)   # train: everything above
df$value[df$origin + df$dev == (J - 2)] <- c(2)  # validation diagonal
df$value[df$origin + df$dev >= J] <- c(NA)       # lower-right: outside triangle
# NAs in the lower tail (corner cells blanked so the legend stays clean)
df[J, 3] <- c(NA)
df[J - 1, 3] <- c(NA)
df[J + J - 1, 3] <- c(NA)
df[J * J - J + 1, 3] <- c(NA)  # NOTE(review): the original set this cell twice
# NAs in the upper tail
df[J * J - J + 1 - 12, 3] <- c(NA)
df[J * J - J + 2 - 12, 3] <- c(NA)

ggplot(data = df, aes(x = as.integer(dev), y = as.integer(origin))) +
  geom_tile(aes(fill = as.factor(value), color = "#000000")) + scale_y_reverse() +
  scale_fill_manual(values = c("royalblue", "darkred", "darkgreen", "white"),
                    na.value = "white",
                    labels = c("Train", "Validation", "Test", "")) +
  theme_classic() +
  labs(x = "Development year", y = "Accident year", fill = "") +
  theme(axis.title.x = element_text(size = 8), axis.text.x = element_text(size = 7)) +
  theme(axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 7)) +
  scale_color_brewer(guide = 'none')
# Combine two best-model score frames (clmplus and apc) into a single
# "overall best" row: both frames are coerced to numeric, stacked, and for
# each accuracy-measure column the entry with the smallest absolute value
# is kept (via the abs.min helper, defined elsewhere).
#
# Args:
#   df1, df2: one-row data frames of accuracy measures (character or numeric).
# Returns: a named numeric vector, one column-wise abs-minimum per measure.
best.of.the.bests <- function(df1, df2) {
  "
  Util to turn character columns values into numeric.
  "
  df1 <- apply(df1, MARGIN = 2, FUN = as.numeric)
  df2 <- apply(df2, MARGIN = 2, FUN = as.numeric)
  df3 <- rbind(df1, df2)
  # Pick, per column, the value closest to zero.
  df3 <- apply(df3, FUN = abs.min, MARGIN = 2)
  return(df3)
}
# Compare the clmplus (StMoMo-based) age-period-cohort models with the apc
# package's models on one run-off triangle.
#
# Args:
#   cumulative.payments.triangle: square matrix of cumulative payments.
# Returns: a list with the best model per accuracy measure for each family
#   (picked on validation, scored on test), agreement/choice frames between
#   validation and test picks, and a combined comparison data.frame.
# NOTE(review): relies on clmplus helpers (t2c, c2t, RtTriangle, fit.lc.nr,
#   forecast.lc.nr, t2c.full.square, incr2cum, cum2incr) and on a global
#   `models` list of StMoMo specifications defined elsewhere in the vignette.
modelcomparison.1d <- function(cumulative.payments.triangle) {
  "
  Function to compare the clmplus package age-period-cohort models with apc package age-period-cohort models performances across different triangles.
  This function takes a triangle of cumulative payments as input.
  It returns the accuracy measures for the two families on the triangle.
  "
  # function internal variables
  leave.out <- 2  # held-out diagonals: one for validation, one for test
  rmse <- NULL
  mae <- NULL
  model.name <- NULL
  error.incidence <- NULL
  model.family <- NULL
  mre <- NULL

  # data pre-processing ----
  J <- dim(cumulative.payments.triangle)[2]
  reduced.triangle <- c2t(t2c(cumulative.payments.triangle)[1:(J - leave.out), 1:(J - leave.out)])
  newt.rtt <- RtTriangle(reduced.triangle)
  newt.apc <- apc.data.list(response = newt.rtt$incremental.payments.triangle,
                            data.format = "CL")

  ## stmomo -----
  # Last fully observed column to roll forward, and the held-out true values.
  to.project <- t2c(cumulative.payments.triangle)[1:(J - leave.out - 1), J - leave.out]
  true.values <- t2c(cumulative.payments.triangle)[2:(J - leave.out), (J - leave.out + 1):J]

  for (ix in c('a', 'ac', 'ap', 'apc')) { ## names(models)
    hz.fit <- StMoMo::fit(models[[ix]],
                          Dxt = newt.rtt$occurrance,
                          Ext = newt.rtt$exposure,
                          iterMax = as.integer(1e+05))
    hz.rate <- forecast::forecast(hz.fit, h = leave.out)$rates

    J.new <- dim(reduced.triangle)[2]
    # Development factors from forecast hazard rates.
    fij <- (2 + hz.rate) / (2 - hz.rate)
    pred.mx <- fij
    pred.mx[, 1] <- fij[, 1] * c(NA, to.project)
    temp <- unname(pred.mx[1:(J.new - 1), 1][!is.na(pred.mx[1:(J.new - 1), 1])])
    pred.mx[, 2] <- fij[, 2] * c(rep(NA, J.new - length(temp)), temp)
    true.mx <- rbind(rep(NA, 2), true.values)
    # this is meant to be NA
    true.mx[2, 2] <- NA

    sq.errors <- (pred.mx - true.mx)^2
    abs.errors <- abs(pred.mx - true.mx)
    r.errors <- (pred.mx - true.mx) / true.mx
    error.inc.num <- apply(pred.mx - true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE)
    error.inc.den <- apply(true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE)
    # Column 1 scores the validation diagonal, column 2 the test diagonal.
    model.name.ix <- c(paste0(ix, ".val"), paste0(ix, ".test"))

    model.name <- c(model.name, model.name.ix)
    model.family <- c(model.family, rep(ix, 2))
    rmse <- c(rmse, sqrt(apply(sq.errors, MARGIN = 2, mean, na.rm = TRUE)))
    mae <- c(mae, apply(abs.errors, MARGIN = 2, mean, na.rm = TRUE))
    mre <- c(mre, apply(r.errors, MARGIN = 2, mean, na.rm = TRUE))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
  }

  # Lee-Carter via clmplus' own Newton-Raphson fit; only scored if converged.
  ix <- 'lc'
  hz.fit <- fit.lc.nr(data.T = newt.rtt)
  if (hz.fit$converged == TRUE) {
    hz.rate <- forecast.lc.nr(hz.fit, J = dim(newt.rtt$cumulative.payments.triangle)[2])$rates[, 1:leave.out]
    J.new <- dim(reduced.triangle)[2]
    fij <- (2 + hz.rate) / (2 - hz.rate)
    pred.mx <- fij
    pred.mx[, 1] <- fij[, 1] * c(NA, to.project)
    temp <- unname(pred.mx[1:(J.new - 1), 1][!is.na(pred.mx[1:(J.new - 1), 1])])
    pred.mx[, 2] <- fij[, 2] * c(rep(NA, J.new - length(temp)), temp)
    true.mx <- rbind(rep(NA, 2), true.values)
    # this is meant to be NA
    true.mx[2, 2] <- NA

    sq.errors <- (pred.mx - true.mx)^2
    abs.errors <- abs(pred.mx - true.mx)
    r.errors <- (pred.mx - true.mx) / true.mx
    error.inc.num <- apply(pred.mx - true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE)
    error.inc.den <- apply(true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE)
    model.name.ix <- c(paste0(ix, ".val"), paste0(ix, ".test"))

    model.name <- c(model.name, model.name.ix)
    model.family <- c(model.family, rep(ix, 2))
    rmse <- c(rmse, sqrt(apply(sq.errors, MARGIN = 2, mean, na.rm = TRUE)))
    mae <- c(mae, apply(abs.errors, MARGIN = 2, mean, na.rm = TRUE))
    mre <- c(mre, apply(r.errors, MARGIN = 2, mean, na.rm = TRUE))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
  }

  ## stmomo results ----
  out1 <- data.frame(
    model.name,
    model.family,
    mre,
    error.incidence,
    rmse,
    mae)

  # Best clmplus model per accuracy measure on the validation set.
  temp.ix <- grepl(".val", model.name, fixed = TRUE)
  temp.df <- out1[temp.ix, ]
  out2 <- data.frame(
    rmse = temp.df$model.name[which(abs(temp.df$rmse) == min(abs(temp.df$rmse)))],
    mre = temp.df$model.name[which(abs(temp.df$mre) == min(abs(temp.df$mre)))],
    mae = temp.df$model.name[which(abs(temp.df$mae) == min(abs(temp.df$mae)))],
    error.incidence = temp.df$model.name[which(abs(temp.df$error.incidence) == min(abs(temp.df$error.incidence)))])
  temp.ix <- grepl(".test", model.name, fixed = TRUE)
  out3 <- out1[temp.ix, ]

  best.df <- out2
  best.df[1, ] <- NA

  # Best clmplus model per measure on the test set, used to measure agreement
  # between the validation-based pick and the test-set winner.
  out.test.min <- data.frame(
    rmse = out3$model.name[which(abs(out3$rmse) == min(abs(out3$rmse)))],
    mre = out3$model.name[which(abs(out3$mre) == min(abs(out3$mre)))],
    mae = out3$model.name[which(abs(out3$mae) == min(abs(out3$mae)))],
    error.incidence = out3$model.name[which(abs(out3$error.incidence) == min(abs(out3$error.incidence)))])
  temp.mx <- matrix((sub("\\..*", "", out2) == sub("\\..*", "", out.test.min)), nrow = 1)
  choices.mx.fchl <- matrix(sub("\\..*", "", out2), nrow = 1)

  agreement.frame.fchl <- data.frame(temp.mx)
  choices.frame.fchl <- data.frame(choices.mx.fchl)
  colnames(agreement.frame.fchl) <- colnames(out2)
  colnames(choices.frame.fchl) <- colnames(out2)

  # Test-set scores of the models picked on the validation set.
  for (col.ix in colnames(out2)) {
    res <- out1$model.family[out1$model.name == out2[1, col.ix]]
    res.test <- out3$model.family == res
    best.df[1, col.ix] <- out3[res.test, col.ix]
  }

  families.set <- c('a', 'apc') # 'ap',
  temp.ix <- out3$model.family %in% families.set
  comparison.df <- out3[temp.ix, ]
  comparison.df <- cbind(comparison.df,
                         approach = rep('clmplus', length(families.set)))

  ## apc ----
  rmse <- NULL
  mae <- NULL
  model.name <- NULL
  error.incidence <- NULL
  model.family <- NULL
  mre <- NULL

  true.inc.values <- t2c(cum2incr(cumulative.payments.triangle))[2:(J - leave.out), (J - leave.out + 1):J]

  for (apc.mods in c("AC", "APC")) { # ,"AP"
    fit <- apc.fit.model(newt.apc,
                         model.family = "od.poisson.response",
                         model.design = apc.mods)
    if (apc.mods == "AC") {fcst <- apc.forecast.ac(fit)$trap.response.forecast}
    # if (apc.mods == "AP") {fcst <- apc.forecast.ap(fit)$trap.response.forecast}
    if (apc.mods == "APC") {fcst <- apc.forecast.apc(fit)$trap.response.forecast}

    plogram.hat <- t2c.full.square(incr2cum(t(fcst)))
    pred.mx <- plogram.hat[, (J - leave.out + 1):J]

    # Errors are computed against the cumulative `true.mx` built in the stmomo
    # section (same benchmark for both families); the incremental alternative
    # was deliberately left commented out in the original analysis:
    # true.mx = rbind(rep(NA,2),true.inc.values)
    # true.mx[2,2]=NA  # this is meant to be NA
    sq.errors <- (pred.mx - true.mx)^2
    abs.errors <- abs(pred.mx - true.mx)
    r.errors <- (pred.mx - true.mx) / true.mx # use same benchmark
    error.inc.num <- apply(pred.mx - true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE)
    error.inc.den <- apply(true.mx, MARGIN = 2, FUN = sum, na.rm = TRUE) # use same benchmark
    model.name.ix <- c(paste0(apc.mods, ".val"), paste0(apc.mods, ".test"))

    model.name <- c(model.name, tolower(model.name.ix))
    model.family <- c(model.family, tolower(rep(apc.mods, 2)))
    rmse <- c(rmse, sqrt(apply(sq.errors, MARGIN = 2, mean, na.rm = TRUE)))
    mae <- c(mae, apply(abs.errors, MARGIN = 2, mean, na.rm = TRUE))
    mre <- c(mre, apply(r.errors, MARGIN = 2, mean, na.rm = TRUE))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
  }

  out4 <- data.frame(
    model.name,
    model.family,
    mre,
    error.incidence,
    rmse,
    mae)

  # Same validation-pick / test-score machinery as above, for the apc family.
  temp.ix <- grepl(".val", model.name, fixed = TRUE)
  temp.df <- out4[temp.ix, ]
  out5 <- data.frame(
    rmse = temp.df$model.name[which(abs(temp.df$rmse) == min(abs(temp.df$rmse)))],
    mre = temp.df$model.name[which(abs(temp.df$mre) == min(abs(temp.df$mre)))],
    mae = temp.df$model.name[which(abs(temp.df$mae) == min(abs(temp.df$mae)))],
    error.incidence = temp.df$model.name[which(abs(temp.df$error.incidence) == min(abs(temp.df$error.incidence)))])
  temp.ix <- grepl(".test", model.name, fixed = TRUE)
  out6 <- out4[temp.ix, ]

  out.test.min2 <- data.frame(
    rmse = out6$model.name[which(abs(out6$rmse) == min(abs(out6$rmse)))],
    mre = out6$model.name[which(abs(out6$mre) == min(abs(out6$mre)))],
    mae = out6$model.name[which(abs(out6$mae) == min(abs(out6$mae)))],
    error.incidence = out6$model.name[which(abs(out6$error.incidence) == min(abs(out6$error.incidence)))])
  temp.mx <- matrix((sub("\\..*", "", out5) == sub("\\..*", "", out.test.min2)), nrow = 1)
  choices.mx.apc <- matrix(sub("\\..*", "", out5), nrow = 1)

  choices.frame.apc <- data.frame(choices.mx.apc)
  agreement.frame.apc <- data.frame(temp.mx)
  colnames(agreement.frame.apc) <- colnames(out5)
  colnames(choices.frame.apc) <- colnames(out5)

  best.df.apc <- out5
  best.df.apc[1, ] <- NA
  for (col.ix in colnames(out5)) {
    res <- out4$model.family[out4$model.name == out5[1, col.ix]]
    res.test <- out6$model.family == res
    best.df.apc[1, col.ix] <- out6[res.test, col.ix]
  }

  families.set <- c('ac', 'apc') # 'ap',
  temp.ix <- out6$model.family %in% families.set
  comparison.df.apc <- out6[temp.ix, ]
  comparison.df.apc <- cbind(comparison.df.apc,
                             approach = rep('apc', length(families.set)))

  out <- list(
    best.model.fchl = best.df,
    best.model.apc = best.df.apc,
    agreement.frame.fchl = agreement.frame.fchl,
    agreement.frame.apc = agreement.frame.apc,
    choices.frame.fchl = choices.frame.fchl,
    choices.frame.apc = choices.frame.apc,
    comparison.df = rbind(comparison.df,
                          comparison.df.apc))
  return(out)
}
# Run modelcomparison.1d over every triangle in the sample and stack the
# per-dataset results into the frames used by the bake-off plots.
#
# Args:
#   list.of.datasets: named list of cumulative payments triangles.
# Returns: a list of stacked data frames (best models, family comparison,
#   agreement and choice frames for both packages).
modelcomparison <- function(list.of.datasets) {
  "This functions returns the datasets to plot the bake-off section of the paper.
  The input is a list of datasets that constitute the sample.
  The output is datasets that contain accuracy measures.
  "
  best.fit <- NULL
  families.fit <- NULL
  agreement.fchl <- NULL
  agreement.apc <- NULL
  choices.fchl <- NULL
  choices.apc <- NULL

  for (df.ix in names(list.of.datasets)) {
    cat(paste0(".. Comparison on dataset: ", df.ix))
    out.ix <- modelcomparison.1d(list.of.datasets[[df.ix]])

    # Overall best = column-wise abs-minimum of the two families' best rows.
    best.of.the.bests.df <- best.of.the.bests(out.ix$best.model.fchl,
                                              out.ix$best.model.apc)
    out.ix$best.model.fchl['package'] <- 'clmplus'
    out.ix$best.model.apc['package'] <- 'apc'
    best.of.the.bests.df['package'] <- 'overall.best'

    best.fit <- rbind(best.fit,
                      out.ix$best.model.fchl,
                      out.ix$best.model.apc,
                      best.of.the.bests.df)
    families.fit <- rbind(families.fit,
                          out.ix$comparison.df)
    agreement.fchl <- rbind(agreement.fchl,
                            out.ix$agreement.frame.fchl)
    agreement.apc <- rbind(agreement.apc,
                           out.ix$agreement.frame.apc)
    choices.fchl <- rbind(choices.fchl,
                          out.ix$choices.frame.fchl)
    choices.apc <- rbind(choices.apc,
                         out.ix$choices.frame.apc)
  }

  # The accuracy-measure columns arrive as character; coerce to numeric.
  best.fit[, 1:4] <- apply(best.fit[, 1:4], MARGIN = 2, FUN = as.numeric)
  families.fit[, c('mre',
                   'error.incidence',
                   'rmse',
                   'mae')] <- apply(
    families.fit[, c('mre',
                     'error.incidence',
                     'rmse',
                     'mae')],
    MARGIN = 2,
    FUN = as.numeric)

  out <- list(best.fit = best.fit,
              families.fit = families.fit,
              agreement.fchl = agreement.fchl,
              agreement.apc = agreement.apc,
              choices.fchl = choices.fchl,
              choices.apc = choices.apc)
  return(out)
}
# Build the bake-off boxplots from the stacked model-comparison frames.
#
# Args:
#   models.comparison: list returned by modelcomparison() (uses $best.fit).
# Returns: a list of four ggplot objects (p1: rmse/mae, p2: mre/error
#   incidence, p3: their absolute values, p4: absolute error incidence only).
bake.off <- function(models.comparison) {
  "
  This function plots out the results from the previous computations.
  It takes as input the resulting dataframes of model.comparison.
  The output is the boxplots of the paper's bake-off section.
  "
  p1 <- models.comparison$best.fit[, c("rmse", "mae", "package")] %>%
    tidyr::pivot_longer(-c(package)) %>%
    ggplot(aes(x = package, y = value)) +
    geom_boxplot() +
    facet_wrap(. ~ name, nrow = 1, strip.position = 'bottom') +
    theme_bw() +
    theme(strip.placement = 'outside', strip.background = element_blank())

  p2 <- models.comparison$best.fit[, c("mre", "error.incidence", "package")] %>%
    tidyr::pivot_longer(-c(package)) %>%
    ggplot(aes(x = package, y = value)) +
    geom_boxplot() +
    facet_wrap(. ~ name, nrow = 1, strip.position = 'bottom') +
    theme_bw() +
    theme(strip.placement = 'outside', strip.background = element_blank())

  # Same signed measures in absolute value.
  abs.best <- models.comparison$best.fit[, c("mre", "error.incidence", "package")]
  abs.best[, c("mre", "error.incidence")] <- apply(abs.best[, c("mre", "error.incidence")],
                                                   MARGIN = 2,
                                                   FUN = abs)
  p3 <- abs.best %>%
    tidyr::pivot_longer(-c(package)) %>%
    ggplot(aes(x = package, y = value)) +
    geom_boxplot() +
    facet_wrap(. ~ name, nrow = 1, strip.position = 'bottom') +
    theme_bw() +
    theme(strip.placement = 'outside', strip.background = element_blank())

  only.ei <- models.comparison$best.fit[, c("error.incidence", "package")]
  only.ei[, c("error.incidence")] <- abs(only.ei[, c("error.incidence")])

  # BUG FIX: the original piped `abs.best` here, leaving `only.ei` unused;
  # p4 is the error-incidence-only plot, so it must use `only.ei`.
  p4 <- only.ei %>%
    tidyr::pivot_longer(-c(package)) %>%
    ggplot(aes(x = package, y = value)) +
    geom_boxplot() +
    # facet_wrap(.~name,nrow = 1,strip.position = 'bottom')+
    theme_bw() +
    theme(strip.placement = 'outside', strip.background = element_blank())

  out <- list(p1 = p1,
              p2 = p2,
              p3 = p3,
              p4 = p4)
  return(out)
}
The models in the clmplus
package are compared to those
in the apc
package. Below it can be found the code we used
to create the bake-off plot in our paper.
# Run the full bake-off over every dataset and draw the paper's plot
# (p3: absolute MRE and absolute error incidence of the best models).
out <- modelcomparison(list.of.datasets = list.of.datasets)
cake <- bake.off(out)
cake$p3
Within this section we provide practitioners with the code to perform models ranking. Please observe that now we have a different training validation split as pointed out in the paper. The training validation split we use is represented in the following picture.
# models ranking
# Train/validation split used for the ranking study (no test diagonal here):
# value 1 = training cells, 2 = validation diagonal, NA = blank.
J <- 12
df <- data.frame(expand.grid(c(0:(J - 1)), c(0:(J - 1))), c(1:(J^2)))
colnames(df) <- c("origin", "dev", "value")
df$value[df$origin + df$dev == (J - 1)] <- c(2)  # validation: last diagonal
df$value[df$origin + df$dev < (J - 1)] <- c(1)   # train: everything above
df$value[df$origin + df$dev >= J] <- c(NA)       # outside the triangle
# Blank the two corner cells.
df[J, 3] <- c(NA)
df[J * J - J + 1, 3] <- c(NA)

ggplot(data = df, aes(x = as.integer(dev), y = as.integer(origin))) +
  geom_tile(aes(fill = as.factor(value), color = "#000000")) + scale_y_reverse() +
  scale_fill_manual(values = c("royalblue", "darkred", "white"),
                    na.value = "white",
                    labels = c("Train", "Validation", "")) +
  theme_classic() +
  labs(x = "Development year", y = "Accident year", fill = "") +
  theme(axis.title.x = element_text(size = 8), axis.text.x = element_text(size = 7)) +
  theme(axis.title.y = element_text(size = 8), axis.text.y = element_text(size = 7)) +
  scale_color_brewer(guide = 'none')
# Rank all clmplus and apc age-period-cohort models on one triangle by the
# absolute error incidence on the last (validation) diagonal.
#
# Args:
#   data.T: an RtTriangle object (uses $cumulative.payments.triangle and
#           $incremental.payments.triangle).
# Returns: list(models.ranks = data.frame with model.name, error.incidence,
#   ei.rank).
# NOTE(review): relies on clmplus helpers and the global `models` list, as in
#   modelcomparison.1d.
modelsranking.1d <- function(data.T) {
  "
  Function to rank the clmplus package and apc package age-period-cohort models.
  This function takes a triangle of cumulative payments as input.
  It returns the ranking on the triangle.
  "
  leave.out <- 1  # only a validation diagonal is held out here

  model.name <- NULL
  error.incidence <- NULL
  mre <- NULL

  # pre-processing
  triangle <- data.T$cumulative.payments.triangle
  J <- dim(triangle)[2]
  reduced.triangle <- c2t(t2c(triangle)[1:(J - leave.out), 1:(J - leave.out)])
  newt.rtt <- RtTriangle(reduced.triangle)
  to.project <- t2c(triangle)[1:(J - leave.out - 1), J - leave.out]
  true.values <- t2c(triangle)[2:(J - leave.out), J]

  for (ix in c('a', 'ac', 'ap', 'apc')) {
    hz.fit <- StMoMo::fit(models[[ix]],
                          Dxt = newt.rtt$occurrance,
                          Ext = newt.rtt$exposure,
                          iterMax = as.integer(1e+05))
    hz.rate <- forecast::forecast(hz.fit, h = leave.out)$rates

    # Development factors from forecast hazard rates.
    fij <- (2 + hz.rate) / (2 - hz.rate)
    pred.fij <- fij[(leave.out + 1):length(fij)]
    pred.v <- to.project * pred.fij

    r.errors <- (pred.v - true.values) / true.values
    error.inc.num <- sum(pred.v - true.values, na.rm = TRUE)
    error.inc.den <- sum(true.values)

    model.name <- c(model.name,
                    paste0('clmplus.', ix))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
    mre <- c(mre, mean(r.errors))
  }

  # Lee-Carter via clmplus' Newton-Raphson fit; only scored if converged.
  ix <- 'lc'
  hz.fit <- fit.lc.nr(data.T = newt.rtt,
                      iter.max = 3e+04)
  if (hz.fit$converged == TRUE) {
    hz.rate <- forecast.lc.nr(hz.fit, J = dim(newt.rtt$cumulative.payments.triangle)[2])$rates[, 1:leave.out]
    fij <- (2 + hz.rate) / (2 - hz.rate)
    pred.fij <- fij[(leave.out + 1):length(fij)]
    pred.v <- to.project * pred.fij
    r.errors <- (pred.v - true.values) / true.values

    error.inc.num <- sum(pred.v - true.values, na.rm = TRUE)
    error.inc.den <- sum(true.values)

    model.name <- c(model.name,
                    paste0('clmplus.', ix))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
    mre <- c(mre, mean(r.errors))
  }

  out1 <- data.frame(
    model.name, # mre,
    error.incidence)

  ## APC package
  newt.apc <- apc.data.list(response = newt.rtt$incremental.payments.triangle,
                            data.format = "CL")

  ## apc ----
  model.name <- NULL
  error.incidence <- NULL
  mre <- NULL

  true.inc.values <- t2c(data.T$incremental.payments.triangle)[2:(J - leave.out), (J - leave.out + 1):J]

  for (apc.mods in c("AC", "APC")) { # ,"AP"
    fit <- apc.fit.model(newt.apc,
                         model.family = "od.poisson.response",
                         model.design = apc.mods)
    if (apc.mods == "AC") {fcst <- apc.forecast.ac(fit)$trap.response.forecast}
    # if (apc.mods == "AP") {fcst <- apc.forecast.ap(fit)$trap.response.forecast}
    if (apc.mods == "APC") {fcst <- apc.forecast.apc(fit)$trap.response.forecast}

    plogram.hat <- t2c.full.square(incr2cum(t(fcst)))
    pred.v <- plogram.hat[, (J - leave.out + 1):J]
    pred.v <- pred.v[2:length(pred.v)]

    r.errors <- (pred.v - true.values) / true.values
    error.inc.num <- sum(pred.v - true.values)
    error.inc.den <- sum(true.values)

    model.name <- c(model.name,
                    paste0('apc.', tolower(apc.mods)))
    error.incidence <- c(error.incidence, error.inc.num / error.inc.den)
    mre <- c(mre, mean(r.errors))
  }

  out2 <- data.frame(
    model.name, # mre,
    error.incidence)

  out3 <- rbind(out1, out2)
  # Rank by absolute error incidence (smallest = rank 1).
  out3 <- out3[order(abs(out3$error.incidence), decreasing = FALSE), ]
  out3[, 'ei.rank'] <- c(1:dim(out3)[1])
  # out3[,'mre.rank']=order(abs(out3$mre),decreasing = F)

  # fix it manually: apc.ac and clmplus.a get the better of their two ranks
  # (presumably because they represent the same underlying model fit by the
  # two packages -- TODO confirm against the paper) ...
  r2set <- min(out3$ei.rank[out3$model.name == 'apc.ac'],
               out3$ei.rank[out3$model.name == 'clmplus.a'])
  out3$ei.rank[out3$model.name == 'apc.ac'] <- r2set
  out3$ei.rank[out3$model.name == 'clmplus.a'] <- r2set

  # ... and ranks below the tie are shifted up to stay consecutive.
  if (out3$ei.rank[out3$model.name == 'apc.ac'] < max(out3$ei.rank)) {
    cond <- out3$ei.rank > out3$ei.rank[out3$model.name == 'apc.ac']
    out3$ei.rank[cond] <- out3$ei.rank[cond] - 1
  }
  return(list(models.ranks = out3))
}
# Rank the models on every triangle in the sample and stack the results.
#
# Args:
#   list.of.datasets: named list of cumulative payments triangles; each is
#     wrapped in RtTriangle() before ranking.
# Returns: list(full.ranks = stacked ranking data.frame with a data.source
#   column identifying the triangle).
modelsranking <- function(list.of.datasets) {
  "
  This functions returns the datasets to plot in the ranking section of the paper.
  The input is a list of datasets that constitute the sample.
  The output is the rankings across different data sources.
  "
  full.ranks <- NULL

  for (df.ix in names(list.of.datasets)) {
    out.df <- modelsranking.1d(RtTriangle(list.of.datasets[[df.ix]]))
    out.df$models.ranks[, 'data.source'] <- rep(df.ix, dim(out.df$models.ranks)[1])
    full.ranks <- rbind(full.ranks, out.df$models.ranks)
  }
  return(list(full.ranks = full.ranks))
}
=modelsranking(list.of.datasets) full.ranks
The following picture is the models ranks plot we included in the paper.
# Heatmap of error-incidence ranks: one row per data source, one column per
# model, cell label = rank.
# NOTE(review): cut() with breaks 0:6 maps rank 7 to NA (grey fill) while the
# sample contains 7 models -- confirm this is intended.
p_min_expd0 <- ggplot(full.ranks$full.ranks, aes(model.name, data.source)) +
  geom_tile(aes(fill = cut(ei.rank, breaks = 0:6, labels = 1:6)), colour = "grey") +
  ggtitle(" ") +
  theme_classic() +
  geom_text(aes(label = ei.rank)) +
  scale_y_discrete(limits = names(list.of.datasets)) +
  scale_fill_manual(drop = FALSE, values = colorRampPalette(c("white", "#6699CC"))(6), na.value = "#EEEEEE", name = "Rank") +
  xlab("Model") + ylab("Data source")
p_min_expd0
It can be useful to inspect the average rank of the different models we were comparing.
# Average rank of each model across all data sources.
tbl <- full.ranks$full.ranks %>%
  dplyr::group_by(model.name) %>%
  dplyr::summarise(mean.rank = mean(ei.rank))
tbl
# Output
# A tibble: 7 x 2
#   model.name  mean.rank
#   <chr>           <dbl>
# 1 apc.ac           3.77
# 2 apc.apc          4.59
# 3 clmplus.a        3.77
# 4 clmplus.ac       3.36
# 5 clmplus.ap       2.41
# 6 clmplus.apc      2.41
# 7 clmplus.lc       3.38

library(dplyr)
# Count how often each model achieves each rank.
temp.df <- full.ranks$full.ranks[, c('model.name', 'ei.rank')] %>%
  group_by(model.name, ei.rank) %>%
  summarise(count = n())
The following picture was not included in the paper but it shows the models ranks counts. It provides additional consistency to the results we included in our work.
# Bar chart of rank counts per model (one facet per model).
ggplot(temp.df, aes(y = count, x = factor(ei.rank))) +
  geom_bar(position = "stack", stat = "identity", fill = '#6699CC') +
  scale_y_continuous(limits = c(0, 15)) +
  facet_wrap(~model.name, scales = 'free') +
  theme_classic() +
  ylab("") +
  xlab("Rank")
In this vignette we wanted to show how the tool-box of actuarial
scientists is enriched thanks to clmplus.
It is important to perform different analyses to validate results; in this work we showed two.
There is no best model in absolute terms: our purpose is to show that more options can improve claims reserving.