To demonstrate how the medicalrisk package can be useful, this vignette shows the kinds of descriptive statistics and inferences that can be made from a simple administrative dataset.
This vignette assumes you have read the introductory vignette for medicalrisk.
First, use the medicalrisk package to create a single dataframe with information on each patient:
library(medicalrisk)
library(plyr)
data(vt_inp_sample)
cm_df <- generate_comorbidity_df(vt_inp_sample, icd9mapfn=icd9cm_charlson_quan)
cci_df <- generate_charlson_index_df(cm_df)
rsi_df <- ddply(vt_inp_sample, .(id), function(x) { icd9cm_sessler_rsi(x$icd9cm) } )
num_icd9_df <- count(vt_inp_sample, c('id'))
num_icd9_df <- rename(num_icd9_df, c("freq" = "num_icd9"))
wide_df <- merge(merge(merge(merge(
rsi_df, cci_df),
cm_df),
unique(vt_inp_sample[,c('id','scu_days','drg','mdc')])),
num_icd9_df)
id | rsi_1yrpod | rsi_30dlos | rsi_30dpod | rsi_inhosp | index | mi | chf | perivasc | cvd | dementia | chrnlung | rheum |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | -2.019 | 0.156 | -1.699 | -1.848 | 2 | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | FALSE |
2 | -4.142 | 0.893 | -3.802 | -3.543 | 0 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
3 | -2.627 | 0.831 | -2.911 | -2.861 | 0 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
4 | -0.798 | 0.336 | -1.551 | -0.267 | 4 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE |
5 | 2.580 | -1.790 | 2.455 | 1.762 | 2 | FALSE | FALSE | FALSE | FALSE | FALSE | TRUE | FALSE |
id | ulcer | liver | dm | dmcx | para | renal | tumor | modliver | mets | aids | scu_days | drg | mdc | num_icd9 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 12 |
2 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 15 |
3 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 462 | 8 | 7 |
4 | FALSE | FALSE | TRUE | FALSE | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | 0 | 470 | 8 | 10 |
5 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | 0 | 689 | 11 | 24 |
Let’s explore the data here with some graphs. First, a histogram:
library(reshape2)
library(ggplot2)
# generate a 100 pt x 17 comorbidity table (1700 rows)
cm_melted <- melt(cm_df, id.vars=c('id'), variable.name='cm')
# get rid of all the false entries
cm_melted <- cm_melted[cm_melted$value,]
## count only flags that are true
ggplot(cm_melted, aes(cm, fill=cm)) +
geom_bar() +
scale_fill_discrete()
The chrnlung comorbidity seems well represented. Let’s create a histogram breaking down which ICD-9-CM codes are mapping to chrnlung in this dataset:
# make a histogram dataframe for all the icd-9 codes
icd9cm_df <- count(vt_inp_sample, vars='icd9cm')
# create a charlson comorbidity map for all icd-9 codes
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)
# isolate just the chrnlung icd_9_cm codes
icd9cm_chrnlung <- row.names(icd9cm_charlson_df[icd9cm_charlson_df$chrnlung,])
# create a hist df
icd9cm_chrnlung_hist <- icd9cm_df[icd9cm_df$icd9cm %in% icd9cm_chrnlung,]
# plot it
ggplot(icd9cm_chrnlung_hist, aes(icd9cm, freq)) +
geom_bar(stat="identity")
Let’s see how often ICD-9-CM codes used for chrnlung coincide within patients:
# create base dataset
pairs <- unique(
vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c('id','icd9cm')])
# create coincidence matrix
t <- table(
ddply(pairs, c('id'), function(x) { if (length(x$icd9cm) > 1) {
data.frame(t(combn(as.character(x$icd9cm),2))) } })[c('X1','X2')])
D4168 | D49390 | |
---|---|---|
D49122 | 1 | 0 |
D4168 | 0 | 1 |
How often do comorbidities coincide?
# create coincidence matrix
t <- table(
ddply(cm_melted, c('id'), function(x) { if (length(x$cm) > 1) {
data.frame(t(combn(as.character(x$cm),2))) } })[c('X1','X2')])
# sort it
t <- t[order(rownames(t)),order(colnames(t))]
chf | chrnlung | cvd | dementia | dm | liver | mets | para | perivasc | renal | rheum | tumor | ulcer | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chf | 0 | 8 | 0 | 0 | 3 | 0 | 1 | 0 | 4 | 3 | 3 | 1 | 1 |
chrnlung | 0 | 0 | 0 | 0 | 7 | 1 | 0 | 1 | 0 | 5 | 3 | 1 | 0 |
cvd | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 |
dementia | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
dm | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 5 | 0 | 2 | 0 |
liver | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
mi | 4 | 6 | 1 | 1 | 4 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 1 |
perivasc | 0 | 3 | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 3 | 1 | 1 | 0 |
rheum | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
ulcer | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
Plot the above table:
This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:
## Warning: package 'gridExtra' was built under R version 3.6.2
p.inhosp <- ggplot(wide_df, aes(rsi_inhosp, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
p.30dpod <- ggplot(wide_df, aes(rsi_30dpod, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
p.1yrpod <- ggplot(wide_df, aes(rsi_1yrpod, index)) + geom_point() + geom_smooth(method=lm) +
scale_y_continuous(limits=c(-3,10))
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow=1)
As expected, the Risk Stratification Index is correlated with an increased Charlson Comorbidity Index.