Tutorial: QHScrnomo

Example data set

We’ll be using the prostate.dat data set throughout this example.

This is an artificial prostate cancer dataset used for illustrating the usages of functions in QHScrnomo
This is a data frame with 2000 observations on the following 9 variables:
- UNIQID: patient id
- TX: Treatment options of prostate cancer with levels EBRT, PI, RP
- PSA: Pre-treatment PSA levels
- BX_GLSN_CAT: Biopsy Gleason Score Sum. a factor with levels 1 for 2-6 2 for 7 and 3 for 8-10
- CLIN_STG: Clinical stage with levels T1, T2, T3
- Age: Age at treatment date
- RACE_AA: patient ethnicity.a factor with levels 0 for other and 1 for African American
- TIME_EVENT: follow up time in months
- EVENT_DOD: follow status, 0 - censored, 1 - died of prostate cancer, 2 - died of other causes

data("prostate.dat")
str(prostate.dat)
#> 'data.frame':    2000 obs. of  9 variables:
#>  $ UNIQID     : int  23982340 17299867 35653287 34821801 24377202 20243570 25179226 30671082 26178029 35519165 ...
#>  $ TX         : Factor w/ 3 levels "EBRT","PI","RP": 3 3 3 2 2 3 3 3 3 3 ...
#>  $ PSA        : num  25.2 3.3 7.6 5.6 4.6 4.72 8.2 4.6 3.7 4.23 ...
#>  $ BX_GLSN_CAT: Factor w/ 3 levels "1","2","3": 3 1 1 1 1 1 1 1 1 1 ...
#>  $ CLIN_STG   : Factor w/ 3 levels "T1","T2","T3": 2 1 1 1 1 1 1 1 1 1 ...
#>  $ AGE        : num  57.9 64.6 70.6 65.4 63.8 ...
#>  $ RACE_AA    : num  1 0 0 0 0 0 0 0 0 0 ...
#>  $ TIME_EVENT : num  136.9 87.8 18.7 10.3 7.2 ...
#>  $ EVENT_DOD  : num  0 1 1 0 2 0 1 2 0 0 ...

Fit a competing risk model

The function crr.fit uses a Cox proportional hazards regression model constructed from cph in rms library (by Frank Harrell).

dd <- datadist(prostate.dat)
options(datadist = "dd")
prostate.f <- cph(Surv(TIME_EVENT,EVENT_DOD == 1) ~ TX  + rcs(PSA,3) +
           BX_GLSN_CAT + CLIN_STG + rcs(AGE,3) +
           RACE_AA, data = prostate.dat,
           x = TRUE, y= TRUE, surv=TRUE, time.inc = 144)
prostate.crr <- crr.fit(prostate.f, cencode = 0, failcode = 1)
summary(prostate.crr)
#>              Effects              Response : Surv(TIME_EVENT, EVENT_DOD == 1) 
#> 
#>  Factor            Low    High   Diff.  Effect    S.E.     Lower 0.95
#>  PSA                5.000  9.400  4.400 -0.145130 0.074520 -0.291190 
#>   Hazard Ratio      5.000  9.400  4.400  0.864910       NA  0.747380 
#>  AGE               58.137 69.562 11.425 -0.184680 0.077432 -0.336440 
#>   Hazard Ratio     58.137 69.562 11.425  0.831370       NA  0.714310 
#>  RACE_AA            0.000  1.000  1.000 -0.252210 0.140700 -0.527970 
#>   Hazard Ratio      0.000  1.000  1.000  0.777080       NA  0.589800 
#>  TX - EBRT:RP       3.000  1.000     NA -0.066323 0.118050 -0.297700 
#>   Hazard Ratio      3.000  1.000     NA  0.935830       NA  0.742530 
#>  TX - PI:RP         3.000  2.000     NA  0.238490 0.132460 -0.021125 
#>   Hazard Ratio      3.000  2.000     NA  1.269300       NA  0.979100 
#>  BX_GLSN_CAT - 2:1  1.000  2.000     NA  0.125530 0.114610 -0.099103 
#>   Hazard Ratio      1.000  2.000     NA  1.133800       NA  0.905650 
#>  BX_GLSN_CAT - 3:1  1.000  3.000     NA  0.630090 0.161180  0.314180 
#>   Hazard Ratio      1.000  3.000     NA  1.877800       NA  1.369100 
#>  CLIN_STG - T2:T1   1.000  2.000     NA -0.264760 0.109220 -0.478820 
#>   Hazard Ratio      1.000  2.000     NA  0.767390       NA  0.619510 
#>  CLIN_STG - T3:T1   1.000  3.000     NA  0.083704 0.214990 -0.337680 
#>   Hazard Ratio      1.000  3.000     NA  1.087300       NA  0.713430 
#>  Upper 0.95 
#>   0.00092617
#>   1.00090000
#>  -0.03291100
#>   0.96762000
#>   0.02355300
#>   1.02380000
#>   0.16505000
#>   1.17950000
#>   0.49809000
#>   1.64560000
#>   0.35016000
#>   1.41930000
#>   0.94600000
#>   2.57540000
#>  -0.05069400
#>   0.95057000
#>   0.50509000
#>   1.65710000

Create nomogram

prostate.g <- Newlabels(prostate.crr,
                        c(TX = 'Treatment options', 
                          BX_GLSN_CAT = 'Biopsy Gleason Score Sum',
                          CLIN_STG = 'Clinical stage'))
nomogram.crr(prostate.g,
             failtime = 120,
             lp=FALSE,
             xfrac=0.65,
             fun.at = seq(0.2, 0.45, 0.05),
             funlabel = "Predicted 10-year cumulative incidence")

# output a math formula
sas.cmprsk(prostate.crr, time = 120)
#> Base is  0.03949825 
#>  0.30480861 * (TX = "PI") + 0.066323409 * (TX = "RP") - 
#>     0.05173739 * PSA + 0.00060026412 * max(PSA - 3.8, 0)**3 - 
#>     0.00075935137 * max(PSA - 6.335, 0)**3 + 0.00015908725 * 
#>     max(PSA - 15.9, 0)**3 + 0.12553095 * (BX_GLSN_CAT = "2") + 
#>     0.63008736 * (BX_GLSN_CAT = "3") - 0.26475762 * (CLIN_STG = 
#>     "T2") + 0.083704451 * (CLIN_STG = "T3") - 0.030615192 * 
#>     AGE + 4.2897617e-05 * max(AGE - 53.318904, 0)**3 - 8.993847e-05 * 
#>     max(AGE - 64.190411, 0)**3 + 4.7040853e-05 * max(AGE - 74.104384, 
#>     0)**3 - 0.25220729 * RACE_AA

Model evaluation

# anova table
anova(prostate.crr)
#>                 Wald Statistics          Response: Surv(TIME_EVENT, EVENT_DOD == 1) 
#> 
#>  Factor          Chi-Square d.f. P     
#>  TX               5.21       2   0.0739
#>  PSA              3.85       2   0.1458
#>   Nonlinear       3.79       1   0.0515
#>  BX_GLSN_CAT     15.29       2   0.0005
#>  CLIN_STG         6.88       2   0.0320
#>  AGE              9.27       2   0.0097
#>   Nonlinear       1.35       1   0.2445
#>  RACE_AA          3.21       1   0.0730
#>  TOTAL NONLINEAR  5.16       2   0.0758
#>  TOTAL           44.64      11   <.0001

# prediction from 10-fold cross validation
prostate.dat$preds.tenf <- tenf.crr(prostate.crr, time=120, fold = 10)
#> 1  2  3  4  5  6  7  8  9  10
str(prostate.dat$preds.tenf)
#>  num [1:2000] 0.346 0.336 0.29 0.368 0.4 ...

## calculate the CRR version of concordance index
with(prostate.dat, cindex(preds.tenf,
                          ftime = TIME_EVENT,
                          fstatus =EVENT_DOD, type = "crr"))["cindex"]
#>    cindex 
#> 0.5734681

## generate the calibration curve for predicted 10-year cancer
## specific mortality
with(prostate.dat,
     groupci(
         preds.tenf, 
         ftime = TIME_EVENT,
         fstatus =EVENT_DOD, g = 5, u = 120,
         xlab = "Nomogram predicted 10-year cancerspecific mortality",
         ylab = "Observed predicted 10-year cancerspecific mortality")
)

#>              x   n events        ci    std.err
#> [1,] 0.2387561 400     87 0.2430994 0.02614273
#> [2,] 0.2954839 400     84 0.2815496 0.02987646
#> [3,] 0.3321609 400    107 0.3890442 0.03467815
#> [4,] 0.3695668 400     85 0.3628186 0.03708080
#> [5,] 0.4472728 400    101 0.3928132 0.03654195

Tutorial: QHScrnomo

Introduction

Setup

Example data set

Fit a competing risk model

Create nomogram

Model evaluation