R code for ‘Weighted Cox Regression using the R package coxphw’

Daniela Dunkler

2020-06-22

Introduction

This is the R example code from ‘Weighted Cox Regression Using the R Package coxphw’ by Dunkler, Ploner, Schemper and Heinze (Journal of Statistical Software, 2018, 84: 1-26, doi: 10.18637/jss.v084.i02). It works with R >=3.2.2 and coxphw package >=4.0.0.

#########################################################################################
## R code for
## Weighted Cox Regression using the R package coxphw
## written by Daniela Dunkler
#########################################################################################

## This R example code works with R >=3.4.2 and coxphw-package >=4.0.0.

## load R packages
library("coxphw")
## Loading required package: survival
library("survival")
library("splines")                              # for splines::ns used in plotzph

pdfind <- FALSE                                 # indicator, if plots should be saved as pdf
runSimulation <- FALSE                          # indicator, if simulation should be run
                                                # Running the simulation is time-consuming.

#########################################################################################
## additional function for nice plots of scaled Schoenfeld residuals versus time
#########################################################################################

plotcoxzph <- function(x, resid = TRUE, se = TRUE, df = 4, nsmo = 40, var, wd = 1,
                       limits = NULL, ...)
{
  ## plot.cox.zph function from survival package 2.37-4 slightly adapted

  xx <- x$x
  yy <- x$y
  d <- nrow(yy)
  df <- max(df)
  nvar <- ncol(yy)
  pred.x <- seq(from = min(xx), to = max(xx), length = nsmo)
  temp <- c(pred.x, xx)
  lmat <- ns(temp, df = df, intercept = TRUE)
  pmat <- lmat[1:nsmo, ]
  xmat <- lmat[-(1:nsmo), ]
  qmat <- qr(xmat)
  if (qmat$rank < df)
    stop("Spline fit is singular, try a smaller degrees of freedom")

  if (se) {
    bk <- backsolve(qmat$qr[1:df, 1:df], diag(df))
    xtx <- bk %*% t(bk)
    seval <- d*((pmat%*% xtx) *pmat) %*% rep(1, df)
  }

  ylab <- paste("Beta(t) for", dimnames(yy)[[2]])
  if (missing(var)) var <- 1:nvar
  else {
    if (is.character(var)) var <- match(var, dimnames(yy)[[2]])
    if  (any(is.na(var)) || max(var)>nvar || min(var) <1)
      stop("Invalid variable requested")
  }

  if (x$transform == "log") {
    xx <- exp(xx)
    pred.x <- exp(pred.x)
  }
  else if (x$transform != "identity") {
    xtime <- as.numeric(dimnames(yy)[[1]])
    indx <- !duplicated(xx)
    apr1  <- approx(xx[indx], xtime[indx],
                    seq(min(xx), max(xx), length = 17)[2*(1:8)])
    temp <- signif(apr1$y, 2)
    apr2  <- approx(xtime[indx], xx[indx], temp)
    xaxisval <- apr2$y
    xaxislab <- rep("", 8)
    for (i in 1:8) xaxislab[i] <- format(temp[i])
  }

  for (i in var) {
    y <- yy[,i]
    yhat <- pmat %*% qr.coef(qmat, y)
    if (resid) yr <-range(yhat, y)
    else       yr <-range(yhat)
    if (se) {
      temp <- 2* sqrt(x$var[i,i] * seval)
      yup <- yhat + temp
      ylow<- yhat - temp
      yr <- range(yr, yup, ylow)
    }

    if (is.null(limits)) { limits <- yr }

    if (x$transform == "identity")
      plot(range(xx), limits, type = "n", xlab = "", ylab = "", lwd = 2, las = 1, ...)
    else if (x$transform == "log")
      plot(range(xx), limits, type = "n", xlab = "", ylab = "", log = "x", ...)
    else {
      plot(range(xx), limits, type ="n", xlab = "", ylab = "", lwd = 2, axes = FALSE, ...)
      axis(1, xaxisval, xaxislab)
      axis(2, las = 1)
      box()
    }
    if (resid) points(xx, y)
    lines(pred.x, yhat, lwd = wd, ...)
    if (se) {
      lines(pred.x, yup,lty = 2)
      lines(pred.x, ylow, lty = 2)
    }
  }
}

Section 2.1: Gastric cancer study

## ------------------------------------------
data("gastric", package = "coxphw")
head(gastric)
##   id radiation time status
## 1  1         0    1      1
## 2  2         1   17      1
## 3  3         1   42      1
## 4  4         1   44      1
## 5  5         1   48      1
## 6  6         1   60      1
## time in years
gastric$yrs <- gastric$time / 365.25

nrow(gastric)
## [1] 90
## follow-up/observation time
survfit(Surv(yrs, abs(1 - status)) ~ 1, data = gastric)
## Call: survfit(formula = Surv(yrs, abs(1 - status)) ~ 1, data = gastric)
## 
##       n  events  median 0.95LCL 0.95UCL 
##   90.00   11.00    4.34    4.00      NA
## descriptive analysis
gtable0 <- table(gastric$status, deparse.level = 2)
gtable0
## gastric$status
##  0  1 
## 11 79
round(prop.table(gtable0) * 100, digits = 2)
## gastric$status
##     0     1 
## 12.22 87.78
gtable1 <- table(gastric$radiation, gastric$status, deparse.level = 2)
addmargins(gtable1)
##                  gastric$status
## gastric$radiation  0  1 Sum
##               0    3 42  45
##               1    8 37  45
##               Sum 11 79  90
round(prop.table(gtable1, margin = 1) * 100, digits = 2)
##                  gastric$status
## gastric$radiation     0     1
##                 0  6.67 93.33
##                 1 17.78 82.22
## check assumption of proportional hazards
gsurv <- survfit(Surv(yrs, status) ~ radiation, data = gastric)
summary(gsurv)
## Call: survfit(formula = Surv(yrs, status) ~ radiation, data = gastric)
## 
##                 radiation=0 
##     time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  0.00274     45       1   0.9778  0.0220       0.9356        1.000
##  0.17248     44       1   0.9556  0.0307       0.8972        1.000
##  0.28747     43       1   0.9333  0.0372       0.8632        1.000
##  0.34223     42       1   0.9111  0.0424       0.8316        0.998
##  0.49829     41       1   0.8889  0.0468       0.8017        0.986
##  0.59138     40       1   0.8667  0.0507       0.7728        0.972
##  0.68446     39       1   0.8444  0.0540       0.7449        0.957
##  0.71732     38       1   0.8222  0.0570       0.7178        0.942
##  0.82409     37       2   0.7778  0.0620       0.6653        0.909
##  0.93634     35       1   0.7556  0.0641       0.6399        0.892
##  0.96920     34       1   0.7333  0.0659       0.6149        0.875
##  0.97467     33       1   0.7111  0.0676       0.5903        0.857
##  0.98015     32       1   0.6889  0.0690       0.5661        0.838
##  1.04038     31       1   0.6667  0.0703       0.5422        0.820
##  1.04860     30       2   0.6222  0.0723       0.4955        0.781
##  1.06229     28       1   0.6000  0.0730       0.4727        0.762
##  1.07871     27       1   0.5778  0.0736       0.4501        0.742
##  1.11704     26       1   0.5556  0.0741       0.4278        0.721
##  1.25941     25       1   0.5333  0.0744       0.4058        0.701
##  1.33881     24       1   0.5111  0.0745       0.3841        0.680
##  1.36619     23       1   0.4889  0.0745       0.3626        0.659
##  1.43190     22       1   0.4667  0.0744       0.3415        0.638
##  1.43463     21       1   0.4444  0.0741       0.3206        0.616
##  1.46475     20       1   0.4222  0.0736       0.3000        0.594
##  1.53867     19       1   0.4000  0.0730       0.2797        0.572
##  1.55784     18       1   0.3778  0.0723       0.2597        0.550
##  1.84805     17       1   0.3556  0.0714       0.2399        0.527
##  1.85079     16       1   0.3333  0.0703       0.2205        0.504
##  2.04791     15       1   0.3111  0.0690       0.2014        0.481
##  2.13005     14       1   0.2889  0.0676       0.1827        0.457
##  2.15195     13       1   0.2667  0.0659       0.1643        0.433
##  2.18207     12       1   0.2444  0.0641       0.1462        0.409
##  2.61465     11       1   0.2222  0.0620       0.1286        0.384
##  2.65024     10       1   0.2000  0.0596       0.1115        0.359
##  2.67488      9       1   0.1778  0.0570       0.0948        0.333
##  3.40862      8       1   0.1556  0.0540       0.0787        0.307
##  3.47981      7       1   0.1333  0.0507       0.0633        0.281
##  3.88775      6       1   0.1111  0.0468       0.0486        0.254
##  4.24641      3       1   0.0741  0.0435       0.0234        0.234
##  4.63792      1       1   0.0000     NaN           NA           NA
## 
##                 radiation=1 
##    time n.risk n.event survival std.err lower 95% CI upper 95% CI
##  0.0465     45       1    0.978  0.0220        0.936        1.000
##  0.1150     44       1    0.956  0.0307        0.897        1.000
##  0.1205     43       1    0.933  0.0372        0.863        1.000
##  0.1314     42       1    0.911  0.0424        0.832        0.998
##  0.1643     41       1    0.889  0.0468        0.802        0.986
##  0.1971     40       1    0.867  0.0507        0.773        0.972
##  0.2026     39       1    0.844  0.0540        0.745        0.957
##  0.2601     38       1    0.822  0.0570        0.718        0.942
##  0.2820     37       1    0.800  0.0596        0.691        0.926
##  0.2957     36       1    0.778  0.0620        0.665        0.909
##  0.3340     35       1    0.756  0.0641        0.640        0.892
##  0.3943     34       1    0.733  0.0659        0.615        0.875
##  0.4572     33       1    0.711  0.0676        0.590        0.857
##  0.4654     32       1    0.689  0.0690        0.566        0.838
##  0.5010     31       1    0.667  0.0703        0.542        0.820
##  0.5065     30       1    0.644  0.0714        0.519        0.801
##  0.5284     29       1    0.622  0.0723        0.496        0.781
##  0.5339     28       1    0.600  0.0730        0.473        0.762
##  0.5394     27       1    0.578  0.0736        0.450        0.742
##  0.5695     26       1    0.556  0.0741        0.428        0.721
##  0.6407     25       1    0.533  0.0744        0.406        0.701
##  0.6434     24       1    0.511  0.0745        0.384        0.680
##  0.6954     23       1    0.489  0.0745        0.363        0.659
##  0.8405     22       1    0.467  0.0744        0.341        0.638
##  0.8624     21       1    0.444  0.0741        0.321        0.616
##  1.0979     20       1    0.422  0.0736        0.300        0.594
##  1.2183     19       1    0.400  0.0730        0.280        0.572
##  1.2704     18       1    0.378  0.0723        0.260        0.550
##  1.3251     17       1    0.356  0.0714        0.240        0.527
##  1.4456     16       1    0.333  0.0703        0.221        0.504
##  1.4839     15       1    0.311  0.0690        0.201        0.481
##  1.5524     14       1    0.289  0.0676        0.183        0.457
##  1.5797     13       1    0.267  0.0659        0.164        0.433
##  1.5880     12       1    0.244  0.0641        0.146        0.409
##  2.1766     11       1    0.222  0.0620        0.129        0.384
##  2.3409     10       1    0.200  0.0596        0.111        0.359
##  3.7399      6       1    0.167  0.0583        0.084        0.331
## plot of cumulative survival probabilities
if (pdfind) {  pdf(file = "figure1A.pdf", width = 10.2, height = 5)  }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plot(gsurv, lty = 1:2, las = 1, lwd = 2)
  mtext(side = 1, line = 2.5, text = "time (years)", cex = 1.2)
  mtext(side = 2, line = 3, text = "survival distribution function", cex = 1.2)
  legend("topright", title = "radiation", legend = c("no", "yes"),
         lty = 1:2, inset = 0.05, bty = "n", cex = 1.4)

if (pdfind) {  dev.off() }


## plots of scaled Schoenfeld residuals and test departure from proportional hazards
gfit1 <- coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastric, x = TRUE,
               method = "breslow")
gfit1.ph <- cox.zph(fit = gfit1, transform = "km")
gfit1.ph
##           chisq df       p
## radiation  12.4  1 0.00042
## GLOBAL     12.4  1 0.00042
if (pdfind) {  pdf(file = "figure1B.pdf", width = 5, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plotcoxzph(x = gfit1.ph, wd = 2, limits = c(-3, 4.3))
  abline(a = 0, b = 0, lty = 3)
  mtext(side = 1, line = 2.5, cex = 1.2,
        text = expression(paste("time (years, ", hat(F), "(t) transformation)")))
  mtext(side = 2, line = 2.2, cex = 1.2,
        text = expression(paste(hat(beta), "(t) for radiation")))
  ## add the linear fit
  abline(lm(gfit1.ph$y ~ gfit1.ph$x)$coefficients, lty = 3, col = "red", lwd = 2)

if (pdfind) {  dev.off() }

gfit1.ph2 <- cox.zph(fit = gfit1, transform = "identity")

if (pdfind) {  pdf(file = "figure1C.pdf", width = 5.2, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 2))
  plotcoxzph(x = gfit1.ph2, wd = 2, limits = c(-3, 4.3))
  mtext(side = 1, line = 2.5, text = "time (years)", cex = 1.2)
  mtext(side = 2, line = 2.2, cex = 1.2,
        text = expression(paste(hat(beta), "(t) for radiation")))
  abline(a = 0, b = 0, lty = 3)
  mtext(text = "radiation...", side = 4, line = 0.1, font = 3)
  mtext(text = "protective",  side = 4, line = 1, adj = 0, font = 3)
  mtext(text = "    harmful",     side = 4, line = 1, adj = 1, font = 3)

if (pdfind) {  dev.off() }

## ignore non-proportional hazards and apply a standard Cox proportional hazards model
summary(coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastric, x = TRUE,
              method = "breslow"))
## Call:
## coxph(formula = Surv(yrs, status) ~ radiation, data = gastric, 
##     x = TRUE, method = "breslow", cluster = id)
## 
##   n= 90, number of events= 79 
## 
##             coef exp(coef) se(coef) robust se     z Pr(>|z|)
## radiation 0.1415    1.1520   0.2263    0.2292 0.617    0.537
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## radiation     1.152     0.8681    0.7351     1.805
## 
## Concordance= 0.565  (se = 0.031 )
## Likelihood ratio test= 0.39  on 1 df,   p=0.5
## Wald test            = 0.38  on 1 df,   p=0.5
## Score (logrank) test = 0.39  on 1 df,   p=0.5,   Robust = 0.37  p=0.5
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
## or equivalently
## coxphw(Surv(yrs, status) ~ radiation, data = gastric, template = "PH")

Section 2.2: Biofeedback therapy study

## ------------------------------------------
data("biofeedback", package = "coxphw")
head(biofeedback)
##   id success thdur bfb theal log2heal
## 1  1       1    25   0    17  4.08746
## 2  2       1     5   1    20  4.32193
## 3  3       0    53   0    81  6.33985
## 4  4       0   100   1   135  7.07682
## 5  5       0    30   0   730  9.51175
## 6  6       1    89   0    15  3.90689
## descriptive analysis
nrow(biofeedback)
## [1] 33
btable0 <- table(biofeedback$bfb, deparse.level = 2)
btable0
## biofeedback$bfb
##  0  1 
## 14 19
round(prop.table(btable0) * 100, digits = 2)
## biofeedback$bfb
##     0     1 
## 42.42 57.58
## follow-up/observation time
## survfit(Surv(thdur, abs(1-success)) ~ 1, data = biofeedback)
survfit(Surv(thdur, success) ~ 1, data = biofeedback)
## Call: survfit(formula = Surv(thdur, success) ~ 1, data = biofeedback)
## 
##       n  events  median 0.95LCL 0.95UCL 
##      33      23      25      21      89
btable1 <- table(biofeedback$bfb, biofeedback$success, deparse.level = 2)
addmargins(btable1)
##                biofeedback$success
## biofeedback$bfb  0  1 Sum
##             0    4 10  14
##             1    6 13  19
##             Sum 10 23  33
round(prop.table(btable1, margin = 1) * 100, digits = 2)
##                biofeedback$success
## biofeedback$bfb     0     1
##               0 28.57 71.43
##               1 31.58 68.42
## hist(biofeedback$theal)
## hist(biofeedback$log2heal)

## Kaplan-Meier analysis
bsurv <- survfit(Surv(thdur, success) ~ bfb, data = biofeedback)
summary(bsurv)
## Call: survfit(formula = Surv(thdur, success) ~ bfb, data = biofeedback)
## 
##                 bfb=0 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     9     14       1    0.929  0.0688       0.8030        1.000
##    18     13       1    0.857  0.0935       0.6921        1.000
##    24     12       1    0.786  0.1097       0.5977        1.000
##    25     11       2    0.643  0.1281       0.4351        0.950
##    32      8       1    0.562  0.1349       0.3515        0.900
##    58      6       1    0.469  0.1413       0.2596        0.846
##    84      5       1    0.375  0.1407       0.1797        0.783
##    85      4       1    0.281  0.1332       0.1112        0.711
##    89      3       1    0.188  0.1172       0.0551        0.639
## 
##                 bfb=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     5     19       1    0.947  0.0512        0.852        1.000
##     7     18       1    0.895  0.0704        0.767        1.000
##    11     17       1    0.842  0.0837        0.693        1.000
##    13     16       1    0.789  0.0935        0.626        0.996
##    14     15       2    0.684  0.1066        0.504        0.929
##    15     13       1    0.632  0.1107        0.448        0.890
##    17     12       1    0.579  0.1133        0.395        0.850
##    20     11       1    0.526  0.1145        0.344        0.806
##    21     10       1    0.474  0.1145        0.295        0.761
##    22      9       1    0.421  0.1133        0.249        0.713
##    23      8       1    0.368  0.1107        0.204        0.664
##    33      6       1    0.307  0.1079        0.154        0.611
if (pdfind) {  pdf(file = "figure2A.pdf", width = 10, height = 5) }
  par(oma  =c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plot(bsurv, fun = "event", lty = 1:2, lwd = 2, las = 1, ylim = c(0, 1))
  mtext(side = 1, line = 2.5, text = "duration of therapy (days)", cex = 1.2)
  mtext(side = 2, line = 3, text = "cumulative propability of rehabilitation", cex = 1.2)
  legend("topleft", title = "biofeedback (bfb)", legend = c("no", "yes"), lty = 1:2,
         inset = 0.05, bty = "n", cex = 1.4)

if (pdfind) {  dev.off() }

bfit1 <- coxph(Surv(thdur, success) ~ bfb + log2heal + cluster(id), data = biofeedback,
               x = TRUE, method = "breslow")
summary(bfit1)
## Call:
## coxph(formula = Surv(thdur, success) ~ bfb + log2heal, data = biofeedback, 
##     x = TRUE, method = "breslow", cluster = id)
## 
##   n= 33, number of events= 23 
## 
##             coef exp(coef) se(coef) robust se      z Pr(>|z|)
## bfb       0.2700    1.3099   0.4273    0.3453  0.782    0.434
## log2heal -0.5267    0.5906   0.2543    0.3636 -1.448    0.148
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## bfb         1.3099     0.7634    0.6658     2.577
## log2heal    0.5906     1.6933    0.2896     1.205
## 
## Concordance= 0.665  (se = 0.061 )
## Likelihood ratio test= 7.19  on 2 df,   p=0.03
## Wald test            = 2.66  on 2 df,   p=0.3
## Score (logrank) test = 5.16  on 2 df,   p=0.08,   Robust = 4.72  p=0.09
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
bfit1.ph <- cox.zph(bfit1, transform = "km")
bfit1.ph
##          chisq df      p
## bfb       7.44  1 0.0064
## log2heal  4.29  1 0.0384
## GLOBAL   11.10  2 0.0039
if (pdfind) {  pdf(file = "figure2B.pdf", width = 5, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plotcoxzph(x =  bfit1.ph[1], wd = 2)
  mtext(side = 1, line = 2.5, text = "duration of therapy (days)", cex = 1.2)
  mtext(side = 2, line = 2.2, text = expression(hat(beta)), cex = 1.2)
  abline(a = 0, b = 0, lty = 3)
  abline(lm(bfit1.ph$y[, 1] ~ bfit1.ph$x)$coefficients, lty = 3, col = "red", lwd = 2)
  legend("bottomleft", legend = "bfb", bty = "n", inset = 0.08, cex = 1.5)

if (pdfind) {  dev.off() }

if (pdfind) {  pdf(file = "figure2C.pdf", width = 5, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plotcoxzph(x = bfit1.ph[2], wd = 2, limits = c(-4.5, 4))
  mtext(side = 1, line = 2.5, text = "duration of therapy (days)", cex = 1.2)
    mtext(side = 2, line = 2.2, text = expression(hat(beta)), cex = 1.2)
  abline(a = 0, b = 0, lty = 3)
  abline(lm(bfit1.ph$y[, 2] ~ bfit1.ph$x)$coefficients, lty = 3, col = "red", lwd = 2)
  legend("topright", legend = "log2heal", bty = "n", inset = 0.08, cex = 1.5)

if (pdfind) {  dev.off() }

Section 5: Simulation

simulation <- function(n1 = 100, n2 = 100, sim = 10, seed = 123,
                       type = c("ph", "nph1", "nph2", "nph3"), scalewei = NULL,
                       shapewei = NULL, beta = NULL, scaleexp = NULL, shapewei2 = NULL,
                       scalewei2 = NULL, shapegom = NULL, scalegom = NULL, scaleexpC,
                       admincens, npop = 10000, xmaxplot = NULL, addconstant = 1e-4)
  {
    ## Simulate time-to-event data (following either an expoential, Weibuill or Gompertz
    ## distribution) with one binary explanatory variable, generate six versions of
    ## each simulated data set with differnt censoring patterns (no censoring, administrative
    ## censoring and loss-to-follow-up) and analyse these data sets with Cox regression
    ## and weighted Cox regression. Population-c is computed, as well.
    ##
    ## sim: number of simulations, 0 only population c is computed and plot is generated.
    ##
    ## type = "ph"   : Weibull distributed distributions, proportional hazards
    ##        "nph1" : exponential and Weibull distribution, non-proportional hazards
    ##        "nph2" : exponential and Weibull distribution, non-proportional hazards
    ##        "nph3" : exponential and Gompertz distribution, non-proportional hazards
    ##
    ## "ph" requires scalewei, shapewei and beta
    ## "nph1" requires scalewei, shapewei and scaleexp
    ## "nph2" requires scalewei, shapewei, scalewei2 and shapewei2
    ## "nph3" requires scaleexp, scalegom and shapegom
    ##
    ## scaleexpC and admincens: parameters for loss-to-follow-up and adminitrative censoring
    ##
    ## add.constant  : this number will be added to all times to prevent survival times of
    ##                 exactly 0.

    type <- match.arg(type)
    if (type == "ph")   {
        stopifnot(!is.null(scalewei),!is.null(shapewei),!is.null(beta))
    } else if (type == "nph1") {
        stopifnot(!is.null(scalewei),!is.null(shapewei),!is.null(scaleexp))
    } else if (type == "nph2") {
        stopifnot(!is.null(scalewei),!is.null(shapewei),!is.null(scalewei2),!is.null(shapewei2))
    } else if (type == "nph3") {
        stopifnot(!is.null(scaleexp),!is.null(scalegom),!is.null(shapegom))
    }

    set.seed(seed)

    ## 1) compute population c
    if (type != "ph") {
      if (type == "nph1") {
        integrandA <- function(x) { (scalewei * exp(-scalewei * x)) *
                                     exp(-scalewei * x ^ scalewei) }
      } else if (type == "nph2") {
        integrandA <- function(x) { (scalewei2 * shapewei2 * x ^ (shapewei2 - 1) *
                                     exp(-scalewei2 * x ^ shapewei2)) * exp(-scalewei *
                                     x ^ shapewei) }
      } else if (type == "nph3") {
        integrandA <- function(x) { scaleexp * exp(-scaleexp * x) *
                                    exp(scalegom / shapegom * (1 - exp(shapegom * x)))  }
      }
      popc100 <- rep(c(integrate(integrandA, lower = 0, upper = Inf)$value,
                       integrate(integrandA, lower = 0, upper = admincens[1])$value,
                       integrate(integrandA, lower = 0, upper = admincens[2])$value) * 100,
                     each = 2)
    } else { popc100 <- rep(exp(beta) / (1+exp(beta)), 6) * 100 }

    if (sim == 0) { output <- list(results = NA, olist = NA, popc100 = popc100) }

    ## 2) Kaplan-Meier plot of scenario
    xpop <- c(rep(0, npop / 2), rep(1, npop / 2))
    u <- runif(n = npop, min = 0, max = 1)

    if (type == "ph") {
      time1pop <- (-log(u[1:(npop / 2)]) / (scalewei * exp(beta * 0))) ^ (1 / shapewei)
      time2pop <- (-log(u[((npop / 2) + 1):npop]) / (scalewei * exp(beta * 1))) ^ (1 / shapewei)
    } else if (type == "nph1") {
      time1pop <- ((-log(u[1:(npop / 2)])) / scalewei) ^ (1 / shapewei)
      time2pop <- -log(u[((npop / 2) + 1):npop]) / scaleexp
    } else if (type == "nph2") {
      time1pop <- ((-log(u[1:(npop / 2)])) / scalewei) ^ (1 / shapewei)
      time2pop <- ((-log(u[((npop / 2) + 1):npop])) / scalewei2) ^ (1 / shapewei2)
    } else if (type == "nph3") {
      time1pop <- 1 / shapegom * log(1 - (shapegom * log(u[1:(npop / 2)])) / scalegom)
      time2pop <- -log(u[((npop / 2) + 1):npop]) / scaleexp
    }

    time1pop <- time1pop + addconstant
    time2pop <- time2pop + addconstant
    datapop <- data.frame(cbind(time = c(time1pop, time2pop), event = 1, x = xpop))

    fitpop <- coxph(Surv(time, event) ~ x, data = datapop)

    if (is.null(xmaxplot)) { xmaxplot <- max(datapop$time)  }

    par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
    plot(survfit(Surv(time, event) ~ x, data = datapop), lty = 1:2, lwd = 2,
         las = 1, xlim = c(0, xmaxplot))
    abline(v = admincens, col = "gray", lty = 2)
    mtext(side = 1, line = 2.5, text = "time", cex = 1.2)
    mtext(side = 2, line = 3, text = "survival distribution function",
          cex = 1.2)
    mtext(side = 3, line = -3, cex = 1.2, font = 2,
          text = if (type == "ph") { "proportional hazards scenario" } else {
          "non-proportional\nhazards scenario" } )

    ## 3) simulate data sets and analyse them
    if (sim != 0) {
      n <- n1 + n2
      out <- data.frame(matrix(NA, nrow = sim, ncol = 7, dimnames = list(1:(sim),
                        c("cens", "cox_beta", "cox_c100", "wcox_beta", "wcox_c100",
                          "wilcox100", "prt0st1"))))
      olist <- list(out = out, outC = out, out1 = out, outC1 = out, out2 = out, outC2 = out)
      x <- c(rep(0, n1), rep(1, n2))

      for (i in 1:sim) {
        cat(paste(".", sep = ""))

        ## simulate data without censoring (data), type 1
        u <- runif(n = n, min = 0, max = 1)

        if (type == "ph") {
          time1 <- (-log(u[1:n1]) / (scalewei * exp(beta * 0))) ^ (1 / shapewei)
          time2 <- (-log(u[(n1 + 1):n]) / (scalewei * exp(beta * 1))) ^ (1 / shapewei)
        } else if (type == "nph1") {
          time1 <- ((-log(u[1:n1])) / scalewei) ^ (1 / shapewei)
          time2 <- -log(u[(n1 + 1):n]) / scaleexp
        } else if (type == "nph2") {
          time1 <- ((-log(u[1:n1])) / scalewei) ^ (1 / shapewei)
          time2 <- ((-log(u[(n1 + 1):n])) / scalewei2) ^ (1 / shapewei2)
        } else if (type == "nph3") {
          time1 <- 1 / shapegom * log(1 - (shapegom * log(u[1:n1])) / scalegom)
          time2 <- -log(u[(n1 + 1):n]) / scaleexp
        }
        time1 <- time1 + addconstant
        time2 <- time2 + addconstant

        data <- data.frame(cbind(time = c(time1, time2), event = 1, x = x))

        fit1 <- coxph(Surv(time, event) ~ x, data = data)
        fit2 <-
          coxphw(Surv(time, event) ~ x, data = data)
        fit1zph <- cox.zph(fit = fit1, transform = "km")

        eg <- expand.grid(time1, time2)
        olist$out[i, ] <- c(
          0,
          fit1$coefficients, concord(fit1)[1],
          fit2$coefficients, concord(fit2)[1],
          wilcox.test(time ~ x, data = data, correct = FALSE)$statistic /
            (n1 * n2),
          1 - (sum(eg[, 1] < eg[, 2]) / nrow(eg)))

        ## follow-up distribution
        timecens <- (-log(runif(n = nrow(data), min = 0, max = 1)) / scaleexpC) + addconstant

        ## data with censoring (dataC), type 2
        dataC <- data
        dataC$time[data$time > timecens] <- timecens[data$time > timecens]
        dataC$event[data$time > timecens] <- 0
        censC <- sum(dataC$event == 0) / n * 100

        fit1C <- coxph(Surv(time, event) ~ x, data = dataC)
        fit2C <- coxphw(Surv(time, event) ~ x, data = dataC)

        dataC$id <- 1:nrow(dataC)

        wilcoxC <- wilcox.test(time ~ x, data = dataC, correct = FALSE)$statistic
        egC <- expand.grid(dataC$event[dataC$x == 0], dataC$event[dataC$x == 1])
        olist$outC[i, ] <- c(censC,
                             fit1C$coefficients, concord(fit1C)[1],
                             fit2C$coefficients, concord(fit2C)[1],
                             wilcoxC / (length(dataC$x[dataC$x == 0]) *
                                        length(dataC$x[dataC$x == 1])),
                             NA)

        ## data with administrative censoring 1 (data1), type 3
        data1 <- data
        data1$event[data$time > admincens[1]] <- 0
        data1$time[data$time > admincens[1]] <- admincens[1]
        cens1 <- sum(data1$event == 0) / n * 100

        fit11 <- coxph(Surv(time, event) ~ x, data = data1)
        fit21 <- coxphw(Surv(time, event) ~ x, data = data1)
        fit11zph <- cox.zph(fit = fit11, transform = "km")

        eg1 <- eg[!(eg[, 1] >= admincens[1] & eg[, 2] >= admincens[1]), ]
        wilcox1 <- wilcox.test(time ~ x, data = data1, correct = FALSE)$statistic
        olist$out1[i, ] <- c(cens1,
                             fit11$coefficients, concord(fit11)[1],
                             fit21$coefficients, concord(fit21)[1],
                             wilcox1 / (n1 * n2),
                             1 - ((sum(eg1[, 1] < eg1[, 2]) +
                                   sum(eg[, 1] >= admincens[1] &
                                       eg[, 2] >= admincens[1]) / 2) / nrow(eg)))

        ## data1 with censoring (datacens1), type 4
        dataC1 <- data1
        dataC1$time[data1$time > timecens] <- timecens[data1$time > timecens]
        dataC1$event[data1$time > timecens] <- 0
        censC1 <- sum(dataC1$event == 0) / n * 100

        fit1C1 <- coxph(Surv(time, event) ~ x, data = dataC1)
        fit2C1 <- coxphw(Surv(time, event) ~ x, data = dataC1)

        wilcoxC1 <- wilcox.test(time ~ x, data = dataC1, correct = FALSE)$statistic
        egC1 <- expand.grid(dataC1$event[dataC1$x == 0], dataC1$event[dataC1$x == 1])
        olist$outC1[i, ] <- c(censC1,
                              fit1C1$coefficients, concord(fit1C1)[1],
                              fit2C1$coefficients, concord(fit2C1)[1],
                              wilcoxC1 / (length(dataC1$x[dataC1$x == 0]) *
                                          length(dataC1$x[dataC1$x == 1])),
                              NA)

        ## data with administrative censoring 2 (data2), type 5
        data2 <- data
        data2$event[data$time > admincens[2]] <- 0
        data2$time[data$time > admincens[2]] <- admincens[2]
        cens2 <- sum(data2$event == 0) / n * 100

        fit12 <- coxph(Surv(time, event) ~ x, data = data2)
        fit22 <- coxphw(Surv(time, event) ~ x, data = data2)
        fit12zph <- cox.zph(fit = fit12, transform = "km")

        eg2 <- eg[!(eg[, 1] >= admincens[2] & eg[, 2] >= admincens[2]), ]
        wilcox2 <- wilcox.test(time ~ x, data = data2, correct = FALSE)$statistic
        olist$out2[i, ] <- c(cens2,
                             fit12$coefficients, concord(fit12)[1],
                             fit22$coefficients, concord(fit22)[1],
                             wilcox2 / (n1 * n2),
                             1 - ((sum(eg2[, 1] < eg2[, 2]) +
                                   sum(eg[, 1] >= admincens[2] &
                                       eg[, 2] >= admincens[2]) / 2) / nrow(eg)))

        ## data2 with censoring (datacens2), type 6
        dataC2 <- data2
        dataC2$time[data2$time > timecens] <- timecens[data2$time > timecens]
        dataC2$event[data2$time > timecens] <- 0
        censC2 <- sum(dataC2$event == 0) / n * 100

        fit1C2 <- coxph(Surv(time, event) ~ x, data = dataC2)
        fit2C2 <- coxphw(Surv(time, event) ~ x, data = dataC2)

        wilcoxC2 <- wilcox.test(time ~ x, data = dataC2, correct = FALSE)$statistic
        egC2 <- expand.grid(dataC2$event[dataC2$x == 0], dataC2$event[dataC2$x == 1])
        olist$outC2[i, ] <- c(censC2,
                              fit1C2$coefficients, concord(fit1C2)[1],
                              fit2C2$coefficients, concord(fit2C2)[1],
                              wilcoxC2 / (length(dataC2$x[dataC2$x == 0]) *
                                          length(dataC2$x[dataC2$x == 1])),
                              NA)
      }

      results <- matrix(NA, nrow = 6, ncol = 7, dimnames = list(c("No", "Loss-to-fup",
          "Admin. 1", "Loss-to-fup & admin. 1", "Admin. 2", "Loss-to-fup & admin. 2"),
          colnames(olist$out)))

      for (j in 1:6) {
        olist[[j]][, c("cox_c100", "wcox_c100", "wilcox100", "prt0st1")] <-
          olist[[j]][, c("cox_c100", "wcox_c100", "wilcox100", "prt0st1")] * 100

        r1 <- round(apply(olist[[j]], 2, mean), 3)
        r2 <- round(apply(olist[[j]], 2, sd) / sqrt(sim), 3)

        results[j, ] <- t(paste(r1, " (", r2, ")", sep = ""))
      }

      output <- list(results = results, olist = olist, popc100 = popc100)
    }

    invisible(output)
  }

## Section 5. Simulation
if (runSimulation) {
## Figure 3 and Table 1
if (pdfind) { pdf("simph.pdf", width = 5, height = 5) }
sim1 <- simulation(n1 = 1000, n2 = 1000, sim = 2000, seed = 3460, type = "ph",
                   scalewei = 0.11, shapewei = 1.22, scaleexpC =0.06029,
                   beta = log(0.55/(1-0.55)), admincens = c(11.21083, 9.549136),
                   npop = 10000, xmaxplot = 23, addconstant = 1e-4)
if (pdfind) { dev.off() }
print(sim1$results[, 1:6])
print(round(sim1$popc100[1], 2))        # true population-c * 100

if (pdfind) { pdf("simnph.pdf", width = 5, height = 5) }
sim2 <- simulation(n1 = 1000, n2 = 1000, sim = 2000, seed = 3458, type ="nph3",
                   scaleexp = 0.35653, shapegom = 1.6, scalegom = 0.0228,
                   scaleexpC = 0.122, admincens = c(4.506223, 3.535000),
                   npop = 10000,  xmaxplot = 6)
if (pdfind) { dev.off() }
print(sim2$results[, 1:6])
print(round(sim2$popc[1], 2))          # true population-c * 100
}

Section 6.1: Gastric cancer study

## prepare Table 2
models <- c("Ignoring non-proportional hazards *", "HR Cox regression",
            "Estimating piecewise constant HRs *", "HR 1st year", "HR >1st year",
            "Including a time-by-covariate interaction", "HR at 0.5 years", "HR at 1 year",
            "HR at 2 years", "Weighted Cox regression", "average HR", "c'%")
Table2 <- data.frame(matrix(NA, nrow = length(models), ncol = 4, dimnames = list(models,
                     c("HR", "HRlower", "HRupper", "p"))))

## ignore non-proportional hazards and apply a Cox proportional hazards model
gfit2 <- coxphw(Surv(yrs, status) ~ radiation, data = gastric, template = "PH",
                robust = TRUE)

## or equivalently
coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastric, x = TRUE,
      method = "breslow")
## Call:
## coxph(formula = Surv(yrs, status) ~ radiation, data = gastric, 
##     x = TRUE, method = "breslow", cluster = id)
## 
##             coef exp(coef) se(coef) robust se     z     p
## radiation 0.1415    1.1520   0.2263    0.2292 0.617 0.537
## 
## Likelihood ratio test=0.39  on 1 df, p=0.5325
## n= 90, number of events= 79
## extract estimates for Table 1: HR, 95% CI, p-value
Table2["HR Cox regression", ] <- c(exp(gfit2$coeff),
                                   exp(confint(gfit2)),
                                   summary(gfit2)$prob)
## coxphw(formula = Surv(yrs, status) ~ radiation, data = gastric, 
##     template = "PH", robust = TRUE)
## 
## Model fitted by unweighted estimation (PH template) 
## 
##               coef  se(coef) exp(coef) lower 0.95 upper 0.95         z
## radiation 0.141495 0.2292141  1.151995  0.7350944   1.805335 0.6173052
##                   p
## radiation 0.5370334
## 
## Wald Chi-square = 0.3810657 on 1  df  p = 0.5370334  n = 90
## 
## Covariance-Matrix:
##            radiation
## radiation 0.05253909
## 
## Generalized concordance probability:   Estimates may be biased!
##           concordance prob. lower 0.95 upper 0.95
## radiation            0.5353     0.4237     0.6435
## estimating piecewise constant hazard ratios (by two separate Cox models)
## (two time periods with equal number of events)

table(gastric$status)
## 
##  0  1 
## 11 79
## 79/2
nrow(subset(gastric, status == 1 & yrs < 1))          # breakpoint = 1 year
## [1] 39
## first time period
gastricp1 <- gastric
gastricp1$status[gastricp1$yrs > 1] <- 0
gastricp1$yrs[gastricp1$yrs > 1] <- 1

nrow(gastricp1)
## [1] 90
gtable0 <- table(gastricp1$status, deparse.level = 2)
gtable0
## gastricp1$status
##  0  1 
## 51 39
round(prop.table(gtable0) * 100, digits = 2)
## gastricp1$status
##     0     1 
## 56.67 43.33
gtable1 <- table(gastricp1$radiation, gastricp1$status, deparse.level = 2)
addmargins(gtable1)
##                    gastricp1$status
## gastricp1$radiation  0  1 Sum
##                 0   31 14  45
##                 1   20 25  45
##                 Sum 51 39  90
round(prop.table(gtable1, margin = 1) * 100, digits = 2)
##                    gastricp1$status
## gastricp1$radiation     0     1
##                   0 68.89 31.11
##                   1 44.44 55.56
gfit3 <- coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastricp1,
               x = TRUE, method = "breslow")
gfit3.ph <- cox.zph(fit = gfit3, transform = "km")
gfit3.ph$table
##              chisq df          p
## radiation 2.836058  1 0.09217006
## GLOBAL    2.836058  1 0.09217006
## plot of scaled Schoenfeld residuals in the first time period
par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
plotcoxzph(x = gfit3.ph, wd = 2)
abline(a = 0, b = 0, lty = 3)
mtext(side = 1, line = 2.5, text = "time (years, KM-transformation)", cex = 1)
mtext(side = 2, line = 2.5, text = expression(paste(beta, "(t) for radiation")), cex = 1)
abline(lm(gfit3.ph$y ~ gfit3.ph$x)$coefficients, lty = 3, col = "red", lwd = 2)

## plot of cumulative survival probabilities
## gsurv2 <- survfit(Surv(yrs, status) ~ radiation, data = gastricp1)
## par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
## plot(gsurv2, lty = 1:2, las = 1, lwd = 2)
## mtext(side = 1, line = 2.5, text = "time (years)")
## mtext(side = 2, line = 3, text = "survival distribution function")
## legend("bottomleft", title = "radiation", legend = c("yes", "no"),
##        lty = 2:1, inset = 0.02, bty = "n", cex = 1.2)

## Cox proportional hazards model for the first time period
gfit4 <- coxphw(Surv(yrs, status) ~ radiation, data = gastricp1, template = "PH")
summary(gfit4)
## coxphw(formula = Surv(yrs, status) ~ radiation, data = gastricp1, 
##     template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                coef se(coef) exp(coef) lower 0.95 upper 0.95        z
## radiation 0.8774141 0.325826  2.404673   1.269733    4.55407 2.692892
##                     p
## radiation 0.007083531
## 
## Wald Chi-square = 7.251665 on 1  df  p = 0.007083531  n = 90
## 
## Covariance-Matrix:
##           radiation
## radiation 0.1061626
## 
## Generalized concordance probability:   Estimates may be biased!
##           concordance prob. lower 0.95 upper 0.95
## radiation            0.7063     0.5594       0.82
Table2["HR 1st year", ] <- c(exp(gfit4$coeff),
                             exp(confint(gfit4)),
                             summary(gfit4, print = FALSE)$prob)

## or equivalently
## coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastricp1, method = "breslow")

## second time period
gastricp2 <- subset(gastric, yrs > 1)

nrow(gastricp2)
## [1] 51
gtable0 <- table(gastricp2$status, deparse.level = 2)
gtable0
## gastricp2$status
##  0  1 
## 11 40
round(prop.table(gtable0) * 100, digits = 2)
## gastricp2$status
##     0     1 
## 21.57 78.43
gtable1 <- table(gastricp2$radiation, gastricp2$status, deparse.level = 2)
addmargins(gtable1)
##                    gastricp2$status
## gastricp2$radiation  0  1 Sum
##                 0    3 28  31
##                 1    8 12  20
##                 Sum 11 40  51
round(prop.table(gtable1, margin = 1) * 100, digits = 2)
##                    gastricp2$status
## gastricp2$radiation     0     1
##                   0  9.68 90.32
##                   1 40.00 60.00
gfit5 <- coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastricp2, x = TRUE,
               method = "breslow")
gfit5.ph <- cox.zph(fit = gfit5, transform = "km")
gfit5.ph$table
##               chisq df         p
## radiation 0.5706304  1 0.4500085
## GLOBAL    0.5706304  1 0.4500085
## plot of scaled Schoenfeld residuals for the second time period
par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
plotcoxzph(x = gfit5.ph, wd = 2)
abline(a = 0, b = 0, lty = 3)
mtext(side = 1, line = 2.5, text = "time (years, KM-transformation)", cex = 1)
mtext(side = 2, line = 2.5, text = expression(paste(beta, "(t) for radiation")), cex = 1)
abline(lm(gfit5.ph$y ~ gfit5.ph$x)$coefficients, lty = 3, col = "red", lwd = 2)

## plot of cumulative survival probabilities
## gsurv3 <- survfit(Surv(yrs, status) ~ radiation, data = gastricp2)
## par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
## plot(gsurv3, lty = 1:2, las = 1, lwd = 2, xlim=c(1,5))
## mtext(side = 1, line = 2.5, text = "time (years)")
## mtext(side = 2, line = 3, text = "survival distribution function")
## legend("bottomleft", title = "radiation", legend = c("yes", "no"),
##        lty = 2:1, inset = 0.02, bty = "n", cex = 1.2)

## Cox proportional hazards model for the second time period
gfit6 <- coxphw(Surv(yrs, status) ~ radiation, data = gastricp2, template = "PH")
summary(gfit6)
## coxphw(formula = Surv(yrs, status) ~ radiation, data = gastricp2, 
##     template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                 coef  se(coef) exp(coef) lower 0.95 upper 0.95         z
## radiation -0.6051688 0.3471071 0.5459823  0.2765161   1.078044 -1.743464
##                    p
## radiation 0.08125255
## 
## Wald Chi-square = 3.039668 on 1  df  p = 0.08125255  n = 51
## 
## Covariance-Matrix:
##           radiation
## radiation 0.1204833
## 
## Generalized concordance probability:   Estimates may be biased!
##           concordance prob. lower 0.95 upper 0.95
## radiation            0.3532     0.2166     0.5188
Table2["HR >1st year", ] <- c(exp(gfit6$coeff),
                              exp(confint(gfit6)),
                              summary(gfit6, print = FALSE)$prob)

## or equivalently
## coxph(Surv(yrs, status) ~ radiation + cluster(id), data = gastricp2, method = "breslow")

## including a time-by-covariate interaction
fun <- function(t) as.numeric(t > 1)

gfit7 <- coxphw(Surv(yrs, status) ~ radiation + fun(yrs):radiation, data = gastric, template = "PH")
summary(gfit7)
## coxphw(formula = Surv(yrs, status) ~ radiation + fun(yrs):radiation, 
##     data = gastric, template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                          coef  se(coef) exp(coef) lower 0.95 upper 0.95
## radiation           0.8774141 0.3258260 2.4046734 1.26973327  4.5540699
## fun(yrs):radiation -1.4825829 0.4758515 0.2270505 0.08934636  0.5769896
##                            z           p
## radiation           2.692892 0.007083531
## fun(yrs):radiation -3.115642 0.001835452
## 
## Wald Chi-square = 10.30011 on 2  df  p = 0.005799087  n = 90
## 
## Covariance-Matrix:
##                     radiation fun(yrs):radiation
## radiation           0.1061626         -0.1060570
## fun(yrs):radiation -0.1060570          0.2264347
## 
## Generalized concordance probability:   Estimates may be biased!
##                    concordance prob. lower 0.95 upper 0.95
## radiation                     0.7063     0.5594     0.8200
## fun(yrs):radiation            0.1850     0.0820     0.3659
## 2.4046734 * 0.2270505
## exp(0.8774141 - 1.4825829)

## or equivalently
summary(coxph(Surv(yrs, status) ~ radiation + tt(radiation) + cluster(id), data = gastric,
              tt = function(x, t, ...) x * (t > 1), method = "breslow"))
## Call:
## coxph(formula = Surv(yrs, status) ~ radiation + tt(radiation), 
##     data = gastric, tt = function(x, t, ...) x * (t > 1), method = "breslow", 
##     cluster = id)
## 
##   n= 90, number of events= 79 
## 
##                  coef exp(coef) se(coef) robust se      z Pr(>|z|)   
## radiation      0.8774    2.4047   0.3351    0.3258  2.693  0.00708 **
## tt(radiation) -1.4826    0.2271   0.4816    0.4759 -3.116  0.00184 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## radiation        2.4047     0.4159   1.26973     4.554
## tt(radiation)    0.2271     4.4043   0.08935     0.577
## 
## Concordance= 0.6  (se = 0.029 )
## Likelihood ratio test= 10.49  on 2 df,   p=0.005
## Wald test            = 10.3  on 2 df,   p=0.006
## Score (logrank) test = 10.45  on 2 df,   p=0.005,   Robust = 10.73  p=0.005
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
## extended Cox model - assume a linear time-dependent effect
fit1 <- coxphw(Surv(yrs, status) ~ radiation + yrs:radiation, data = gastric, template = "PH")
summary(fit1)
## coxphw(formula = Surv(yrs, status) ~ radiation + yrs:radiation, 
##     data = gastric, template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                     coef  se(coef) exp(coef) lower 0.95 upper 0.95         z
## radiation      1.2699606 0.4334889 3.5607124  1.5224762  8.3276657  2.929627
## yrs:radiation -0.9652886 0.3409321 0.3808733  0.1952444  0.7429892 -2.831322
##                         p
## radiation     0.003393692
## yrs:radiation 0.004635608
## 
## Wald Chi-square = 9.047941 on 2  df  p = 0.01084588  n = 90
## 
## Covariance-Matrix:
##                radiation yrs:radiation
## radiation      0.1879126    -0.1241715
## yrs:radiation -0.1241715     0.1162347
## 
## Generalized concordance probability:   Estimates may be biased!
##               concordance prob. lower 0.95 upper 0.95
## radiation                0.7807     0.6036     0.8928
## yrs:radiation            0.2758     0.1634     0.4263
## or equivalently
coxph(Surv(yrs, status) ~ radiation + tt(radiation) + cluster(id), tt = function(x,t, ...) x * t,
     data = gastric, method = "breslow")
## Call:
## coxph(formula = Surv(yrs, status) ~ radiation + tt(radiation), 
##     data = gastric, tt = function(x, t, ...) x * t, method = "breslow", 
##     cluster = id)
## 
##                  coef exp(coef) se(coef) robust se      z       p
## radiation      1.2700    3.5607   0.4186    0.4335  2.930 0.00339
## tt(radiation) -0.9653    0.3809   0.3177    0.3409 -2.831 0.00464
## 
## Likelihood ratio test=13.47  on 2 df, p=0.001188
## n= 90, number of events= 79
## extract HR at 0.5, 1 and 2 years
fit1est <- predict(fit1, type = "slice.time", x = "yrs", z = "radiation", newx = c(0.5, 1, 2),
                   exp = TRUE, verbose = TRUE, pval = TRUE)
##   yrs     HR HR lower 0.95 HR upper 0.95      p
## 1 0.5 2.1975        1.2096        3.9924 0.0098
## 2 1.0 1.3562        0.8536        2.1547 0.1971
## 3 2.0 0.5165        0.2381        1.1207 0.0946
Table2[c("HR at 0.5 years", "HR at 1 year",
         "HR at 2 years"), ] <- cbind(fit1est$estimates[, "HR"],
                                      fit1est$estimates[, "HR lower 0.95"],
                                      fit1est$estimates[, "HR upper 0.95"],
                                      fit1est$estimates[, "p"])

## visualize the estimated linear time-dependent effect
fit1est2 <- predict(fit1, type = "slice.time", x = "yrs", z = "radiation",
                    newx = seq(from = 0.1, to = 3, by = 0.1))

if (pdfind) { pdf("figure3.pdf", width = 7, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar=c(2, 2, 0, 0))
  plot(fit1est2, addci = TRUE)
  mtext(side = 1, line = 2.5, text = "time (yrs)", cex = 1.3)
  mtext(side = 2, line = 2.3, text = expression(paste(hat(beta), "(t) for radiation")),
        cex = 1.3)

if (pdfind) { dev.off() }

## extended Cox model - assume a log-linear time-dependent effect
gfit8 <- coxphw(Surv(yrs, status) ~ radiation + log(yrs):radiation, data = gastric,
                template = "PH")
summary(gfit8)
## coxphw(formula = Surv(yrs, status) ~ radiation + log(yrs):radiation, 
##     data = gastric, template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                           coef  se(coef) exp(coef) lower 0.95 upper 0.95
## radiation           0.03766302 0.2367992 1.0383813  0.6528193   1.651660
## log(yrs):radiation -0.66924556 0.4821178 0.5120948  0.1990540   1.317437
##                             z         p
## radiation           0.1590504 0.8736291
## log(yrs):radiation -1.3881370 0.1650953
## 
## Wald Chi-square = 1.97312 on 2  df  p = 0.372857  n = 90
## 
## Covariance-Matrix:
##                      radiation log(yrs):radiation
## radiation          0.056073853        0.004581723
## log(yrs):radiation 0.004581723        0.232437577
## 
## Generalized concordance probability:   Estimates may be biased!
##                    concordance prob. lower 0.95 upper 0.95
## radiation                     0.5094      0.395     0.6229
## log(yrs):radiation            0.3387      0.166     0.5685
## or equivalently
coxph(Surv(yrs, status) ~ radiation + tt(radiation) + cluster(id),
     tt = function(x, t, ...) x * log(t), data = gastric, method = "breslow")
## Call:
## coxph(formula = Surv(yrs, status) ~ radiation + tt(radiation), 
##     data = gastric, tt = function(x, t, ...) x * log(t), method = "breslow", 
##     cluster = id)
## 
##                   coef exp(coef) se(coef) robust se      z     p
## radiation      0.03766   1.03838  0.23962   0.23680  0.159 0.874
## tt(radiation) -0.66925   0.51209  0.27299   0.48212 -1.388 0.165
## 
## Likelihood ratio test=8.02  on 2 df, p=0.01809
## n= 90, number of events= 79
## weighted Cox regression 
fit2 <- coxphw(Surv(yrs, status) ~ radiation, data = gastric, template = "AHR")
summary(fit2)
## coxphw(formula = Surv(yrs, status) ~ radiation, data = gastric, 
##     template = "AHR")
## 
## Model fitted by weighted estimation (AHR template) 
## 
##                coef  se(coef) exp(coef) lower 0.95 upper 0.95        z
## radiation 0.4625051 0.2387432  1.588047  0.9945916   2.535608 1.937249
##                    p
## radiation 0.05271492
## 
## Wald Chi-square = 3.752934 on 1  df  p = 0.05271492  n = 90
## 
## Covariance-Matrix:
##            radiation
## radiation 0.05699834
## 
## Generalized concordance probability:
##           concordance prob. lower 0.95 upper 0.95
## radiation            0.6136     0.4986     0.7172
## weighted Cox regression with truncation of weights
gfit9 <- coxphw(Surv(yrs, status) ~ radiation, data = gastric, template = "AHR",
                  trunc.weights = 0.95)
summary(gfit9)
## coxphw(formula = Surv(yrs, status) ~ radiation, data = gastric, 
##     template = "AHR", trunc.weights = 0.95)
## 
## Model fitted by weighted estimation (AHR template) 
## 
##                coef  se(coef) exp(coef) lower 0.95 upper 0.95        z
## radiation 0.4622282 0.2384041  1.587608  0.9949774   2.533221 1.938843
##                    p
## radiation 0.05252042
## 
## Wald Chi-square = 3.759113 on 1  df  p = 0.05252042  n = 90
## 
## Covariance-Matrix:
##            radiation
## radiation 0.05683651
## 
## Generalized concordance probability:
##           concordance prob. lower 0.95 upper 0.95
## radiation            0.6135     0.4987      0.717
if (pdfind) { pdf(file = "figure4.pdf", width = 6, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plot(x = gfit9, las = 1, lwd = 2)
  mtext(side = 1, line = 2.5, text = "time (years)")
  mtext(side = 2, line = 2.5, text = "weight")

if (pdfind) { dev.off() }

## range of normalized totatl weights
range(gfit9$w.matrix[, "w"])
## [1] 0.3134808 1.6534706
Table2["average HR", ] <- cbind(exp(gfit9$coeff),
                                exp(confint(gfit9)),
                                summary(gfit9, print = FALSE)$prob)
Table2["c'%", ] <- c(as.vector(concord(gfit9)), summary(gfit9, print = FALSE)$prob)

## finish Table 1
Table2["c'%", 1:3] <- 100 * Table2["c'%", 1:3]
Table2 <- round(Table2, digits = 3)

Table2[, 1] <- paste(Table2[, 1], " (", Table2[, 2], "-", Table2[, 3], ")", sep = "")
Table2[, 2] <- Table2[, 4]
Table2 <- Table2[, 1:2]
dimnames(Table2)[[2]] <- c("Estimate (95% CI)", "p")
Table2
##                                             Estimate (95% CI)     p
## Ignoring non-proportional hazards *                NA (NA-NA)    NA
## HR Cox regression                         1.152 (0.735-1.805) 0.537
## Estimating piecewise constant HRs *                NA (NA-NA)    NA
## HR 1st year                                2.405 (1.27-4.554) 0.007
## HR >1st year                              0.546 (0.277-1.078) 0.081
## Including a time-by-covariate interaction          NA (NA-NA)    NA
## HR at 0.5 years                            2.197 (1.21-3.992) 0.010
## HR at 1 year                              1.356 (0.854-2.155) 0.197
## HR at 2 years                             0.517 (0.238-1.121) 0.095
## Weighted Cox regression                            NA (NA-NA)    NA
## average HR                                1.588 (0.995-2.533) 0.053
## c'%                                        61.35 (49.87-71.7) 0.053

Section 6.2: Biofeedback therapy study

## ignore non-proportional hazards and apply a Cox model
## (use breslow weights to make it directly comparable to coxphw)
bfit2 <- coxphw(Surv(thdur, success) ~ bfb + log2heal, data = biofeedback, template = "PH")
summary(bfit2)
## coxphw(formula = Surv(thdur, success) ~ bfb + log2heal, data = biofeedback, 
##     template = "PH")
## 
## Model fitted by unweighted estimation (PH template) 
## 
##                coef  se(coef) exp(coef) lower 0.95 upper 0.95          z
## bfb       0.2699762 0.3453073 1.3099332  0.6657683   2.577361  0.7818433
## log2heal -0.5266649 0.3636448 0.5905713  0.2895592   1.204502 -1.4482949
##                  p
## bfb      0.4343067
## log2heal 0.1475346
## 
## Wald Chi-square = 2.657646 on 2  df  p = 0.2647887  n = 33
## 
## Covariance-Matrix:
##                   bfb     log2heal
## bfb       0.119237110 -0.002917929
## log2heal -0.002917929  0.132237551
## 
## Generalized concordance probability:   Estimates may be biased!
##          concordance prob. lower 0.95 upper 0.95
## bfb                 0.5671     0.3997     0.7205
## log2heal            0.3713     0.2245     0.5464
## or equivalently
coxph(Surv(thdur, success) ~ bfb + log2heal + cluster(id), data = biofeedback,
      x = TRUE, method = "breslow")
## Call:
## coxph(formula = Surv(thdur, success) ~ bfb + log2heal, data = biofeedback, 
##     x = TRUE, method = "breslow", cluster = id)
## 
##             coef exp(coef) se(coef) robust se      z     p
## bfb       0.2700    1.3099   0.4273    0.3453  0.782 0.434
## log2heal -0.5267    0.5906   0.2543    0.3636 -1.448 0.148
## 
## Likelihood ratio test=7.19  on 2 df, p=0.02753
## n= 33, number of events= 23
## two stage estimation
stage1 <- coxph(Surv(thdur, success) ~ strata(bfb) + log2heal + tt(log2heal) + cluster(id),
                tt = function(x, t, ...) x * log(t), data = biofeedback, method = "breslow")
summary(stage1)
## Call:
## coxph(formula = Surv(thdur, success) ~ strata(bfb) + log2heal + 
##     tt(log2heal), data = biofeedback, tt = function(x, t, ...) x * 
##     log(t), method = "breslow", cluster = id)
## 
##   n= 33, number of events= 23 
## 
##                 coef exp(coef) se(coef) robust se      z Pr(>|z|)   
## log2heal      0.7368    2.0892   0.9008    0.3797  1.940  0.05233 . 
## tt(log2heal) -0.4153    0.6602   0.3257    0.1490 -2.786  0.00533 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## log2heal        2.0892     0.4787    0.9926    4.3971
## tt(log2heal)    0.6602     1.5148    0.4929    0.8841
## 
## Concordance= 0.664  (se = 0.063 )
## Likelihood ratio test= 8.53  on 2 df,   p=0.01
## Wald test            = 7.84  on 2 df,   p=0.02
## Score (logrank) test = 6.59  on 2 df,   p=0.04,   Robust = 7.71  p=0.02
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
## for comparison linear time-dependent effect
coxph(Surv(thdur, success) ~ strata(bfb) + log2heal + tt(log2heal) + cluster(id),
      tt = function(x, t, ...) x * t, data = biofeedback, method = "breslow")
## Call:
## coxph(formula = Surv(thdur, success) ~ strata(bfb) + log2heal + 
##     tt(log2heal), data = biofeedback, tt = function(x, t, ...) x * 
##     t, method = "breslow", cluster = id)
## 
##                  coef exp(coef) se(coef) robust se      z     p
## log2heal     -0.02130   0.97892  0.45270   0.43296 -0.049 0.961
## tt(log2heal) -0.01957   0.98062  0.02218   0.01549 -1.263 0.206
## 
## Likelihood ratio test=8.64  on 2 df, p=0.01331
## n= 33, number of events= 23
stage2 <- coxphw(Surv(thdur, success) ~ bfb + log2heal + log(thdur):log2heal, data = biofeedback,
                template = "AHR", betafix = c(NA, coef(stage1)))
summary(stage2)
## coxphw(formula = Surv(thdur, success) ~ bfb + log2heal + log(thdur):log2heal, 
##     data = biofeedback, template = "AHR", betafix = c(NA, coef(stage1)))
## 
## Model fitted by weighted estimation (AHR template) 
## 
##                           coef  se(coef) exp(coef) lower 0.95 upper 0.95
## bfb                  0.5967993 0.3732872 1.8162961  0.8738643   3.775107
## log2heal             0.7367590        NA 2.0891536         NA         NA
## log(thdur):log2heal -0.4152653        NA 0.6601651         NA         NA
##                            z         p
## bfb                 1.598767 0.1098724
## log2heal                  NA        NA
## log(thdur):log2heal       NA        NA
## 
## Wald Chi-square = 2.556056 on 1  df  p = 0.1098724  n = 33  (based on: bfb )
## 
## Covariance-Matrix:
## [1] 0.1393434
## 
## Generalized concordance probability:
##                     concordance prob. lower 0.95 upper 0.95
## bfb                            0.6449     0.4663     0.7906
## log2heal                       0.6763         NA         NA
## log(thdur):log2heal            0.3977         NA         NA
if (pdfind) { pdf(file = "figure5.pdf", width = 6, height = 5) }
  par(oma = c(2, 2, 0.5, 0.5), mar = c(2, 2, 0, 0))
  plot(x = stage2, las = 1, legendxy = c(45, 1.15), lwd = 2)
  mtext(side = 1, line = 2.5, text = "treatment duration (days)")
  mtext(side = 2, line = 2.5, text = "weight")

if (pdfind) { dev.off() }