Stepwise Selection for Fine-Gray Competing Risks Models

Ravi Varadhan, Deborah Kuk

2025-12-31

Introduction

The crrstep package provides stepwise variable selection for the Fine-Gray proportional subdistribution hazards model for competing risks data. This vignette demonstrates how to use the package with both simple and complex formula specifications.

Installation

install.packages('crrstep')
library(crrstep)
library(cmprsk)

Simulated Data

set.seed(123)
n <- 400
age <- rnorm(n, mean = 60, sd = 10)
sex <- factor(sample(c('Male', 'Female'), n, replace = TRUE))
treatment <- factor(sample(c('Control', 'Treatment'), n, replace = TRUE))
biomarker <- rnorm(n, mean = 100, sd = 20)
linpred <- 0.03 * (age - 60) + 0.5 * (sex == 'Male') - 0.4 * (treatment == 'Treatment') + 0.01 * biomarker
base_hazard <- 0.1
true_time <- rexp(n, rate = base_hazard * exp(linpred))
p_cause1 <- plogis(linpred)
cause <- ifelse(runif(n) < p_cause1, 1, 2)
censor_time <- rexp(n, rate = 0.05)
ftime <- pmin(true_time, censor_time)
fstatus <- ifelse(ftime == censor_time, 0, cause)
sim_data <- data.frame(ftime, fstatus, age, sex, treatment, biomarker)
table(sim_data$fstatus)
#> 
#>   0   1   2 
#>  61 252  87

Backward Selection with AIC

result_back <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                AIC
#> <none>     2671.71
#> -biomarker 2672.61
#> -treatment 2676.02
#> -age       2683.60
#> -sex       2700.06
#> Final model:
#>                    estimate std.error     z  p-value
#> age                 0.02400   0.00677  3.54 4.04e-04
#> sexMale             0.70300   0.13000  5.43 5.71e-08
#> treatmentTreatment -0.31800   0.13000 -2.44 1.46e-02
#> biomarker           0.00533   0.00315  1.69 9.13e-02
#> 
#> Log-likelihood: -1331.85
print(result_back)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex + treatment + biomarker 
#> 
#> AIC: 2671.71
#> Log-likelihood: -1331.85 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.023956  1.024245  0.006772  3.537 0.000404 ***
#> sexMale             0.703035  2.019874  0.129526  5.428 5.71e-08 ***
#> treatmentTreatment -0.317948  0.727640  0.130187 -2.442 0.014596 *  
#> biomarker           0.005326  1.005340  0.003154  1.689 0.091307 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Forward Selection with AIC

result_fwd <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'forward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "forward", criterion = "AIC", trace = TRUE)
#>                AIC
#> +sex       2686.78
#> +age       2705.04
#> +treatment 2710.36
#> <none>     2713.91
#> +biomarker 2714.68
#> Adding: sex 
#> 
#> Step:  AIC =  2686.78 
#> ~sex 
#> 
#>                AIC
#> +age       2676.68
#> +treatment 2683.51
#> <none>     2686.78
#> +biomarker 2687.09
#> Adding: age 
#> 
#> Step:  AIC =  2676.68 
#> ~sex + age 
#> 
#>                AIC
#> +treatment 2672.61
#> +biomarker 2676.02
#> <none>     2676.68
#> Adding: treatment 
#> 
#> Step:  AIC =  2672.61 
#> ~sex + age + treatment 
#> 
#>                AIC
#> +biomarker 2671.71
#> <none>     2672.61
#> Adding: biomarker 
#> 
#> Step:  AIC =  2671.71 
#> ~sex + age + treatment + biomarker 
#> 
#> Final model:
#>                    estimate std.error     z  p-value
#> sexMale             0.70300   0.13000  5.43 5.71e-08
#> age                 0.02400   0.00677  3.54 4.04e-04
#> treatmentTreatment -0.31800   0.13000 -2.44 1.46e-02
#> biomarker           0.00533   0.00315  1.69 9.13e-02
#> 
#> Log-likelihood: -1331.85
print(result_fwd)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "forward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: forward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~sex + age + treatment + biomarker 
#> 
#> AIC: 2671.71
#> Log-likelihood: -1331.85 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> sexMale             0.703035  2.019874  0.129526  5.428 5.71e-08 ***
#> age                 0.023956  1.024245  0.006772  3.537 0.000404 ***
#> treatmentTreatment -0.317948  0.727640  0.130187 -2.442 0.014596 *  
#> biomarker           0.005326  1.005340  0.003154  1.689 0.091307 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Different Selection Criteria

result_bic <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = FALSE
)
cat('AIC selected variables:', nrow(result_back$coefficients), '\\n')
#> AIC selected variables: \n
cat('BIC selected variables:', nrow(result_bic$coefficients), '\\n')
#> BIC selected variables: \n

Forcing Variables with scope.min

result_min <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                AIC
#> <none>     2671.71
#> -biomarker 2672.61
#> -treatment 2676.02
#> Final model:
#>                    estimate std.error     z  p-value
#> age                 0.02400   0.00677  3.54 4.04e-04
#> sexMale             0.70300   0.13000  5.43 5.71e-08
#> treatmentTreatment -0.31800   0.13000 -2.44 1.46e-02
#> biomarker           0.00533   0.00315  1.69 9.13e-02
#> 
#> Log-likelihood: -1331.85
print(result_min)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex + treatment + biomarker 
#> 
#> AIC: 2671.71
#> Log-likelihood: -1331.85 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.023956  1.024245  0.006772  3.537 0.000404 ***
#> sexMale             0.703035  2.019874  0.129526  5.428 5.71e-08 ***
#> treatmentTreatment -0.317948  0.727640  0.130187 -2.442 0.014596 *  
#> biomarker           0.005326  1.005340  0.003154  1.689 0.091307 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Factor Variables

set.seed(456)
sim_data$stage <- factor(sample(c('I', 'II', 'III', 'IV'), n, replace = TRUE))
sim_data$region <- factor(sample(c('North', 'South', 'East', 'West'), n, replace = TRUE))
result_factors <- crrstep(
  formula = ftime ~ age + sex + stage + region,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + stage + region, scope.min = ~1, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#>             BIC
#> -stage  2699.86
#> -region 2700.59
#> <none>  2715.81
#> -age    2720.92
#> -sex    2741.82
#> Dropping: stage 
#> 
#> Step:  BIC =  2699.86 
#> ~age + sex + region 
#> 
#>             BIC
#> -region 2684.67
#> <none>  2699.86
#> -age    2705.02
#> -sex    2724.64
#> Dropping: region 
#> 
#> Step:  BIC =  2684.67 
#> ~age + sex 
#> 
#>            BIC
#> <none> 2684.67
#> -age   2690.77
#> -sex   2709.03
#> Final model:
#>         estimate std.error    z  p-value
#> age       0.0223   0.00672 3.32 9.15e-04
#> sexMale   0.7030   0.12800 5.47 4.44e-08
#> 
#> Log-likelihood: -1336.34
summary(result_factors)
#> 
#> Stepwise Fine-Gray Competing Risks Regression
#> ================================================== 
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + stage + region, scope.min = ~1, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#> 
#> Selection method: backward 
#> Selection criterion: BIC 
#> 
#> Selected model:
#>   ~age + sex 
#> 
#>   n = 400 , number of events = 252 
#> 
#> Coefficients:
#>             coef exp(coef) se(coef)     z Pr(>|z|)    
#> age     0.022264  1.022514 0.006715 3.316 0.000915 ***
#> sexMale 0.702915  2.019631 0.128448 5.472 4.44e-08 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 95% Confidence intervals for hazard ratios:
#>         exp(coef) exp(-coef) lower 95% upper 95%
#> age         1.023     0.9780     1.009     1.036
#> sexMale     2.020     0.4951     1.570     2.598
#> 
#> -------------------------------------------------- 
#> BIC : 2684.67 
#> Log-likelihood: -1336.34 
#> Null log-likelihood: -1356.96 
#> 
#> Likelihood ratio test: 41.23 on 2 df, p = 1.114e-09
coef(result_factors)
#>        age    sexMale 
#> 0.02226407 0.70291504
logLik(result_factors)
#> 'log Lik.' -1336.341 (df=2)
BIC(result_factors)
#> [1] 2684.666

Interaction Terms

result_interact <- crrstep(
  formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + age:sex + sex:treatment, 
#>     scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                    AIC
#> -sex:treatment 2674.02
#> -age:sex       2674.59
#> <none>         2676.01
#> -treatment     2676.22
#> Dropping: sex:treatment 
#> 
#> Step:  AIC =  2674.02 
#> ~age + sex + treatment + age:sex 
#> 
#>                AIC
#> -age:sex   2672.61
#> <none>     2674.02
#> -treatment 2678.11
#> Dropping: age:sex 
#> 
#> Step:  AIC =  2672.61 
#> ~age + sex + treatment 
#> 
#>                AIC
#> <none>     2672.61
#> -treatment 2676.68
#> Final model:
#>                    estimate std.error     z  p-value
#> age                   0.023    0.0067  3.43 6.07e-04
#> sexMale               0.699    0.1290  5.43 5.63e-08
#> treatmentTreatment   -0.312    0.1290 -2.41 1.58e-02
#> 
#> Log-likelihood: -1333.3
print(result_interact)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + age:sex + sex:treatment, 
#>     scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex + treatment 
#> 
#> AIC: 2672.61
#> Log-likelihood: -1333.30 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.022968  1.023234  0.006699  3.429 0.000607 ***
#> sexMale             0.698886  2.011510  0.128702  5.430 5.63e-08 ***
#> treatmentTreatment -0.311991  0.731988  0.129244 -2.414 0.015780 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Polynomial Terms

sim_data$biomarker_c <- scale(sim_data$biomarker, scale = FALSE)[,1]
result_poly <- crrstep(
  formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2), 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                       AIC
#> -I(biomarker_c^2) 2676.02
#> <none>            2677.94
#> -biomarker_c      2678.32
#> -age              2689.09
#> -sex              2706.73
#> Dropping: I(biomarker_c^2) 
#> 
#> Step:  AIC =  2676.02 
#> ~age + sex + biomarker_c 
#> 
#>                  AIC
#> <none>       2676.02
#> -biomarker_c 2676.68
#> -age         2687.09
#> -sex         2704.77
#> Final model:
#>             estimate std.error    z  p-value
#> age          0.02330   0.00678 3.44 5.90e-04
#> sexMale      0.70800   0.12900 5.47 4.44e-08
#> biomarker_c  0.00512   0.00313 1.63 1.02e-01
#> 
#> Log-likelihood: -1335.01
print(result_poly)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2), 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex + biomarker_c 
#> 
#> AIC: 2676.02
#> Log-likelihood: -1335.01 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                 coef exp(coef) se(coef)     z        p    
#> age         0.023302  1.023576 0.006781 3.436  0.00059 ***
#> sexMale     0.707513  2.028939 0.129287 5.472 4.44e-08 ***
#> biomarker_c 0.005120  1.005133 0.003133 1.634  0.10228    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Polynomial-Factor Interactions

result_complex <- crrstep(
  formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + biomarker_c:treatment + I(biomarker_c^2):treatment,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + 
#>     biomarker_c:treatment + I(biomarker_c^2):treatment, scope.min = ~age, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#>                                 BIC
#> -I(biomarker_c^2)           2723.33
#> -treatment:biomarker_c      2723.69
#> -treatment:I(biomarker_c^2) 2723.73
#> -biomarker_c                2725.61
#> <none>                      2729.21
#> -treatment                  2729.58
#> Dropping: I(biomarker_c^2) 
#> 
#> Step:  BIC =  2729.21 
#> ~age + treatment + biomarker_c + treatment:biomarker_c + treatment:I(biomarker_c^2) 
#> 
#>                                 BIC
#> -treatment:I(biomarker_c^2) 2717.79
#> -treatment:biomarker_c      2723.69
#> -biomarker_c                2725.61
#> <none>                      2729.21
#> -treatment                  2729.58
#> Dropping: treatment:I(biomarker_c^2) 
#> 
#> Step:  BIC =  2717.79 
#> ~age + treatment + biomarker_c + treatment:biomarker_c 
#> 
#>                            BIC
#> -treatment:biomarker_c 2712.04
#> -biomarker_c           2714.11
#> <none>                 2717.79
#> -treatment             2718.49
#> Dropping: treatment:biomarker_c 
#> 
#> Step:  BIC =  2712.04 
#> ~age + treatment + biomarker_c 
#> 
#>                  BIC
#> -biomarker_c 2708.58
#> <none>       2712.04
#> -treatment   2712.75
#> Dropping: biomarker_c 
#> 
#> Step:  BIC =  2708.58 
#> ~age + treatment 
#> 
#>                BIC
#> <none>     2708.58
#> -treatment 2709.03
#> Final model:
#>                    estimate std.error     z  p-value
#> age                  0.0215   0.00632  3.40 0.000672
#> treatmentTreatment  -0.3210   0.12600 -2.54 0.011000
#> 
#> Log-likelihood: -1348.3
print(result_complex)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + 
#>     biomarker_c:treatment + I(biomarker_c^2):treatment, scope.min = ~age, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: BIC 
#> 
#> Selected model:
#>   ~age + treatment 
#> 
#> BIC: 2708.58
#> Log-likelihood: -1348.30 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.021485  1.021718  0.006318  3.401 0.000672 ***
#> treatmentTreatment -0.321142  0.725320  0.126335 -2.542 0.011023 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Transformations

sim_data$biomarker_pos <- sim_data$biomarker - min(sim_data$biomarker) + 1
result_log <- crrstep(
  formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                             AIC
#> -sex:log(biomarker_pos) 2676.96
#> -sex                    2677.14
#> -log(biomarker_pos)     2677.46
#> <none>                  2678.93
#> -age                    2689.41
#> Dropping: sex:log(biomarker_pos) 
#> 
#> Step:  AIC =  2676.96 
#> ~age + sex + log(biomarker_pos) 
#> 
#>                         AIC
#> -log(biomarker_pos) 2676.68
#> <none>              2676.96
#> -age                2687.63
#> -sex                2705.51
#> Dropping: log(biomarker_pos) 
#> 
#> Step:  AIC =  2676.68 
#> ~age + sex 
#> 
#>            AIC
#> <none> 2676.68
#> -age   2686.78
#> -sex   2705.04
#> Final model:
#>         estimate std.error    z  p-value
#> age       0.0223   0.00672 3.32 9.15e-04
#> sexMale   0.7030   0.12800 5.47 4.44e-08
#> 
#> Log-likelihood: -1336.34
print(result_log)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex 
#> 
#> AIC: 2676.68
#> Log-likelihood: -1336.34 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>             coef exp(coef) se(coef)     z        p    
#> age     0.022264  1.022514 0.006715 3.316 0.000915 ***
#> sexMale 0.702915  2.019631 0.128448 5.472 4.44e-08 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Three-Way Interactions

result_threeway <- crrstep(
  formula = ftime ~ age + sex + treatment + stage + sex:treatment + sex:stage + treatment:stage + sex:treatment:stage,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + stage + sex:treatment + 
#>     sex:stage + treatment:stage + sex:treatment:stage, scope.min = ~age, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#>                          BIC
#> -sex:stage           2738.46
#> -stage               2738.65
#> -sex:treatment:stage 2739.63
#> -treatment:stage     2740.20
#> -treatment           2750.16
#> -sex:treatment       2751.15
#> -sex                 2755.35
#> <none>               2756.09
#> Dropping: sex:stage 
#> 
#> Step:  BIC =  2756.09 
#> ~age + sex + treatment + stage + sex:treatment + treatment:stage + sex:treatment:stage 
#> 
#>                          BIC
#> -sex:treatment:stage 2723.20
#> -stage               2738.65
#> -treatment:stage     2740.20
#> -treatment           2750.16
#> -sex:treatment       2751.15
#> -sex                 2755.35
#> <none>               2756.09
#> Dropping: sex:treatment:stage 
#> 
#> Step:  BIC =  2723.2 
#> ~age + sex + treatment + stage + sex:treatment + treatment:stage 
#> 
#>                      BIC
#> -treatment:stage 2706.48
#> -stage           2706.88
#> -sex:treatment   2717.22
#> -treatment       2717.57
#> <none>           2723.20
#> -sex             2735.60
#> Dropping: treatment:stage 
#> 
#> Step:  BIC =  2706.48 
#> ~age + sex + treatment + stage + sex:treatment 
#> 
#>                    BIC
#> -stage         2690.55
#> -sex:treatment 2700.50
#> -treatment     2702.73
#> <none>         2706.48
#> -sex           2718.52
#> Dropping: stage 
#> 
#> Step:  BIC =  2690.55 
#> ~age + sex + treatment + sex:treatment 
#> 
#>                    BIC
#> -sex:treatment 2684.58
#> -treatment     2686.69
#> <none>         2690.55
#> -sex           2702.04
#> Dropping: sex:treatment 
#> 
#> Step:  BIC =  2684.58 
#> ~age + sex + treatment 
#> 
#>                BIC
#> <none>     2684.58
#> -treatment 2684.67
#> -sex       2708.58
#> Final model:
#>                    estimate std.error     z  p-value
#> age                   0.023    0.0067  3.43 6.07e-04
#> sexMale               0.699    0.1290  5.43 5.63e-08
#> treatmentTreatment   -0.312    0.1290 -2.41 1.58e-02
#> 
#> Log-likelihood: -1333.3
print(result_threeway)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + stage + sex:treatment + 
#>     sex:stage + treatment:stage + sex:treatment:stage, scope.min = ~age, 
#>     etype = fstatus, failcode = 1, data = sim_data, direction = "backward", 
#>     criterion = "BIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: BIC 
#> 
#> Selected model:
#>   ~age + sex + treatment 
#> 
#> BIC: 2684.58
#> Log-likelihood: -1333.30 
#> 
#> N = 400  Events = 252 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.022968  1.023234  0.006699  3.429 0.000607 ***
#> sexMale             0.698886  2.011510  0.128702  5.430 5.63e-08 ***
#> treatmentTreatment -0.311991  0.731988  0.129244 -2.414 0.015780 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Full CRR Object

fit_full <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE,
  crr.object = TRUE
)
cat('Coefficients:\\n')
#> Coefficients:\n
print(fit_full$coef)
#>                age            sexMale treatmentTreatment          biomarker 
#>        0.023955563        0.703034949       -0.317948441        0.005326249
cat('\\nLog-likelihood:\\n')
#> \nLog-likelihood:\n
print(fit_full$loglik)
#> [1] -1331.854

Missing Data

sim_data_miss <- sim_data
sim_data_miss$age[sample(1:n, 20)] <- NA
sim_data_miss$biomarker[sample(1:n, 15)] <- NA
result_miss <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data_miss,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
#> 35 cases omitted due to missing values
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data_miss, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#>                AIC
#> <none>     2427.56
#> -treatment 2429.66
#> -biomarker 2430.02
#> -age       2438.13
#> -sex       2450.35
#> Final model:
#>                    estimate std.error     z  p-value
#> age                 0.02410   0.00706  3.41 6.38e-04
#> sexMale             0.66000   0.13400  4.91 9.08e-07
#> treatmentTreatment -0.26700   0.13500 -1.97 4.91e-02
#> biomarker           0.00681   0.00326  2.09 3.68e-02
#> 
#> Log-likelihood: -1209.78
print(result_miss)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker, 
#>     scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data_miss, 
#>     direction = "backward", criterion = "AIC", trace = TRUE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + sex + treatment + biomarker 
#> 
#> AIC: 2427.56
#> Log-likelihood: -1209.78 
#> 
#> N = 365  Events = 233 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.024107  1.024400  0.007060  3.415 0.000638 ***
#> sexMale             0.660295  1.935363  0.134463  4.911 9.08e-07 ***
#> treatmentTreatment -0.266602  0.765978  0.135484 -1.968 0.049094 *  
#> biomarker           0.006813  1.006837  0.003264  2.088 0.036834 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Using Subset

result_subset <- crrstep(
  formula = ftime ~ age + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  subset = sim_data$sex == 'Female',
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE
)
print(result_subset)
#> 
#> Call:
#> crrstep(formula = ftime ~ age + treatment + biomarker, scope.min = ~1, 
#>     etype = fstatus, failcode = 1, subset = sim_data$sex == "Female", 
#>     data = sim_data, direction = "backward", criterion = "AIC", 
#>     trace = FALSE)
#> 
#> Direction: backward 
#> Criterion: AIC 
#> 
#> Selected model:
#>   ~age + treatment 
#> 
#> AIC: 947.22
#> Log-likelihood: -471.61 
#> 
#> N = 195  Events = 102 
#> 
#> Coefficients:
#>                         coef exp(coef)  se(coef)      z        p    
#> age                 0.029801  1.030250  0.008721  3.417 0.000633 ***
#> treatmentTreatment -0.301905  0.739408  0.196353 -1.538 0.124155    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Best Practices

  1. Center continuous variables when using polynomial terms
  2. Include main effects in scope.min when testing interactions
  3. Use BIC or BICcr for more parsimonious models
  4. Check for multicollinearity before modeling

Troubleshooting

Convergence issues: Center/scale variables, remove correlated variables Singular matrix errors: Remove redundant variables, combine sparse factor levels

References

Kuk, D. and Varadhan, R. (2013). Model selection in competing risks regression. Statistics in Medicine.

Fine, J.P. and Gray, R.J. (1999). A proportional hazards model for the subdistribution of a competing risk. Journal of the American Statistical Association, 94(446), 496-509.

Session Info

sessionInfo()
#> R version 4.5.0 (2025-04-11 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 22631)
#> 
#> Matrix products: default
#>   LAPACK version 3.12.1
#> 
#> locale:
#> [1] LC_COLLATE=C                          
#> [2] LC_CTYPE=English_United States.utf8   
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C                          
#> [5] LC_TIME=English_United States.utf8    
#> 
#> time zone: America/New_York
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] cmprsk_2.2-12    survival_3.8-3   crrstep_2025.1.1
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.39     R6_2.6.1          fastmap_1.2.0     Matrix_1.7-4     
#>  [5] xfun_0.54         lattice_0.22-7    splines_4.5.0     cachem_1.1.0     
#>  [9] knitr_1.50        htmltools_0.5.9   rmarkdown_2.30    lifecycle_1.0.4  
#> [13] cli_3.6.5         grid_4.5.0        sass_0.4.10       jquerylib_0.1.4  
#> [17] compiler_4.5.0    rstudioapi_0.17.1 tools_4.5.0       evaluate_1.0.5   
#> [21] bslib_0.9.0       yaml_2.3.12       rlang_1.1.6       jsonlite_2.0.0