The crrstep package provides stepwise variable selection for the Fine-Gray proportional subdistribution hazards model for competing risks data. This vignette demonstrates how to use the package with both simple and complex formula specifications.
set.seed(123)
n <- 400
age <- rnorm(n, mean = 60, sd = 10)
sex <- factor(sample(c('Male', 'Female'), n, replace = TRUE))
treatment <- factor(sample(c('Control', 'Treatment'), n, replace = TRUE))
biomarker <- rnorm(n, mean = 100, sd = 20)
linpred <- 0.03 * (age - 60) + 0.5 * (sex == 'Male') - 0.4 * (treatment == 'Treatment') + 0.01 * biomarker
base_hazard <- 0.1
true_time <- rexp(n, rate = base_hazard * exp(linpred))
p_cause1 <- plogis(linpred)
cause <- ifelse(runif(n) < p_cause1, 1, 2)
censor_time <- rexp(n, rate = 0.05)
ftime <- pmin(true_time, censor_time)
fstatus <- ifelse(ftime == censor_time, 0, cause)
sim_data <- data.frame(ftime, fstatus, age, sex, treatment, biomarker)
table(sim_data$fstatus)
#>
#> 0 1 2
#> 61 252 87result_back <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> <none> 2671.71
#> -biomarker 2672.61
#> -treatment 2676.02
#> -age 2683.60
#> -sex 2700.06
#> Final model:
#> estimate std.error z p-value
#> age 0.02400 0.00677 3.54 4.04e-04
#> sexMale 0.70300 0.13000 5.43 5.71e-08
#> treatmentTreatment -0.31800 0.13000 -2.44 1.46e-02
#> biomarker 0.00533 0.00315 1.69 9.13e-02
#>
#> Log-likelihood: -1331.85
print(result_back)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex + treatment + biomarker
#>
#> AIC: 2671.71
#> Log-likelihood: -1331.85
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.023956 1.024245 0.006772 3.537 0.000404 ***
#> sexMale 0.703035 2.019874 0.129526 5.428 5.71e-08 ***
#> treatmentTreatment -0.317948 0.727640 0.130187 -2.442 0.014596 *
#> biomarker 0.005326 1.005340 0.003154 1.689 0.091307 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1result_fwd <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'forward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "forward", criterion = "AIC", trace = TRUE)
#> AIC
#> +sex 2686.78
#> +age 2705.04
#> +treatment 2710.36
#> <none> 2713.91
#> +biomarker 2714.68
#> Adding: sex
#>
#> Step: AIC = 2686.78
#> ~sex
#>
#> AIC
#> +age 2676.68
#> +treatment 2683.51
#> <none> 2686.78
#> +biomarker 2687.09
#> Adding: age
#>
#> Step: AIC = 2676.68
#> ~sex + age
#>
#> AIC
#> +treatment 2672.61
#> +biomarker 2676.02
#> <none> 2676.68
#> Adding: treatment
#>
#> Step: AIC = 2672.61
#> ~sex + age + treatment
#>
#> AIC
#> +biomarker 2671.71
#> <none> 2672.61
#> Adding: biomarker
#>
#> Step: AIC = 2671.71
#> ~sex + age + treatment + biomarker
#>
#> Final model:
#> estimate std.error z p-value
#> sexMale 0.70300 0.13000 5.43 5.71e-08
#> age 0.02400 0.00677 3.54 4.04e-04
#> treatmentTreatment -0.31800 0.13000 -2.44 1.46e-02
#> biomarker 0.00533 0.00315 1.69 9.13e-02
#>
#> Log-likelihood: -1331.85
print(result_fwd)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "forward", criterion = "AIC", trace = TRUE)
#>
#> Direction: forward
#> Criterion: AIC
#>
#> Selected model:
#> ~sex + age + treatment + biomarker
#>
#> AIC: 2671.71
#> Log-likelihood: -1331.85
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> sexMale 0.703035 2.019874 0.129526 5.428 5.71e-08 ***
#> age 0.023956 1.024245 0.006772 3.537 0.000404 ***
#> treatmentTreatment -0.317948 0.727640 0.130187 -2.442 0.014596 *
#> biomarker 0.005326 1.005340 0.003154 1.689 0.091307 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1result_bic <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'BIC',
trace = FALSE
)
cat('AIC selected variables:', nrow(result_back$coefficients), '\\n')
#> AIC selected variables: \n
cat('BIC selected variables:', nrow(result_bic$coefficients), '\\n')
#> BIC selected variables: \nresult_min <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~ age + sex,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> <none> 2671.71
#> -biomarker 2672.61
#> -treatment 2676.02
#> Final model:
#> estimate std.error z p-value
#> age 0.02400 0.00677 3.54 4.04e-04
#> sexMale 0.70300 0.13000 5.43 5.71e-08
#> treatmentTreatment -0.31800 0.13000 -2.44 1.46e-02
#> biomarker 0.00533 0.00315 1.69 9.13e-02
#>
#> Log-likelihood: -1331.85
print(result_min)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex + treatment + biomarker
#>
#> AIC: 2671.71
#> Log-likelihood: -1331.85
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.023956 1.024245 0.006772 3.537 0.000404 ***
#> sexMale 0.703035 2.019874 0.129526 5.428 5.71e-08 ***
#> treatmentTreatment -0.317948 0.727640 0.130187 -2.442 0.014596 *
#> biomarker 0.005326 1.005340 0.003154 1.689 0.091307 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1set.seed(456)
sim_data$stage <- factor(sample(c('I', 'II', 'III', 'IV'), n, replace = TRUE))
sim_data$region <- factor(sample(c('North', 'South', 'East', 'West'), n, replace = TRUE))
result_factors <- crrstep(
formula = ftime ~ age + sex + stage + region,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'BIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + stage + region, scope.min = ~1,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#> BIC
#> -stage 2699.86
#> -region 2700.59
#> <none> 2715.81
#> -age 2720.92
#> -sex 2741.82
#> Dropping: stage
#>
#> Step: BIC = 2699.86
#> ~age + sex + region
#>
#> BIC
#> -region 2684.67
#> <none> 2699.86
#> -age 2705.02
#> -sex 2724.64
#> Dropping: region
#>
#> Step: BIC = 2684.67
#> ~age + sex
#>
#> BIC
#> <none> 2684.67
#> -age 2690.77
#> -sex 2709.03
#> Final model:
#> estimate std.error z p-value
#> age 0.0223 0.00672 3.32 9.15e-04
#> sexMale 0.7030 0.12800 5.47 4.44e-08
#>
#> Log-likelihood: -1336.34
summary(result_factors)
#>
#> Stepwise Fine-Gray Competing Risks Regression
#> ==================================================
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + stage + region, scope.min = ~1,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#>
#> Selection method: backward
#> Selection criterion: BIC
#>
#> Selected model:
#> ~age + sex
#>
#> n = 400 , number of events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z Pr(>|z|)
#> age 0.022264 1.022514 0.006715 3.316 0.000915 ***
#> sexMale 0.702915 2.019631 0.128448 5.472 4.44e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> 95% Confidence intervals for hazard ratios:
#> exp(coef) exp(-coef) lower 95% upper 95%
#> age 1.023 0.9780 1.009 1.036
#> sexMale 2.020 0.4951 1.570 2.598
#>
#> --------------------------------------------------
#> BIC : 2684.67
#> Log-likelihood: -1336.34
#> Null log-likelihood: -1356.96
#>
#> Likelihood ratio test: 41.23 on 2 df, p = 1.114e-09
coef(result_factors)
#> age sexMale
#> 0.02226407 0.70291504
logLik(result_factors)
#> 'log Lik.' -1336.341 (df=2)
BIC(result_factors)
#> [1] 2684.666result_interact <- crrstep(
formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
scope.min = ~ age + sex,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
#> scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> -sex:treatment 2674.02
#> -age:sex 2674.59
#> <none> 2676.01
#> -treatment 2676.22
#> Dropping: sex:treatment
#>
#> Step: AIC = 2674.02
#> ~age + sex + treatment + age:sex
#>
#> AIC
#> -age:sex 2672.61
#> <none> 2674.02
#> -treatment 2678.11
#> Dropping: age:sex
#>
#> Step: AIC = 2672.61
#> ~age + sex + treatment
#>
#> AIC
#> <none> 2672.61
#> -treatment 2676.68
#> Final model:
#> estimate std.error z p-value
#> age 0.023 0.0067 3.43 6.07e-04
#> sexMale 0.699 0.1290 5.43 5.63e-08
#> treatmentTreatment -0.312 0.1290 -2.41 1.58e-02
#>
#> Log-likelihood: -1333.3
print(result_interact)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
#> scope.min = ~age + sex, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex + treatment
#>
#> AIC: 2672.61
#> Log-likelihood: -1333.30
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.022968 1.023234 0.006699 3.429 0.000607 ***
#> sexMale 0.698886 2.011510 0.128702 5.430 5.63e-08 ***
#> treatmentTreatment -0.311991 0.731988 0.129244 -2.414 0.015780 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1sim_data$biomarker_c <- scale(sim_data$biomarker, scale = FALSE)[,1]
result_poly <- crrstep(
formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> -I(biomarker_c^2) 2676.02
#> <none> 2677.94
#> -biomarker_c 2678.32
#> -age 2689.09
#> -sex 2706.73
#> Dropping: I(biomarker_c^2)
#>
#> Step: AIC = 2676.02
#> ~age + sex + biomarker_c
#>
#> AIC
#> <none> 2676.02
#> -biomarker_c 2676.68
#> -age 2687.09
#> -sex 2704.77
#> Final model:
#> estimate std.error z p-value
#> age 0.02330 0.00678 3.44 5.90e-04
#> sexMale 0.70800 0.12900 5.47 4.44e-08
#> biomarker_c 0.00512 0.00313 1.63 1.02e-01
#>
#> Log-likelihood: -1335.01
print(result_poly)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex + biomarker_c
#>
#> AIC: 2676.02
#> Log-likelihood: -1335.01
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.023302 1.023576 0.006781 3.436 0.00059 ***
#> sexMale 0.707513 2.028939 0.129287 5.472 4.44e-08 ***
#> biomarker_c 0.005120 1.005133 0.003133 1.634 0.10228
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1result_complex <- crrstep(
formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + biomarker_c:treatment + I(biomarker_c^2):treatment,
scope.min = ~ age,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'BIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) +
#> biomarker_c:treatment + I(biomarker_c^2):treatment, scope.min = ~age,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#> BIC
#> -I(biomarker_c^2) 2723.33
#> -treatment:biomarker_c 2723.69
#> -treatment:I(biomarker_c^2) 2723.73
#> -biomarker_c 2725.61
#> <none> 2729.21
#> -treatment 2729.58
#> Dropping: I(biomarker_c^2)
#>
#> Step: BIC = 2729.21
#> ~age + treatment + biomarker_c + treatment:biomarker_c + treatment:I(biomarker_c^2)
#>
#> BIC
#> -treatment:I(biomarker_c^2) 2717.79
#> -treatment:biomarker_c 2723.69
#> -biomarker_c 2725.61
#> <none> 2729.21
#> -treatment 2729.58
#> Dropping: treatment:I(biomarker_c^2)
#>
#> Step: BIC = 2717.79
#> ~age + treatment + biomarker_c + treatment:biomarker_c
#>
#> BIC
#> -treatment:biomarker_c 2712.04
#> -biomarker_c 2714.11
#> <none> 2717.79
#> -treatment 2718.49
#> Dropping: treatment:biomarker_c
#>
#> Step: BIC = 2712.04
#> ~age + treatment + biomarker_c
#>
#> BIC
#> -biomarker_c 2708.58
#> <none> 2712.04
#> -treatment 2712.75
#> Dropping: biomarker_c
#>
#> Step: BIC = 2708.58
#> ~age + treatment
#>
#> BIC
#> <none> 2708.58
#> -treatment 2709.03
#> Final model:
#> estimate std.error z p-value
#> age 0.0215 0.00632 3.40 0.000672
#> treatmentTreatment -0.3210 0.12600 -2.54 0.011000
#>
#> Log-likelihood: -1348.3
print(result_complex)
#>
#> Call:
#> crrstep(formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) +
#> biomarker_c:treatment + I(biomarker_c^2):treatment, scope.min = ~age,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: BIC
#>
#> Selected model:
#> ~age + treatment
#>
#> BIC: 2708.58
#> Log-likelihood: -1348.30
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.021485 1.021718 0.006318 3.401 0.000672 ***
#> treatmentTreatment -0.321142 0.725320 0.126335 -2.542 0.011023 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1sim_data$biomarker_pos <- sim_data$biomarker - min(sim_data$biomarker) + 1
result_log <- crrstep(
formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> -sex:log(biomarker_pos) 2676.96
#> -sex 2677.14
#> -log(biomarker_pos) 2677.46
#> <none> 2678.93
#> -age 2689.41
#> Dropping: sex:log(biomarker_pos)
#>
#> Step: AIC = 2676.96
#> ~age + sex + log(biomarker_pos)
#>
#> AIC
#> -log(biomarker_pos) 2676.68
#> <none> 2676.96
#> -age 2687.63
#> -sex 2705.51
#> Dropping: log(biomarker_pos)
#>
#> Step: AIC = 2676.68
#> ~age + sex
#>
#> AIC
#> <none> 2676.68
#> -age 2686.78
#> -sex 2705.04
#> Final model:
#> estimate std.error z p-value
#> age 0.0223 0.00672 3.32 9.15e-04
#> sexMale 0.7030 0.12800 5.47 4.44e-08
#>
#> Log-likelihood: -1336.34
print(result_log)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex
#>
#> AIC: 2676.68
#> Log-likelihood: -1336.34
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.022264 1.022514 0.006715 3.316 0.000915 ***
#> sexMale 0.702915 2.019631 0.128448 5.472 4.44e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1result_threeway <- crrstep(
formula = ftime ~ age + sex + treatment + stage + sex:treatment + sex:stage + treatment:stage + sex:treatment:stage,
scope.min = ~ age,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'BIC',
trace = TRUE
)
#> crrstep(formula = ftime ~ age + sex + treatment + stage + sex:treatment +
#> sex:stage + treatment:stage + sex:treatment:stage, scope.min = ~age,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#> BIC
#> -sex:stage 2738.46
#> -stage 2738.65
#> -sex:treatment:stage 2739.63
#> -treatment:stage 2740.20
#> -treatment 2750.16
#> -sex:treatment 2751.15
#> -sex 2755.35
#> <none> 2756.09
#> Dropping: sex:stage
#>
#> Step: BIC = 2756.09
#> ~age + sex + treatment + stage + sex:treatment + treatment:stage + sex:treatment:stage
#>
#> BIC
#> -sex:treatment:stage 2723.20
#> -stage 2738.65
#> -treatment:stage 2740.20
#> -treatment 2750.16
#> -sex:treatment 2751.15
#> -sex 2755.35
#> <none> 2756.09
#> Dropping: sex:treatment:stage
#>
#> Step: BIC = 2723.2
#> ~age + sex + treatment + stage + sex:treatment + treatment:stage
#>
#> BIC
#> -treatment:stage 2706.48
#> -stage 2706.88
#> -sex:treatment 2717.22
#> -treatment 2717.57
#> <none> 2723.20
#> -sex 2735.60
#> Dropping: treatment:stage
#>
#> Step: BIC = 2706.48
#> ~age + sex + treatment + stage + sex:treatment
#>
#> BIC
#> -stage 2690.55
#> -sex:treatment 2700.50
#> -treatment 2702.73
#> <none> 2706.48
#> -sex 2718.52
#> Dropping: stage
#>
#> Step: BIC = 2690.55
#> ~age + sex + treatment + sex:treatment
#>
#> BIC
#> -sex:treatment 2684.58
#> -treatment 2686.69
#> <none> 2690.55
#> -sex 2702.04
#> Dropping: sex:treatment
#>
#> Step: BIC = 2684.58
#> ~age + sex + treatment
#>
#> BIC
#> <none> 2684.58
#> -treatment 2684.67
#> -sex 2708.58
#> Final model:
#> estimate std.error z p-value
#> age 0.023 0.0067 3.43 6.07e-04
#> sexMale 0.699 0.1290 5.43 5.63e-08
#> treatmentTreatment -0.312 0.1290 -2.41 1.58e-02
#>
#> Log-likelihood: -1333.3
print(result_threeway)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + stage + sex:treatment +
#> sex:stage + treatment:stage + sex:treatment:stage, scope.min = ~age,
#> etype = fstatus, failcode = 1, data = sim_data, direction = "backward",
#> criterion = "BIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: BIC
#>
#> Selected model:
#> ~age + sex + treatment
#>
#> BIC: 2684.58
#> Log-likelihood: -1333.30
#>
#> N = 400 Events = 252
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.022968 1.023234 0.006699 3.429 0.000607 ***
#> sexMale 0.698886 2.011510 0.128702 5.430 5.63e-08 ***
#> treatmentTreatment -0.311991 0.731988 0.129244 -2.414 0.015780 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1fit_full <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = FALSE,
crr.object = TRUE
)
cat('Coefficients:\\n')
#> Coefficients:\n
print(fit_full$coef)
#> age sexMale treatmentTreatment biomarker
#> 0.023955563 0.703034949 -0.317948441 0.005326249
cat('\\nLog-likelihood:\\n')
#> \nLog-likelihood:\n
print(fit_full$loglik)
#> [1] -1331.854sim_data_miss <- sim_data
sim_data_miss$age[sample(1:n, 20)] <- NA
sim_data_miss$biomarker[sample(1:n, 15)] <- NA
result_miss <- crrstep(
formula = ftime ~ age + sex + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
data = sim_data_miss,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = TRUE
)
#> 35 cases omitted due to missing values
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data_miss,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#> AIC
#> <none> 2427.56
#> -treatment 2429.66
#> -biomarker 2430.02
#> -age 2438.13
#> -sex 2450.35
#> Final model:
#> estimate std.error z p-value
#> age 0.02410 0.00706 3.41 6.38e-04
#> sexMale 0.66000 0.13400 4.91 9.08e-07
#> treatmentTreatment -0.26700 0.13500 -1.97 4.91e-02
#> biomarker 0.00681 0.00326 2.09 3.68e-02
#>
#> Log-likelihood: -1209.78
print(result_miss)
#>
#> Call:
#> crrstep(formula = ftime ~ age + sex + treatment + biomarker,
#> scope.min = ~1, etype = fstatus, failcode = 1, data = sim_data_miss,
#> direction = "backward", criterion = "AIC", trace = TRUE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + sex + treatment + biomarker
#>
#> AIC: 2427.56
#> Log-likelihood: -1209.78
#>
#> N = 365 Events = 233
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.024107 1.024400 0.007060 3.415 0.000638 ***
#> sexMale 0.660295 1.935363 0.134463 4.911 9.08e-07 ***
#> treatmentTreatment -0.266602 0.765978 0.135484 -1.968 0.049094 *
#> biomarker 0.006813 1.006837 0.003264 2.088 0.036834 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1result_subset <- crrstep(
formula = ftime ~ age + treatment + biomarker,
scope.min = ~1,
etype = fstatus,
subset = sim_data$sex == 'Female',
data = sim_data,
failcode = 1,
direction = 'backward',
criterion = 'AIC',
trace = FALSE
)
print(result_subset)
#>
#> Call:
#> crrstep(formula = ftime ~ age + treatment + biomarker, scope.min = ~1,
#> etype = fstatus, failcode = 1, subset = sim_data$sex == "Female",
#> data = sim_data, direction = "backward", criterion = "AIC",
#> trace = FALSE)
#>
#> Direction: backward
#> Criterion: AIC
#>
#> Selected model:
#> ~age + treatment
#>
#> AIC: 947.22
#> Log-likelihood: -471.61
#>
#> N = 195 Events = 102
#>
#> Coefficients:
#> coef exp(coef) se(coef) z p
#> age 0.029801 1.030250 0.008721 3.417 0.000633 ***
#> treatmentTreatment -0.301905 0.739408 0.196353 -1.538 0.124155
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1Convergence issues: Center/scale variables, remove correlated variables Singular matrix errors: Remove redundant variables, combine sparse factor levels
Kuk, D. and Varadhan, R. (2013). Model selection in competing risks regression. Statistics in Medicine.
Fine, J.P. and Gray, R.J. (1999). A proportional hazards model for the subdistribution of a competing risk. Journal of the American Statistical Association, 94(446), 496-509.
sessionInfo()
#> R version 4.5.0 (2025-04-11 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 22631)
#>
#> Matrix products: default
#> LAPACK version 3.12.1
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=English_United States.utf8
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.utf8
#>
#> time zone: America/New_York
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] cmprsk_2.2-12 survival_3.8-3 crrstep_2025.1.1
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.39 R6_2.6.1 fastmap_1.2.0 Matrix_1.7-4
#> [5] xfun_0.54 lattice_0.22-7 splines_4.5.0 cachem_1.1.0
#> [9] knitr_1.50 htmltools_0.5.9 rmarkdown_2.30 lifecycle_1.0.4
#> [13] cli_3.6.5 grid_4.5.0 sass_0.4.10 jquerylib_0.1.4
#> [17] compiler_4.5.0 rstudioapi_0.17.1 tools_4.5.0 evaluate_1.0.5
#> [21] bslib_0.9.0 yaml_2.3.12 rlang_1.1.6 jsonlite_2.0.0