使用 rms 包预测 r 中的生存概率时出错

Question

我有一个包含 5 个变量的数据集：CANT_ANU、RETIRE、SEX、PROF 和 AGE_INI。像这样：

library(data.table)
library(rms)

dt <- structure(list(CANT_ANU = c(32.1671232876712, 12.1671232876712, 
                                    45, 28.2465753424658, 33, 3, 34.2493150684932, 31.3287671232877, 
                                    40.413698630137, 31, 19.0849315068493, 31.0846994535519, 20.586301369863, 
                                    34, 36.0849315068493, 32.9150684931507, 24.0849315068493, 3.24657534246575, 
                                    17.0792349726776, 15.0849315068493, 25.7486338797814, 19, 31.3287671232877, 
                                    17.1616438356164, 31.0849315068493), RETIRE = c(1, 1, 1, 1, 1, 
                                                                                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1), 
                 SEX = c(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
                          1, 1, 1, 1, 1, 0, 1, 0, 1), PROF = c(0, 1, 0, 0, 0, 
                                                                      0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 
                                                                      0), AGE_INI = c(49.2383561643836, 64.0575342465753, 32.9945205479452, 
                                                                                       44.6767123287671, 40.4383561643836, 68.7890410958904, 38.2459016393443, 
                                                                                       38.7479452054795, 30.5561643835616, 41.0684931506849, 51.1260273972603, 
                                                                                       41.6219178082192, 54.9479452054794, 36.0846994535519, 32.0109589041096, 
                                                                                       36.1698630136986, 44.6794520547945, 66.4630136986301, 54.8493150684931, 
                                                                                       54.4246575342466, 50.8191780821918, 48.6657534246575, 37.0493150684932, 
                                                                                       53.2712328767123, 37.6684931506849)), row.names = c(NA, -25L
                                                                                       ), class = c("data.table", "data.frame"))

我想从估计的指数模型中获得整个数据集上每个受试者的生存概率，所以我尝试了这个：

mod.par.exp <- psm(formula = Surv(CANT_ANU, RETIRE) ~ SEX + PROF + AGE_INI,data = dt,dist = "exponential")
probs.exp.rms <- survest(mod.par.exp,dt,times = seq(10,50,10), conf.int=.9)

但我收到以下错误：

Error in x %*% fit$var[-last, last, drop = FALSE] : 
  argumentos no compatibles

我做错了什么？我之前尝试过软件包手册中 survest.psm 中指示的示例并且它们有效。

Answer 1

调用错误是因为 x 是一个 4x4 矩阵，而 fit$var[-last, last, drop = FALSE] 是一个 3x1 列向量。

这是代码的一部分： survest.psm

附上一个 reprex，向您展示错误发生的原因。

对于 conf.int = 0 或 conf.int = FALSE 它有效。

希望回答能帮助您缩小问题范围。联系软件包作者或 post github 站点上的问题也可能很有用。

library(data.table)
library(rms)
#> Lade nötiges Paket: Hmisc
#> Lade nötiges Paket: lattice
#> Lade nötiges Paket: survival
#> Lade nötiges Paket: Formula
#> Lade nötiges Paket: ggplot2
#> 
#> Attache Paket: 'Hmisc'
#> The following objects are masked from 'package:base':
#> 
#>     format.pval, units
#> Lade nötiges Paket: SparseM
#> 
#> Attache Paket: 'SparseM'
#> The following object is masked from 'package:base':
#> 
#>     backsolve

dt <- structure(list(CANT_ANU = c(32.1671232876712, 12.1671232876712, 
                                  45, 28.2465753424658, 33, 3, 34.2493150684932, 31.3287671232877, 
                                  40.413698630137, 31, 19.0849315068493, 31.0846994535519, 20.586301369863, 
                                  34, 36.0849315068493, 32.9150684931507, 24.0849315068493, 3.24657534246575, 
                                  17.0792349726776, 15.0849315068493, 25.7486338797814, 19, 31.3287671232877, 
                                  17.1616438356164, 31.0849315068493), RETIRE = c(1, 1, 1, 1, 1, 
                                                                                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1), 
                     SEX = c(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
                             1, 1, 1, 1, 1, 0, 1, 0, 1), PROF = c(0, 1, 0, 0, 0, 
                                                                  0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 
                                                                  0), AGE_INI = c(49.2383561643836, 64.0575342465753, 32.9945205479452, 
                                                                                  44.6767123287671, 40.4383561643836, 68.7890410958904, 38.2459016393443, 
                                                                                  38.7479452054795, 30.5561643835616, 41.0684931506849, 51.1260273972603, 
                                                                                  41.6219178082192, 54.9479452054794, 36.0846994535519, 32.0109589041096, 
                                                                                  36.1698630136986, 44.6794520547945, 66.4630136986301, 54.8493150684931, 
                                                                                  54.4246575342466, 50.8191780821918, 48.6657534246575, 37.0493150684932, 
                                                                                  53.2712328767123, 37.6684931506849)), row.names = c(NA, -25L
                                                                                  ), class = c("data.table", "data.frame"))

mod.par.exp <- psm(formula = Surv(CANT_ANU, RETIRE) ~ SEX + PROF + AGE_INI,data = dt,dist = "exponential")

# Throws the error: 
probs.exp.rms <- survest(mod.par.exp,dt,times = seq(10,50,10), conf.int = .9)
#> Error in x %*% fit$var[-last, last, drop = FALSE]: nicht passende Argumente

# reason for the error: 
## setup of nvar Line 38 of linked github site: 
## nvar <- length(fit$coef) - num.intercepts(fit)

## in your case nvar is: 
nvar_test <- length(mod.par.exp$coef) - num.intercepts(mod.par.exp); nvar_test
#> [1] 3

## setup of x since linear.predictors are missing (first if)... and some more conditions: 
## x is defined in Line 52 of linked github site. 
##  if(missing(x)) x <- cbind(Intercept=1, predict(fit, newdata, type="x"))

## in your case x is: 
x_test <- cbind(Intercept=1, predict(mod.par.exp, dt, type="x")); x_test
#>    Intercept SEX PROF  AGE_INI
#> 1          1   1    0 49.23836
#> 2          1   1    1 64.05753
#> 3          1   1    0 32.99452
#> 4          1   1    0 44.67671
#> 5          1   0    0 40.43836
#> 6          1   0    0 68.78904
#> 7          1   0    1 38.24590
#> 8          1   0    1 38.74795
#> 9          1   1    0 30.55616
#> 10         1   1    0 41.06849
#> 11         1   0    0 51.12603
#> 12         1   1    0 41.62192
#> 13         1   1    0 54.94795
#> 14         1   0    0 36.08470
#> 15         1   1    0 32.01096
#> 16         1   1    0 36.16986
#> 17         1   1    0 44.67945
#> 18         1   1    0 66.46301
#> 19         1   1    0 54.84932
#> 20         1   1    0 54.42466
#> 21         1   1    1 50.81918
#> 22         1   0    0 48.66575
#> 23         1   1    0 37.04932
#> 24         1   0    1 53.27123
#> 25         1   1    0 37.66849

## next condition from line 55 to 62: 
## g1 in line 56 can be calculated properly

## last is calculated from 57 - 60
# last <- {
#   nscale <- length(fit$icoef) - 1
#   ncol(fit$var) - (1 : nscale) + 1
# }

## in your case last is: 
last_test <- {
  nscale <- length(mod.par.exp$icoef) - 1
  ncol(mod.par.exp$var) - (1 : nscale) + 1
}
last_test
#> [1] 4

## computation of g2 in Line 62 throws the error: 

## g2 <- drop(x %*% fit$var[-last, last, drop=FALSE])

column_vec <- mod.par.exp$var[-last_test, last_test, drop = FALSE]; column_vec
#>               [,1]
#> [1,] -0.0222297822
#> [2,] -0.0003043264
#> [3,] -0.0022725899

## error in matrix multiplication: 4x4 %*% 3x1 does not work!! 
x_test %*% column_vec
#> Error in x_test %*% column_vec: nicht passende Argumente

# setting conf.int to FALSE or 0 works. 
probs.exp.rms_noconf <- survest(mod.par.exp, dt, times = seq(10,50,10), conf.int = FALSE)
probs.exp.rms_noconf
#>           10         20          30           40           50
#> 1  0.6109711 0.37328575 0.228066821 0.1393422481 8.513409e-02
#> 2  0.5169610 0.26724870 0.138157161 0.0714218675 3.692232e-02
#> 3  0.8116670 0.65880336 0.534728967 0.4340218715 3.522812e-01
#> 4  0.6790361 0.46109007 0.313096820 0.2126040542 1.443658e-01
#> 5  0.6610245 0.43695340 0.288836903 0.1909282713 1.262083e-01
#> 6  0.1565442 0.02450608 0.003836283 0.0006005478 9.401225e-05
#> 7  0.7981757 0.63708443 0.508505305 0.4058765720 3.239608e-01
#> 8  0.7933484 0.62940168 0.499334815 0.3961464753 3.142822e-01
#> 9  0.8324214 0.69292545 0.576806000 0.4801456797 3.996836e-01
#> 10 0.7262735 0.52747323 0.383089845 0.2782280123 2.020696e-01
#> 11 0.4825998 0.23290259 0.112398750 0.0542436172 2.617796e-02
#> 12 0.7194063 0.51754539 0.372325405 0.2678532341 1.926953e-01
#> 13 0.5135481 0.26373161 0.135438854 0.0695543604 3.571951e-02
#> 14 0.7197743 0.51807509 0.372897156 0.2684018024 1.931887e-01
#> 15 0.8202983 0.67288936 0.551970025 0.4527800935 3.714148e-01
#> 16 0.7812767 0.61039332 0.476886090 0.3725800018 2.910881e-01
#> 17 0.6789980 0.46103834 0.313044133 0.2125563541 1.443253e-01
#> 18 0.2936587 0.08623542 0.025323782 0.0074365485 2.183807e-03
#> 19 0.5153319 0.26556694 0.136855109 0.0705257999 3.634419e-02
#> 20 0.5229761 0.27350397 0.143036034 0.0748044237 3.912092e-02
#> 21 0.7206720 0.51936820 0.374294139 0.2697433227 1.943965e-01
#> 22 0.5274674 0.27822187 0.146752970 0.0774074094 4.082989e-02
#> 23 0.7721483 0.59621304 0.460364901 0.3554699878 2.744756e-01
#> 24 0.6071013 0.36857201 0.223760552 0.1358453261 8.247188e-02
#> 25 0.7655297 0.58603572 0.448627746 0.3434378625 2.629119e-01

^{由 reprex package (v0.3.0)}

于 2020-07-06 创建

使用 rms 包预测 r 中的生存概率时出错

Error using rms package for predict survival probability in r

r

predict

survival-analysis