如何在特定条件下将这些分类协变量转换回连续协变量?
How can I convert these categorical covariates back to being continuous covariates under specific conditions?
请在下面找到我的数据样本 e
。
问题: 连续协变量在 Cox 回归中被视为分类协变量
问题: 我需要 (1) 将所有 .0
保留在所有整数上,但要这样 (2) e$Ki67
和 e$age
在我的 Cox 回归中保持连续。这怎么能做到?
我最近在 SO 上收到了一个很棒的答案,因为我想将 .0
添加到包含整数和小数的列中的所有整数。
解决方案是
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
这样
> head(e)
rfs Ki67 WHO simpson age sex rad.dose recurrence dead os
1 25.33 0.6 1 1 43.0 1 0 1 0 214.67
2 207.93 3.3 2 2 76.0 1 0 0 1 207.93
3 80.00 1.0 2 1 79.0 1 0 0 1 80.00
4 47.77 0.6 1 3 84.0 1 0 1 1 52.67
5 193.25 0.6 1 1 62.0 1 0 0 1 193.25
6 6.08 3.7 2 3 71.0 1 0 1 1 65.54
但是,在运行我的 Cox 回归时,我收到此错误:
In fitter(X, Y, strata = Strata, offset = offset, weights = weights, :
Ran out of iterations and did not converge
似乎 rms
包中的 cph
现在将 e$Ki67
和 e$age
视为分类协变量,但它们实际上是连续协变量:
> summary(a)
Effects Response : Surv(rfs, recurrence)
Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
rad.dose 0 53.24 53.24 -8.7557e+00 1.0340e+01 -2.9021e+01 1.1510e+01
Hazard Ratio 0 53.24 53.24 1.5757e-04 NA 2.4899e-13 9.9714e+04
Ki67 - 0.2:0.5 4 1.00 NA -3.4399e+01 1.0790e+00 -3.6514e+01 -3.2284e+01
Hazard Ratio 4 1.00 NA 1.1498e-15 NA 1.3873e-16 9.5298e-15
Ki67 - 0.3:0.5 4 2.00 NA -2.7546e+01 1.6863e+01 -6.0596e+01 5.5041e+00
Hazard Ratio 4 2.00 NA 1.0887e-12 NA 4.8242e-27 2.4569e+02
Ki67 - 0.4:0.5 4 3.00 NA 5.8874e+00 7.6362e+00 -9.0793e+00 2.0854e+01
Hazard Ratio 4 3.00 NA 3.6046e+02 NA 1.1400e-04 1.1398e+09
Ki67 - 0.6:0.5 4 5.00 NA 2.8224e-02 1.4846e+00 -2.8816e+00 2.9380e+00
Hazard Ratio 4 5.00 NA 1.0286e+00 NA 5.6047e-02 1.8878e+01
Ki67 - 0.7:0.5 4 6.00 NA 9.0075e+00 2.5211e+00 4.0662e+00 1.3949e+01
Hazard Ratio 4 6.00 NA 8.1643e+03 NA 5.8336e+01 1.1426e+06
Ki67 - 0.8:0.5 4 7.00 NA -3.4891e-01 3.0083e+00 -6.2451e+00 5.5473e+00
Hazard Ratio 4 7.00 NA 7.0545e-01 NA 1.9399e-03 2.5654e+02
所以;我需要 (1) 将所有 .0
保留在所有整数上,但是 (2) e$Ki67
和 e$age
在我的 Cox 回归中保持连续。
我使用了以下脚本:
# sprintf() yields character strings, so after these two lines Ki67 and age
# are character columns (str(e) reports them as chr), no longer numeric.
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
library(rms)
# datadist is computed here, i.e. after the sprintf conversion above and
# before the factor conversions below.
d <- datadist(e)
options(datadist="d")
# Store WHO, simpson and sex as factors.
e$WHO <- as.factor(e$WHO)
e$simpson <- as.factor(e$simpson)
e$sex <- as.factor(e$sex)
# Fit the Cox model of Surv(rfs, recurrence) on the six covariates. With Ki67
# and age stored as character, summary(a) lists Ki67 as per-level contrasts.
a <- cph(Surv(rfs,recurrence)~Ki67+simpson+WHO+age+sex+rad.dose,data=e,surv=TRUE,x=TRUE,y=TRUE)
具有以下数据e
:
e <- structure(list(rfs = c(25.33, 207.93, 80, 47.77, 193.25, 6.08,
0.69, 174.85, 30.75, 27.27, 162.27, 204.98, 122.81, 20.53, 22.28,
197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 14.09, 152.38, 49.15,
190.46, 50.56, 66.76, 188.58, 188.42, 78.65, 125.77, 176.59,
185.69, 185.23, 184.71, 184.31, 183.59, 181.49, 96.53, 180.63,
30.16, 65.71, 179.48, 122.61, 177.35, 176.66, 0.13, 67.15, 175.31,
86.74, 174.65, 169.53, 169.23, 41.99, 168.77, 167.69, 56.71,
163.84, 163.81, 162.69, 162.63, 162.37, 119.59, 160.1, 159.47,
12.42, 155.56, 155.47, 155.27, 154.87, 154.61, 128.43, 56.51,
150.67, 50.79, 47.93, 83.58, 146.1, 144.69, 159.31, 140.58, 136.64,
135.52, 88.41, 134.11, 134.18, 133.49, 131.81, 77.04, 130.6,
63.87, 62.98, 88.51, 123.5, 122.45, 121.72, 121.69, 120.57, 1.54,
114.79), Ki67 = c("0.6", "3.3", "1.0", "0.6", "0.6", "3.7", "1.4",
"1.1", "1.8", "1.6", "0.7", "0.5", "0.3", "1.7", "0.5", "1.2",
"4.1", "0.6", "1.4", "1.3", "1.8", "2.6", "0.7", "0.8", "1.0",
"0.7", "0.7", "2.1", "1.3", "2.7", "1.3", "0.8", "1.1", "1.8",
"1.8", "0.4", "0.9", "6.4", "1.7", "1.5", "0.6", "2.7", "0.4",
"1.5", "1.4", "1.8", "2.3", "0.7", "2.4", "1.2", "0.6", "0.8",
"3.0", "4.0", "0.5", "1.2", "5.1", "1.5", "0.6", "1.2", "1.7",
"0.7", "1.4", "2.7", "1.1", "0.9", "0.5", "0.7", "0.9", "0.4",
"0.8", "0.8", "0.5", "0.9", "0.5", "1.2", "1.4", "2.5", "2.7",
"4.2", "0.8", "0.5", "1.7", "1.2", "1.6", "0.5", "2.6", "2.0",
"3.9", "0.6", "0.2", "0.5", "0.8", "0.5", "0.5", "0.6", "1.4",
"0.9", "1.0", "1.8"), WHO = structure(c(1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), simpson = structure(c(1L, 2L, 1L,
3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L,
2L, 2L, 3L, 1L, 1L, 2L, 1L, 3L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 1L, 2L, 2L, 3L, 2L, 2L,
2L), .Label = c("1", "2", "3"), class = "factor"), age = c("43.0",
"76.0", "79.0", "84.0", "62.0", "71.0", "75.0", "69.0", "53.0",
"70.0", "56.0", "45.0", "77.0", "72.0", "56.0", "59.0", "84.0",
"72.0", "83.0", "80.0", "49.0", "50.0", "68.0", "49.0", "46.0",
"50.0", "73.0", "51.0", "45.0", "42.0", "73.0", "56.0", "63.0",
"30.0", "67.0", "56.0", "58.0", "72.0", "51.0", "49.0", "68.0",
"65.0", "60.0", "64.0", "52.0", "65.0", "76.0", "78.0", "74.0",
"39.0", "30.0", "66.0", "58.0", "49.0", "67.0", "53.0", "69.0",
"41.0", "42.0", "66.0", "57.0", "52.0", "25.0", "64.0", "48.0",
"51.0", "47.0", "46.0", "44.0", "68.0", "41.0", "76.0", "65.0",
"60.0", "46.0", "54.0", "66.0", "42.0", "46.0", "83.0", "72.0",
"54.0", "51.0", "77.0", "58.0", "49.0", "52.0", "66.0", "50.0",
"32.0", "71.0", "45.0", "68.0", "55.0", "55.0", "44.0", "27.0",
"61.0", "76.0", "47.0"), sex = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), rad.dose = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0, 0, 53.24, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), recurrence = c(1L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), dead = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), os = c(214.67, 207.93, 80, 52.67,
193.25, 65.54, 0.69, 174.85, 206.29, 27.27, 162.27, 204.98, 122.81,
49.94, 22.28, 197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 18.37,
152.38, 49.15, 190.46, 144.07, 66.76, 188.58, 188.42, 78.65,
125.77, 176.59, 185.69, 185.23, 184.71, 184.31, 183.59, 181.49,
96.53, 180.63, 112.92, 179.88, 179.48, 122.61, 177.35, 176.66,
0.13, 67.15, 175.31, 174.82, 174.65, 169.53, 169.23, 168.8, 168.77,
167.69, 167.29, 163.84, 163.81, 162.69, 162.63, 162.37, 162.2,
160.1, 159.47, 157.4, 155.56, 155.47, 155.27, 154.87, 154.61,
128.43, 56.51, 150.67, 148.73, 147.98, 146.83, 146.1, 144.69,
159.31, 140.58, 136.64, 135.52, 125.77, 134.11, 134.18, 133.49,
131.81, 77.04, 130.6, 63.87, 126.78, 88.51, 123.5, 122.45, 121.72,
121.69, 120.57, 1.54, 114.79)), row.names = c(NA, 100L), class = "data.frame")
我认为你有一些问题,但最主要的是你感兴趣的变量是字符变量:
str(e)
'data.frame': 100 obs. of 10 variables:
$ rfs : num 25.3 207.9 80 47.8 193.2 ...
$ Ki67 : chr "0.6" "3.3" "1.0" "0.6" ...
$ WHO : Factor w/ 3 levels "1","2","3": 1 2 2 1 1 2 2 1 1 1 ...
$ simpson : Factor w/ 3 levels "1","2","3": 1 2 1 3 1 3 2 2 2 2 ...
$ age : chr "43.0" "76.0" "79.0" "84.0" ...
$ sex : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
$ rad.dose : num 0 0 0 0 0 0 0 0 0 0 ...
$ recurrence: int 1 0 0 1 0 1 0 0 1 0 ...
$ dead : int 0 1 1 1 1 1 1 1 1 1 ...
$ os : num 214.7 207.9 80 52.7 193.2 ...
只需将它们更改为数字,以便 cph
识别它们:
# Convert the character columns back to numeric so that cph() treats them as
# continuous covariates. Each column is converted from its own values: age
# must come from e$age, not from e$Ki67.
e$Ki67 <- as.numeric(e$Ki67)
e$age <- as.numeric(e$age)
你不能让一个变量既是连续的又是字符的。我看不到在这里使用 sprintf
的意义。如果你想要它用于展示目的,你可以创建变量的第二个副本,然后将它们用于表格等,但我认为没有必要,这种格式只是用于展示数据。
更大的问题是模型仍然无法运行,但我认为那是因为你在这里用小样本过度拟合,它可能适用于你的完整数据集。
请在下面找到我的数据样本 e
。
问题: 连续协变量在 Cox 回归中被视为分类协变量
问题: 我需要 (1) 将所有 .0
保留在所有整数上,但要这样 (2) e$Ki67
和 e$age
在我的 Cox 回归中保持连续。这怎么能做到?
我最近在 SO 上收到了一个很棒的答案,因为我想将 .0
添加到包含整数和小数的列中的所有整数。
解决方案是
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
这样
> head(e)
rfs Ki67 WHO simpson age sex rad.dose recurrence dead os
1 25.33 0.6 1 1 43.0 1 0 1 0 214.67
2 207.93 3.3 2 2 76.0 1 0 0 1 207.93
3 80.00 1.0 2 1 79.0 1 0 0 1 80.00
4 47.77 0.6 1 3 84.0 1 0 1 1 52.67
5 193.25 0.6 1 1 62.0 1 0 0 1 193.25
6 6.08 3.7 2 3 71.0 1 0 1 1 65.54
但是,在运行我的 Cox 回归时,我收到此错误:
In fitter(X, Y, strata = Strata, offset = offset, weights = weights, : Ran out of iterations and did not converge
似乎 rms
包中的 cph
现在将 e$Ki67
和 e$age
视为分类协变量,但它们实际上是连续协变量:
> summary(a)
Effects Response : Surv(rfs, recurrence)
Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
rad.dose 0 53.24 53.24 -8.7557e+00 1.0340e+01 -2.9021e+01 1.1510e+01
Hazard Ratio 0 53.24 53.24 1.5757e-04 NA 2.4899e-13 9.9714e+04
Ki67 - 0.2:0.5 4 1.00 NA -3.4399e+01 1.0790e+00 -3.6514e+01 -3.2284e+01
Hazard Ratio 4 1.00 NA 1.1498e-15 NA 1.3873e-16 9.5298e-15
Ki67 - 0.3:0.5 4 2.00 NA -2.7546e+01 1.6863e+01 -6.0596e+01 5.5041e+00
Hazard Ratio 4 2.00 NA 1.0887e-12 NA 4.8242e-27 2.4569e+02
Ki67 - 0.4:0.5 4 3.00 NA 5.8874e+00 7.6362e+00 -9.0793e+00 2.0854e+01
Hazard Ratio 4 3.00 NA 3.6046e+02 NA 1.1400e-04 1.1398e+09
Ki67 - 0.6:0.5 4 5.00 NA 2.8224e-02 1.4846e+00 -2.8816e+00 2.9380e+00
Hazard Ratio 4 5.00 NA 1.0286e+00 NA 5.6047e-02 1.8878e+01
Ki67 - 0.7:0.5 4 6.00 NA 9.0075e+00 2.5211e+00 4.0662e+00 1.3949e+01
Hazard Ratio 4 6.00 NA 8.1643e+03 NA 5.8336e+01 1.1426e+06
Ki67 - 0.8:0.5 4 7.00 NA -3.4891e-01 3.0083e+00 -6.2451e+00 5.5473e+00
Hazard Ratio 4 7.00 NA 7.0545e-01 NA 1.9399e-03 2.5654e+02
所以;我需要 (1) 将所有 .0
保留在所有整数上,但是 (2) e$Ki67
和 e$age
在我的 Cox 回归中保持连续。
我使用了以下脚本:
# sprintf() yields character strings, so after these two lines Ki67 and age
# are character columns (str(e) reports them as chr), no longer numeric.
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
library(rms)
# datadist is computed here, i.e. after the sprintf conversion above and
# before the factor conversions below.
d <- datadist(e)
options(datadist="d")
# Store WHO, simpson and sex as factors.
e$WHO <- as.factor(e$WHO)
e$simpson <- as.factor(e$simpson)
e$sex <- as.factor(e$sex)
# Fit the Cox model of Surv(rfs, recurrence) on the six covariates. With Ki67
# and age stored as character, summary(a) lists Ki67 as per-level contrasts.
a <- cph(Surv(rfs,recurrence)~Ki67+simpson+WHO+age+sex+rad.dose,data=e,surv=TRUE,x=TRUE,y=TRUE)
具有以下数据e
:
e <- structure(list(rfs = c(25.33, 207.93, 80, 47.77, 193.25, 6.08,
0.69, 174.85, 30.75, 27.27, 162.27, 204.98, 122.81, 20.53, 22.28,
197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 14.09, 152.38, 49.15,
190.46, 50.56, 66.76, 188.58, 188.42, 78.65, 125.77, 176.59,
185.69, 185.23, 184.71, 184.31, 183.59, 181.49, 96.53, 180.63,
30.16, 65.71, 179.48, 122.61, 177.35, 176.66, 0.13, 67.15, 175.31,
86.74, 174.65, 169.53, 169.23, 41.99, 168.77, 167.69, 56.71,
163.84, 163.81, 162.69, 162.63, 162.37, 119.59, 160.1, 159.47,
12.42, 155.56, 155.47, 155.27, 154.87, 154.61, 128.43, 56.51,
150.67, 50.79, 47.93, 83.58, 146.1, 144.69, 159.31, 140.58, 136.64,
135.52, 88.41, 134.11, 134.18, 133.49, 131.81, 77.04, 130.6,
63.87, 62.98, 88.51, 123.5, 122.45, 121.72, 121.69, 120.57, 1.54,
114.79), Ki67 = c("0.6", "3.3", "1.0", "0.6", "0.6", "3.7", "1.4",
"1.1", "1.8", "1.6", "0.7", "0.5", "0.3", "1.7", "0.5", "1.2",
"4.1", "0.6", "1.4", "1.3", "1.8", "2.6", "0.7", "0.8", "1.0",
"0.7", "0.7", "2.1", "1.3", "2.7", "1.3", "0.8", "1.1", "1.8",
"1.8", "0.4", "0.9", "6.4", "1.7", "1.5", "0.6", "2.7", "0.4",
"1.5", "1.4", "1.8", "2.3", "0.7", "2.4", "1.2", "0.6", "0.8",
"3.0", "4.0", "0.5", "1.2", "5.1", "1.5", "0.6", "1.2", "1.7",
"0.7", "1.4", "2.7", "1.1", "0.9", "0.5", "0.7", "0.9", "0.4",
"0.8", "0.8", "0.5", "0.9", "0.5", "1.2", "1.4", "2.5", "2.7",
"4.2", "0.8", "0.5", "1.7", "1.2", "1.6", "0.5", "2.6", "2.0",
"3.9", "0.6", "0.2", "0.5", "0.8", "0.5", "0.5", "0.6", "1.4",
"0.9", "1.0", "1.8"), WHO = structure(c(1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), simpson = structure(c(1L, 2L, 1L,
3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L,
2L, 2L, 3L, 1L, 1L, 2L, 1L, 3L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 1L, 2L, 2L, 3L, 2L, 2L,
2L), .Label = c("1", "2", "3"), class = "factor"), age = c("43.0",
"76.0", "79.0", "84.0", "62.0", "71.0", "75.0", "69.0", "53.0",
"70.0", "56.0", "45.0", "77.0", "72.0", "56.0", "59.0", "84.0",
"72.0", "83.0", "80.0", "49.0", "50.0", "68.0", "49.0", "46.0",
"50.0", "73.0", "51.0", "45.0", "42.0", "73.0", "56.0", "63.0",
"30.0", "67.0", "56.0", "58.0", "72.0", "51.0", "49.0", "68.0",
"65.0", "60.0", "64.0", "52.0", "65.0", "76.0", "78.0", "74.0",
"39.0", "30.0", "66.0", "58.0", "49.0", "67.0", "53.0", "69.0",
"41.0", "42.0", "66.0", "57.0", "52.0", "25.0", "64.0", "48.0",
"51.0", "47.0", "46.0", "44.0", "68.0", "41.0", "76.0", "65.0",
"60.0", "46.0", "54.0", "66.0", "42.0", "46.0", "83.0", "72.0",
"54.0", "51.0", "77.0", "58.0", "49.0", "52.0", "66.0", "50.0",
"32.0", "71.0", "45.0", "68.0", "55.0", "55.0", "44.0", "27.0",
"61.0", "76.0", "47.0"), sex = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), rad.dose = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0, 0, 53.24, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), recurrence = c(1L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), dead = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), os = c(214.67, 207.93, 80, 52.67,
193.25, 65.54, 0.69, 174.85, 206.29, 27.27, 162.27, 204.98, 122.81,
49.94, 22.28, 197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 18.37,
152.38, 49.15, 190.46, 144.07, 66.76, 188.58, 188.42, 78.65,
125.77, 176.59, 185.69, 185.23, 184.71, 184.31, 183.59, 181.49,
96.53, 180.63, 112.92, 179.88, 179.48, 122.61, 177.35, 176.66,
0.13, 67.15, 175.31, 174.82, 174.65, 169.53, 169.23, 168.8, 168.77,
167.69, 167.29, 163.84, 163.81, 162.69, 162.63, 162.37, 162.2,
160.1, 159.47, 157.4, 155.56, 155.47, 155.27, 154.87, 154.61,
128.43, 56.51, 150.67, 148.73, 147.98, 146.83, 146.1, 144.69,
159.31, 140.58, 136.64, 135.52, 125.77, 134.11, 134.18, 133.49,
131.81, 77.04, 130.6, 63.87, 126.78, 88.51, 123.5, 122.45, 121.72,
121.69, 120.57, 1.54, 114.79)), row.names = c(NA, 100L), class = "data.frame")
我认为你有一些问题,但最主要的是你感兴趣的变量是字符变量:
str(e)
'data.frame': 100 obs. of 10 variables:
$ rfs : num 25.3 207.9 80 47.8 193.2 ...
$ Ki67 : chr "0.6" "3.3" "1.0" "0.6" ...
$ WHO : Factor w/ 3 levels "1","2","3": 1 2 2 1 1 2 2 1 1 1 ...
$ simpson : Factor w/ 3 levels "1","2","3": 1 2 1 3 1 3 2 2 2 2 ...
$ age : chr "43.0" "76.0" "79.0" "84.0" ...
$ sex : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
$ rad.dose : num 0 0 0 0 0 0 0 0 0 0 ...
$ recurrence: int 1 0 0 1 0 1 0 0 1 0 ...
$ dead : int 0 1 1 1 1 1 1 1 1 1 ...
$ os : num 214.7 207.9 80 52.7 193.2 ...
只需将它们更改为数字,以便 cph
识别它们:
# Convert the character columns back to numeric so that cph() treats them as
# continuous covariates. Each column is converted from its own values: age
# must come from e$age, not from e$Ki67.
e$Ki67 <- as.numeric(e$Ki67)
e$age <- as.numeric(e$age)
你不能让一个变量既是连续的又是字符的。我看不到在这里使用 sprintf
的意义。如果你想要它用于展示目的,你可以创建变量的第二个副本,然后将它们用于表格等,但我认为没有必要,这种格式只是用于展示数据。
更大的问题是模型仍然无法运行,但我认为那是因为你在这里用小样本过度拟合,它可能适用于你的完整数据集。