在 R 中的 mice 库的 mids 对象中重新编码变量
Recode a variable within the mids object of the mice library in R
以下数据集可用
# Example data set (dput output): 25 observations of four variables --
# asdas_6month and asdas_baseline (numeric, both containing NA values),
# gender (factor with levels "Female"/"Male") and age (numeric, no NA).
data <- structure(list(asdas_6month = c(23.1222666868239, 25.4056847196073,
25.9886630231065, NA, 26.9450864282904, 15.1832953552198, 22.1618055512694,
NA, 24.1387146612986, 25.598233740795, 22.6844495409994, 25.0138310842063,
20.9944595011522, 17.0762423377328, NA, NA, 20.2359010676347,
17.5468970969989, 22.9765676870538, 26.3032333127368, NA, NA,
NA, 17.3203951667699, 19.126959104744), gender = structure(c(1L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L), .Label = c("Female", "Male"), class = "factor"),
age = c(47.9379517873091, 46.837373193357, 48.5646295793097,
43.1378807456583, 60.3619393447192, 70.1290549397305, 84.3587981654008,
59.2292347942614, 41.7327157246053, 52.0137845399698, 55.0951441078166,
71.6184307122057, 43.3101374804154, 33.5854501557607, 51.9032470737109,
68.1204996602706, 42.9427562299075, 55.909031412815, 29.895500127283,
20.9296411673894, 29.3957377286062, 46.974102661638, 54.6740110130539,
42.6997039072135, 67.3413773507263), asdas_baseline = c(63.7251494911822,
NA, 65.0638161875852, 70.1816100941605, 53.1972327260365,
62.980030777934, 60.3085321252511, 58.9998256902073, 56.8045598820947,
54.4446059090559, NA, 61.7293600038226, 56.5674724119214,
62.8593507709476, NA, 54.9028311743253, NA, NA, 67.6467591815449,
58.5134614505046, 59.3735346553234, 51.9158516755166, 63.0645651881476,
58.7759004270177, 55.0687922895208)), class = "data.frame", row.names = c(NA,
-25L))
这是它的样子:
'data.frame': 25 obs. of 4 variables:
$ asdas_6month : num 23.1 25.4 26 NA 26.9 ...
$ gender : Factor w/ 2 levels "Female","Male": 1 2 1 1 1 1 2 2 2 1 ...
$ age : num 47.9 46.8 48.6 43.1 60.4 ...
$ asdas_baseline: num 63.7 NA 65.1 70.2 53.2 ...
使用以下代码,我可以用 mice
包生成一个 mids
对象并创建五个插补数据集
library(mice)
# Run multiple imputation on `data`: 5 imputed data sets, 10 iterations,
# fixed seed for reproducibility. `printFlag` is written out in full instead
# of relying on partial matching of the abbreviated `print` argument.
new_imp <- mice(data, m = 5, maxit = 10, printFlag = FALSE, seed = 449)
print(new_imp)
Number of multiple imputations: 5
Imputation methods:
asdas_6month gender age asdas_baseline
"pmm" "" "" "pmm"
PredictorMatrix:
asdas_6month gender age asdas_baseline
asdas_6month 0 1 1 1
gender 1 0 1 1
age 1 1 0 1
asdas_baseline 1 1 1 0
我的目标是新建一个变量 asdas_improvement
,用于标记 ASDAS 得分从基线到 6 个月的改善幅度是否达到阈值(此处文字写的阈值是 30,而下面的代码使用的阈值是 40,请以实际需要为准)。
通常我可以用 dplyr
的 mutate 函数计算如下:
library(dplyr)
# Add a 0/1 indicator for an ASDAS drop of at least 40 between baseline and
# 6 months. The result is returned (and printed); `data` itself is unchanged.
mutate(
  data,
  asdas_improvement = if_else(asdas_baseline - asdas_6month >= 40, 1, 0)
)
如何重新编码 mids
对象中的类似变量?
要对插补后的数据集进行计算,我们可以使用 complete
获取包含插补数据的长格式数据框(include = TRUE 会同时保留原始的不完整数据,as.mids 需要这部分数据)。然后,我们可以正常使用 mutate
进行计算。最后,可以使用 as.mids
将其转换回 mids
对象。
library(dplyr)
# Stack the original incomplete data and every completed data set into one
# long data frame. `include = TRUE` keeps the original data in the stack,
# which as.mids() needs in order to rebuild the mids object.
# complete() is namespace-qualified because tidyr::complete() masks it
# whenever tidyr (or the tidyverse) is attached after mice.
full.impdata <- mice::complete(new_imp, action = "long", include = TRUE) %>%
  mutate(
    asdas_improvement = if_else(asdas_baseline - asdas_6month >= 40, 1, 0)
  )
# Convert the augmented long data back into a mids object that now carries
# the derived variable alongside the imputed ones.
new_imp <- mice::as.mids(full.impdata)
输出
# Inspect the values stored for the derived variable, one column per imputation.
str(new_imp[["imp"]][["asdas_improvement"]])
'data.frame': 11 obs. of 5 variables:
$ 1: num 0 1 0 0 1 0 0 0 1 0 ...
$ 2: num 0 1 0 0 0 0 0 1 0 0 ...
$ 3: num 0 1 0 0 0 0 1 1 0 0 ...
$ 4: num 0 1 1 0 0 0 0 1 0 0 ...
$ 5: num 0 1 0 0 0 0 0 0 1 0 ...
以下数据集可用
# Example data set (dput output): 25 observations of four variables --
# asdas_6month and asdas_baseline (numeric, both containing NA values),
# gender (factor with levels "Female"/"Male") and age (numeric, no NA).
data <- structure(list(asdas_6month = c(23.1222666868239, 25.4056847196073,
25.9886630231065, NA, 26.9450864282904, 15.1832953552198, 22.1618055512694,
NA, 24.1387146612986, 25.598233740795, 22.6844495409994, 25.0138310842063,
20.9944595011522, 17.0762423377328, NA, NA, 20.2359010676347,
17.5468970969989, 22.9765676870538, 26.3032333127368, NA, NA,
NA, 17.3203951667699, 19.126959104744), gender = structure(c(1L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L), .Label = c("Female", "Male"), class = "factor"),
age = c(47.9379517873091, 46.837373193357, 48.5646295793097,
43.1378807456583, 60.3619393447192, 70.1290549397305, 84.3587981654008,
59.2292347942614, 41.7327157246053, 52.0137845399698, 55.0951441078166,
71.6184307122057, 43.3101374804154, 33.5854501557607, 51.9032470737109,
68.1204996602706, 42.9427562299075, 55.909031412815, 29.895500127283,
20.9296411673894, 29.3957377286062, 46.974102661638, 54.6740110130539,
42.6997039072135, 67.3413773507263), asdas_baseline = c(63.7251494911822,
NA, 65.0638161875852, 70.1816100941605, 53.1972327260365,
62.980030777934, 60.3085321252511, 58.9998256902073, 56.8045598820947,
54.4446059090559, NA, 61.7293600038226, 56.5674724119214,
62.8593507709476, NA, 54.9028311743253, NA, NA, 67.6467591815449,
58.5134614505046, 59.3735346553234, 51.9158516755166, 63.0645651881476,
58.7759004270177, 55.0687922895208)), class = "data.frame", row.names = c(NA,
-25L))
这是它的样子:
'data.frame': 25 obs. of 4 variables:
$ asdas_6month : num 23.1 25.4 26 NA 26.9 ...
$ gender : Factor w/ 2 levels "Female","Male": 1 2 1 1 1 1 2 2 2 1 ...
$ age : num 47.9 46.8 48.6 43.1 60.4 ...
$ asdas_baseline: num 63.7 NA 65.1 70.2 53.2 ...
使用以下代码,我可以用 mice
包生成一个 mids
对象并创建五个插补数据集
library(mice)
# Run multiple imputation on `data`: 5 imputed data sets, 10 iterations,
# fixed seed for reproducibility. `printFlag` is written out in full instead
# of relying on partial matching of the abbreviated `print` argument.
new_imp <- mice(data, m = 5, maxit = 10, printFlag = FALSE, seed = 449)
print(new_imp)
Number of multiple imputations: 5
Imputation methods:
asdas_6month gender age asdas_baseline
"pmm" "" "" "pmm"
PredictorMatrix:
asdas_6month gender age asdas_baseline
asdas_6month 0 1 1 1
gender 1 0 1 1
age 1 1 0 1
asdas_baseline 1 1 1 0
我的目标是新建一个变量 asdas_improvement
,用于标记 ASDAS 得分从基线到 6 个月的改善幅度是否达到阈值(此处文字写的阈值是 30,而下面的代码使用的阈值是 40,请以实际需要为准)。
通常我可以用 dplyr
的 mutate 函数计算如下:
library(dplyr)
# Add a 0/1 indicator for an ASDAS drop of at least 40 between baseline and
# 6 months. The result is returned (and printed); `data` itself is unchanged.
mutate(
  data,
  asdas_improvement = if_else(asdas_baseline - asdas_6month >= 40, 1, 0)
)
如何重新编码 mids
对象中的类似变量?
要对插补后的数据集进行计算,我们可以使用 complete
获取包含插补数据的长格式数据框(include = TRUE 会同时保留原始的不完整数据,as.mids 需要这部分数据)。然后,我们可以正常使用 mutate
进行计算。最后,可以使用 as.mids
将其转换回 mids
对象。
library(dplyr)
# Stack the original incomplete data and every completed data set into one
# long data frame. `include = TRUE` keeps the original data in the stack,
# which as.mids() needs in order to rebuild the mids object.
# complete() is namespace-qualified because tidyr::complete() masks it
# whenever tidyr (or the tidyverse) is attached after mice.
full.impdata <- mice::complete(new_imp, action = "long", include = TRUE) %>%
  mutate(
    asdas_improvement = if_else(asdas_baseline - asdas_6month >= 40, 1, 0)
  )
# Convert the augmented long data back into a mids object that now carries
# the derived variable alongside the imputed ones.
new_imp <- mice::as.mids(full.impdata)
输出
# Inspect the values stored for the derived variable, one column per imputation.
str(new_imp[["imp"]][["asdas_improvement"]])
'data.frame': 11 obs. of 5 variables:
$ 1: num 0 1 0 0 1 0 0 0 1 0 ...
$ 2: num 0 1 0 0 0 0 0 1 0 0 ...
$ 3: num 0 1 0 0 0 0 1 1 0 0 ...
$ 4: num 0 1 1 0 0 0 0 1 0 0 ...
$ 5: num 0 1 0 0 0 0 0 0 1 0 ...