R MICE 插补

R MICE Imputation

# Long-format sample panel: 5 students, each observed at 4 time points.
# v1-v3 are fully observed; v4-v8 contain missing values (NA).
data <- data.frame(
  student = rep(c(1, 2, 3, 4, 5), each = 4),
  time = rep(c(1, 2, 3, 4), times = 5),
  v1 = c(16, 12, 14, 12, 17, 16, 12, 12, 13, 12,
         16, 16, 10, 10, 14, 17, 17, 12, 10, 11),
  v2 = c(1, 1, 3, 2, 2, 2, 3, 1, 2, 1,
         2, 1, 3, 1, 1, 2, 3, 3, 1, 2),
  v3 = c(4, 1, 4, 4, 2, 2, 2, 2, 1, 3,
         2, 3, 1, 2, 2, 1, 4, 1, 1, 4),
  v4 = c(NA, 27, NA, 42, 40, 48, 45, 25, 29, NA,
         NA, 27, NA, NA, NA, NA, NA, NA, 44, 39),
  v5 = c(NA, 1, NA, NA, 1, 3, 3, 2, NA, NA,
         NA, 1, NA, NA, NA, NA, 3, 2, 4, 1),
  v6 = c(NA, 0, 1, NA, 1, NA, 1, NA, 0, NA,
         1, 1, NA, NA, NA, NA, 0, 0, NA, 0),
  v7 = c(0, 1, 1, NA, 0, 1, 1, 0, 1, 0,
         NA, 0, NA, NA, NA, NA, 0, 1, NA, 1),
  v8 = c(1, NA, 0, 1, 0, 0, NA, 1, 1, NA,
         0, 0, NA, NA, NA, NA, 1, 0, NA, 1)
)

这是我的样本数据,我想用它来:

A. 对于 time = 1,以 v1-v3 作为预测变量,使用 MICE 插补 v4-v8(v4 是连续变量,v5 是分类变量,v6-v8 是二分类变量)

B. 在完成 time = 1 的插补之后,我想用前一个时间点的值填充其后的 NA 值。例如,如果某个变量在时间 1-4 的取值为:NA, NA, 0, 1,并且时间 1 的插补值为 1,那么结果应为:1, 1, 0, 1

我尝试过:

# Impute only the rows observed at time == 1.
# The row condition must come before the comma: data[, data$time == 1] would
# apply the length-20 logical vector to the columns instead of the rows.
# `method` is spelled out in full rather than relying on partial matching.
dataNEW <- mice(
  data[data$time == 1, ],
  m = 5,
  maxit = 50,
  method = "pmm",
  seed = 500
)

A. For time = 1 use v1-v3 to impute v4-v8 using MICE (v4 is continuous, v5 is categorical, v6-v8 is binary)

首先,必须将变量 v5-v8 转换为因子:

# Convert the categorical (v5) and binary (v6-v8) columns to factors in one
# vectorised assignment.
data[paste0("v", 5:8)] <- lapply(data[paste0("v", 5:8)], factor)

创建一个预测矩阵,告诉 mice 只使用 v1-v3 来预测 v4-v8:

# Square 0/1 matrix with one row and one column per column of `data`:
# start from all ones with a zero diagonal (no variable predicts itself).
Pred_Matrix <- matrix(1, nrow = ncol(data), ncol = ncol(data))
diag(Pred_Matrix) <- 0
# Zero every column except 3:5 (v1-v3), so student, time and v4-v8 are
# never used as predictors.
Pred_Matrix[, setdiff(1:10, 3:5)] <- 0

仅使用 1 个插补(默认为 5)进行插补,因为您想要的只是插补值;您没有做任何其他事情,例如汇集建模结果。

# Impute the time == 1 rows only, restricting the predictors via Pred_Matrix.
# m = 1: a single completed data set suffices because only the imputed values
# are needed and no results are pooled across imputations.
# The argument name `predictorMatrix` is written out in full (the original
# `pred =` relied on partial argument matching), and the call is namespaced
# because no library(mice) call appears before this point.
impA <- mice::mice(
  subset(data, subset = time == 1),
  predictorMatrix = Pred_Matrix,
  m = 1
)

可以使用 complete 函数(来自 mice 包,而不是 tidyr)提取插补后的数据。

B. After imputed values are imputed for time = 1, I want to fill NA values that follow with the previous value. So if the variable for time 1-4 is: NA,NA,0,1 and the imputed value at time 1 is 1, then it could be: 1-1-0-1

library(dplyr)
library(tidyr)  # Needed for the fill function

# Stack the completed time == 1 rows on top of the untouched later rows,
# restore student/time order, then carry the last observed value forward
# within each student for v4-v8.
subset(data, subset = time != 1) %>%
  rbind(mice::complete(impA), .) %>%
  arrange(student, time) %>%
  group_by(student) %>%
  fill(v4:v8, .direction = "down")

# A tibble: 20 x 10
# Groups:   student [5]
   student  time    v1    v2    v3    v4 v5    v6    v7    v8   
     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <fct> <fct>
 1       1     1    16     1     4    40 2     1     0     1    
 2       1     2    12     1     1    27 1     0     1     1    
 3       1     3    14     3     4    27 1     1     1     0    
 4       1     4    12     2     4    42 1     1     1     1    
 5       2     1    17     2     2    40 1     1     0     0    
 6       2     2    16     2     2    48 3     1     1     0    
 7       2     3    12     3     2    45 3     1     1     0   
 8       2     4    12     1     2    25 2     1     0     1    
 9       3     1    13     2     1    29 1     0     1     1    
10       3     2    12     1     3    29 1     0     0     1    
11       3     3    16     2     2    29 1     1     0     0    
12       3     4    16     1     3    27 1     1     0     0    
13       4     1    10     3     1    40 1     0     0     0    
14       4     2    10     1     2    40 1     0     0     0    
15       4     3    14     1     2    40 1     0     0     0    
16       4     4    17     2     1    40 1     0     0     0    
17       5     1    17     3     4    40 3     0     0     1    
18       5     2    12     3     1    40 2     0     1     0    
19       5     3    10     1     1    44 4     0     1     0    
20       5     4    11     2     4    39 1     0     1     1

数据

注意,我必须将 v5 的第一个值更改为 2,否则 polyreg 插补失败(time=1 只有两个类别)。

# Sample panel used for the answer: 5 students x 4 time points.
# The first v5 value is 2 here (the text above explains why it was changed).
data <- data.frame(
  student = rep(c(1, 2, 3, 4, 5), each = 4),
  time = rep(c(1, 2, 3, 4), times = 5),
  v1 = c(16, 12, 14, 12, 17, 16, 12, 12, 13, 12,
         16, 16, 10, 10, 14, 17, 17, 12, 10, 11),
  v2 = c(1, 1, 3, 2, 2, 2, 3, 1, 2, 1,
         2, 1, 3, 1, 1, 2, 3, 3, 1, 2),
  v3 = c(4, 1, 4, 4, 2, 2, 2, 2, 1, 3,
         2, 3, 1, 2, 2, 1, 4, 1, 1, 4),
  v4 = c(NA, 27, NA, 42, 40, 48, 45, 25, 29, NA,
         NA, 27, NA, NA, NA, NA, NA, NA, 44, 39),
  v5 = c(2, 1, NA, NA, 1, 3, 3, 2, NA, NA,
         NA, 1, NA, NA, NA, NA, 3, 2, 4, 1),
  v6 = c(NA, 0, 1, NA, 1, NA, 1, NA, 0, NA,
         1, 1, NA, NA, NA, NA, 0, 0, NA, 0),
  v7 = c(0, 1, 1, NA, 0, 1, 1, 0, 1, 0,
         NA, 0, NA, NA, NA, NA, 0, 1, NA, 1),
  v8 = c(1, NA, 0, 1, 0, 0, NA, 1, 1, NA,
         0, 0, NA, NA, NA, NA, 1, 0, NA, 1)
)