重复测量 bootstrap：每个条件每个 ID 重新采样

Question

在这里遇到困难...我正在尝试在重复测量设计中为每个主题创建 1000 个自举数据集，其中包含三个自变量：DepthValidity（2 个级别）、SideValidity（2 个级别）和 TargetDepth （2 个级别）。另一个目标是计算 each 受试者的自举反应时间平均值、中位数和 sd，对于 each 可能的条件（总共有八个条件).

我尝试使用和操作此处找到的代码：repeated measures bootstrap stats, grouped by multiple factors

df <- mydata %>%
  group_by(ID, Depth, TarDepth, Side) %>%
  summarise(measure=list(ReactionTime)) %>%
  ungroup()

myfunc <- function(data, indices) {
  data <- data[indices,]
  return(c(mean=mean(unlist(data$measure)),
           median=median(unlist(data$measure)),
           sd = sd(unlist(data$measure))))
}
set.seed(333)
bootresults <- df %>%
  group_by(ID, Depth, TarDepth, Side) %>%
  do(tidy(boot(data = ., statistic = myfunc, R = 1000)))

我的原始数据框（即 mydata）是长格式的，其中每一行对应一个单个个人在八种条件之一下的数据点。每个人的每个条件大约有 90 个数据点。

使用上面的代码，我得到了具有重复值的数据，如此处突出显示：

是否因为我需要在 for 循环中执行上述代码（即针对每个唯一 ID）而出现相同的值？我试过了，但似乎没有用，但我很可能也在那里做错了什么。也许是因为我必须有一个包含所有不同条件组合的列，而不是三个单独的列？如何防止重复？

编辑：包括 dput

dput(droplevels(head(individ, 20)))

structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "s109", class = "factor"), 
    TarDepth = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Mid", class = "factor"), 
    Side = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor"), 
    PRTS = c(0.834416149, 0.716587752, 0.716472204, 0.69970636, 
    0.699617629, 0.682915685, 0.666703417, 0.616733331, 0.599953582, 
    0.597570097, 0.595346526, 0.592605137, 0.588598339, 0.583834349, 
    0.58285897, 0.568965957, 0.567117837, 0.566593729, 0.566063329, 
    0.550269553), Depth = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor")), row.names = c(NA, 
20L), class = "data.frame")

编辑：包括两个主题 ID 的输出，因为根据评论者的最新解决方案，我有偏见并且 std.error 为 0：

dput(droplevels(head(individ, 32)))

structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("s97", "s98"), class = "factor"), 
    TarDepth = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Mid", "Near"
    ), class = "factor"), Side = structure(c(1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("DIFF", 
    "SAME"), class = "factor"), PRTS = c(0.851425991, 0.84961243, 
    0.840487545, 0.839716775, 0.820657432, 0.815991426, 0.807378203, 
    0.800551856, 0.799805387, 0.787336857, 0.77253443, 0.765844159, 
    0.751196415, 0.749769895, 0.749374114, 0.649443255, 0.184844206, 
    0.608819523, 0.117052886, 0.082718123, 0.762629011, 0.050756321, 
    0.074764508, 0.147296557, 0.428583992, 0.432677868, 0.378136045, 
    0.135034201, 0.367393051, 0.593182243, 0.723897573, 0.533599005
    ), Depth = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("DIFF", "SAME"
    ), class = "factor")), row.names = c(NA, 32L), class = "data.frame")

Answer 1

我们可以用 group_split 拆分数据并循环遍历 list

library(dplyr)
library(purrr)
library(broom)
set.seed(333)
bootresults <-  df %>%
  group_split(ID, Depth, TarDepth, Side)  %>%
  map_dfr(~ tidy(boot(data = .x, statistic = myfunc, R = 1000)))

或者另一种选择是 nest_by

set.seed(333)
bootresults <- df %>%
    nest_by(ID, Depth, TarDepth, Side) %>%
    mutate(new = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))

更新

使用可重现的例子

df <- data.frame(id=c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2),
                  cond=c('A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'),
                  comm=c('X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y','X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'),
                  measure=c(0.8, 1.1, 0.7, 1.2, 0.9, 2.3, 0.6, 1.1, 0.7, 1.3, 0.6, 1.5, 1.0, 2.1, 0.7, 1.2))
myfunc <- function(data, indices) {
    data <- data[indices,]
    return(c(mean=mean(unlist(data$measure)),
             median=median(unlist(data$measure)),
             sd = sd(unlist(data$measure))))
}
df1 <- df %>% 
   nest_by(cond, comm) %>% 
   mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))
df1
# A tibble: 4 x 4
# Rowwise:  cond, comm
  cond  comm                data out             
  <chr> <chr> <list<tibble[,2]>> <list>          
1 A     X                [4 × 2] <tibble [3 × 4]>
2 A     Y                [4 × 2] <tibble [3 × 4]>
3 B     X                [4 × 2] <tibble [3 × 4]>
4 B     Y                [4 × 2] <tibble [3 × 4]>

那么，我们unnest

library(tidyr)
df1 %>%
      ungroup %>% 
      select(-data) %>%
      unnest(out)
# A tibble: 12 x 6
   cond  comm  term   statistic      bias std.error
   <chr> <chr> <chr>      <dbl>     <dbl>     <dbl>
 1 A     X     mean      0.85   -0.000250    0.0555
 2 A     X     median    0.85    0.000900    0.0734
 3 A     X     sd        0.129  -0.0246      0.0362
 4 A     Y     mean      1.7    -0.00575     0.253 
 5 A     Y     median    1.7    -0.00650     0.374 
 6 A     Y     sd        0.589  -0.103       0.162 
 7 B     X     mean      0.65    0.000200    0.0258
 8 B     X     median    0.65    0.000550    0.0402
 9 B     X     sd        0.0577 -0.0120      0.0189
10 B     Y     mean      1.25    0.00260     0.0767
11 B     Y     median    1.2     0.0337      0.0995
12 B     Y     sd        0.173  -0.0372      0.0661

更新2

根据 OP 的输入数据，通过将 'measure' 更改为 'PRTS'

来更改函数 'myfunc'

myfunc <- function(data, indices) {
  data <- data[indices,]
  return(c(mean=mean(unlist(data$PRTS)),
           median=median(unlist(data$PRTS)),
           sd = sd(unlist(data$PRTS))))
}
individ %>% 
   nest_by(ID, Depth, TarDepth, Side) %>%
   mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000)))) %>% 
   ungroup %>% 
   select(-data) %>% 
   unnest(out)
# A tibble: 3 x 8
  ID    Depth TarDepth Side  term   statistic      bias std.error
  <fct> <fct> <fct>    <fct> <chr>      <dbl>     <dbl>     <dbl>
1 s109  DIFF  Mid      DIFF  mean      0.630   0.000108    0.0166
2 s109  DIFF  Mid      DIFF  median    0.596   0.00756     0.0229
3 s109  DIFF  Mid      DIFF  sd        0.0738 -0.00361     0.0139

重复测量 bootstrap：每个条件每个 ID 重新采样

Repeated measures bootstrap: resample per ID per condition

statistics

r

resampling

statistics-bootstrap

更新

更新2