重复测量 bootstrap:每个条件每个 ID 重新采样
Repeated measures bootstrap: resample per ID per condition
在这里遇到困难...我正在尝试在重复测量设计中为每个主题创建 1000 个自举数据集,其中包含三个自变量:DepthValidity(2 个级别)、SideValidity(2 个级别)和 TargetDepth (2 个级别)。另一个目标是计算 each 受试者的自举反应时间平均值、中位数和 sd,对于 each 可能的条件(总共有八个条件).
我尝试使用和操作此处找到的代码:repeated measures bootstrap stats, grouped by multiple factors
df <- mydata %>%
group_by(ID, Depth, TarDepth, Side) %>%
summarise(measure=list(ReactionTime)) %>%
ungroup()
myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$measure)),
median=median(unlist(data$measure)),
sd = sd(unlist(data$measure))))
}
set.seed(333)
bootresults <- df %>%
group_by(ID, Depth, TarDepth, Side) %>%
do(tidy(boot(data = ., statistic = myfunc, R = 1000)))
我的原始数据框(即 mydata)是长格式的,其中每一行对应一个 单个 个人在八种条件之一下的数据点。每个人的每个条件大约有 90 个数据点。
使用上面的代码,我得到了具有重复值的数据,如此处突出显示:
是否因为我需要在 for 循环中执行上述代码(即针对每个唯一 ID)而出现相同的值?我试过了,但似乎没有用,但我很可能也在那里做错了什么。也许是因为我必须有一个包含所有不同条件组合的列,而不是三个单独的列?如何防止重复?
编辑:包括 dput
dput(droplevels(head(individ, 20)))
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "s109", class = "factor"),
TarDepth = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Mid", class = "factor"),
Side = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor"),
PRTS = c(0.834416149, 0.716587752, 0.716472204, 0.69970636,
0.699617629, 0.682915685, 0.666703417, 0.616733331, 0.599953582,
0.597570097, 0.595346526, 0.592605137, 0.588598339, 0.583834349,
0.58285897, 0.568965957, 0.567117837, 0.566593729, 0.566063329,
0.550269553), Depth = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor")), row.names = c(NA,
20L), class = "data.frame")
编辑:
包括 两个 主题 ID 的输出,因为根据评论者的最新解决方案,我有偏见并且 std.error 为 0:
dput(droplevels(head(individ, 32)))
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("s97", "s98"), class = "factor"),
TarDepth = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Mid", "Near"
), class = "factor"), Side = structure(c(1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("DIFF",
"SAME"), class = "factor"), PRTS = c(0.851425991, 0.84961243,
0.840487545, 0.839716775, 0.820657432, 0.815991426, 0.807378203,
0.800551856, 0.799805387, 0.787336857, 0.77253443, 0.765844159,
0.751196415, 0.749769895, 0.749374114, 0.649443255, 0.184844206,
0.608819523, 0.117052886, 0.082718123, 0.762629011, 0.050756321,
0.074764508, 0.147296557, 0.428583992, 0.432677868, 0.378136045,
0.135034201, 0.367393051, 0.593182243, 0.723897573, 0.533599005
), Depth = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("DIFF", "SAME"
), class = "factor")), row.names = c(NA, 32L), class = "data.frame")
我们可以用 group_split
拆分数据并循环遍历 list
library(dplyr)
library(purrr)
library(broom)
set.seed(333)
bootresults <- df %>%
group_split(ID, Depth, TarDepth, Side) %>%
map_dfr(~ tidy(boot(data = .x, statistic = myfunc, R = 1000)))
或者另一种选择是 nest_by
set.seed(333)
bootresults <- df %>%
nest_by(ID, Depth, TarDepth, Side) %>%
mutate(new = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))
更新
使用可重现的例子
df <- data.frame(id=c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2),
cond=c('A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'),
comm=c('X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y','X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'),
measure=c(0.8, 1.1, 0.7, 1.2, 0.9, 2.3, 0.6, 1.1, 0.7, 1.3, 0.6, 1.5, 1.0, 2.1, 0.7, 1.2))
myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$measure)),
median=median(unlist(data$measure)),
sd = sd(unlist(data$measure))))
}
df1 <- df %>%
nest_by(cond, comm) %>%
mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))
df1
# A tibble: 4 x 4
# Rowwise: cond, comm
cond comm data out
<chr> <chr> <list<tibble[,2]>> <list>
1 A X [4 × 2] <tibble [3 × 4]>
2 A Y [4 × 2] <tibble [3 × 4]>
3 B X [4 × 2] <tibble [3 × 4]>
4 B Y [4 × 2] <tibble [3 × 4]>
那么,我们unnest
library(tidyr)
df1 %>%
ungroup %>%
select(-data) %>%
unnest(out)
# A tibble: 12 x 6
cond comm term statistic bias std.error
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 A X mean 0.85 -0.000250 0.0555
2 A X median 0.85 0.000900 0.0734
3 A X sd 0.129 -0.0246 0.0362
4 A Y mean 1.7 -0.00575 0.253
5 A Y median 1.7 -0.00650 0.374
6 A Y sd 0.589 -0.103 0.162
7 B X mean 0.65 0.000200 0.0258
8 B X median 0.65 0.000550 0.0402
9 B X sd 0.0577 -0.0120 0.0189
10 B Y mean 1.25 0.00260 0.0767
11 B Y median 1.2 0.0337 0.0995
12 B Y sd 0.173 -0.0372 0.0661
更新2
根据 OP 的输入数据,通过将 'measure' 更改为 'PRTS'
来更改函数 'myfunc'
myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$PRTS)),
median=median(unlist(data$PRTS)),
sd = sd(unlist(data$PRTS))))
}
individ %>%
nest_by(ID, Depth, TarDepth, Side) %>%
mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000)))) %>%
ungroup %>%
select(-data) %>%
unnest(out)
# A tibble: 3 x 8
ID Depth TarDepth Side term statistic bias std.error
<fct> <fct> <fct> <fct> <chr> <dbl> <dbl> <dbl>
1 s109 DIFF Mid DIFF mean 0.630 0.000108 0.0166
2 s109 DIFF Mid DIFF median 0.596 0.00756 0.0229
3 s109 DIFF Mid DIFF sd 0.0738 -0.00361 0.0139
在这里遇到困难...我正在尝试在重复测量设计中为每个主题创建 1000 个自举数据集,其中包含三个自变量:DepthValidity(2 个级别)、SideValidity(2 个级别)和 TargetDepth (2 个级别)。另一个目标是计算 each 受试者的自举反应时间平均值、中位数和 sd,对于 each 可能的条件(总共有八个条件).
我尝试使用和操作此处找到的代码:repeated measures bootstrap stats, grouped by multiple factors
df <- mydata %>%
group_by(ID, Depth, TarDepth, Side) %>%
summarise(measure=list(ReactionTime)) %>%
ungroup()
myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$measure)),
median=median(unlist(data$measure)),
sd = sd(unlist(data$measure))))
}
set.seed(333)
bootresults <- df %>%
group_by(ID, Depth, TarDepth, Side) %>%
do(tidy(boot(data = ., statistic = myfunc, R = 1000)))
我的原始数据框(即 mydata)是长格式的,其中每一行对应一个 单个 个人在八种条件之一下的数据点。每个人的每个条件大约有 90 个数据点。
使用上面的代码,我得到了具有重复值的数据,如此处突出显示:
是否因为我需要在 for 循环中执行上述代码(即针对每个唯一 ID)而出现相同的值?我试过了,但似乎没有用,但我很可能也在那里做错了什么。也许是因为我必须有一个包含所有不同条件组合的列,而不是三个单独的列?如何防止重复?
编辑:包括 dput
dput(droplevels(head(individ, 20)))
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "s109", class = "factor"),
TarDepth = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Mid", class = "factor"),
Side = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor"),
PRTS = c(0.834416149, 0.716587752, 0.716472204, 0.69970636,
0.699617629, 0.682915685, 0.666703417, 0.616733331, 0.599953582,
0.597570097, 0.595346526, 0.592605137, 0.588598339, 0.583834349,
0.58285897, 0.568965957, 0.567117837, 0.566593729, 0.566063329,
0.550269553), Depth = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "DIFF", class = "factor")), row.names = c(NA,
20L), class = "data.frame")
编辑: 包括 两个 主题 ID 的输出,因为根据评论者的最新解决方案,我有偏见并且 std.error 为 0:
dput(droplevels(head(individ, 32)))
structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("s97", "s98"), class = "factor"),
TarDepth = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Mid", "Near"
), class = "factor"), Side = structure(c(1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("DIFF",
"SAME"), class = "factor"), PRTS = c(0.851425991, 0.84961243,
0.840487545, 0.839716775, 0.820657432, 0.815991426, 0.807378203,
0.800551856, 0.799805387, 0.787336857, 0.77253443, 0.765844159,
0.751196415, 0.749769895, 0.749374114, 0.649443255, 0.184844206,
0.608819523, 0.117052886, 0.082718123, 0.762629011, 0.050756321,
0.074764508, 0.147296557, 0.428583992, 0.432677868, 0.378136045,
0.135034201, 0.367393051, 0.593182243, 0.723897573, 0.533599005
), Depth = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("DIFF", "SAME"
), class = "factor")), row.names = c(NA, 32L), class = "data.frame")
我们可以用 group_split
拆分数据并循环遍历 list
library(dplyr)
library(purrr)
library(broom)
set.seed(333)
bootresults <- df %>%
group_split(ID, Depth, TarDepth, Side) %>%
map_dfr(~ tidy(boot(data = .x, statistic = myfunc, R = 1000)))
或者另一种选择是 nest_by
set.seed(333)
bootresults <- df %>%
nest_by(ID, Depth, TarDepth, Side) %>%
mutate(new = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))
更新
使用可重现的例子
df <- data.frame(id=c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2),
cond=c('A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B'),
comm=c('X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y','X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'),
measure=c(0.8, 1.1, 0.7, 1.2, 0.9, 2.3, 0.6, 1.1, 0.7, 1.3, 0.6, 1.5, 1.0, 2.1, 0.7, 1.2))
myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$measure)),
median=median(unlist(data$measure)),
sd = sd(unlist(data$measure))))
}
df1 <- df %>%
nest_by(cond, comm) %>%
mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000))))
df1
# A tibble: 4 x 4
# Rowwise: cond, comm
cond comm data out
<chr> <chr> <list<tibble[,2]>> <list>
1 A X [4 × 2] <tibble [3 × 4]>
2 A Y [4 × 2] <tibble [3 × 4]>
3 B X [4 × 2] <tibble [3 × 4]>
4 B Y [4 × 2] <tibble [3 × 4]>
那么,我们unnest
library(tidyr)
df1 %>%
ungroup %>%
select(-data) %>%
unnest(out)
# A tibble: 12 x 6
cond comm term statistic bias std.error
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 A X mean 0.85 -0.000250 0.0555
2 A X median 0.85 0.000900 0.0734
3 A X sd 0.129 -0.0246 0.0362
4 A Y mean 1.7 -0.00575 0.253
5 A Y median 1.7 -0.00650 0.374
6 A Y sd 0.589 -0.103 0.162
7 B X mean 0.65 0.000200 0.0258
8 B X median 0.65 0.000550 0.0402
9 B X sd 0.0577 -0.0120 0.0189
10 B Y mean 1.25 0.00260 0.0767
11 B Y median 1.2 0.0337 0.0995
12 B Y sd 0.173 -0.0372 0.0661
更新2
根据 OP 的输入数据,通过将 'measure' 更改为 'PRTS'
来更改函数 'myfunc'myfunc <- function(data, indices) {
data <- data[indices,]
return(c(mean=mean(unlist(data$PRTS)),
median=median(unlist(data$PRTS)),
sd = sd(unlist(data$PRTS))))
}
individ %>%
nest_by(ID, Depth, TarDepth, Side) %>%
mutate(out = list(tidy(boot(data = data, statistic = myfunc, R = 1000)))) %>%
ungroup %>%
select(-data) %>%
unnest(out)
# A tibble: 3 x 8
ID Depth TarDepth Side term statistic bias std.error
<fct> <fct> <fct> <fct> <chr> <dbl> <dbl> <dbl>
1 s109 DIFF Mid DIFF mean 0.630 0.000108 0.0166
2 s109 DIFF Mid DIFF median 0.596 0.00756 0.0229
3 s109 DIFF Mid DIFF sd 0.0738 -0.00361 0.0139