gls 模型的 `emmeans` 不在 `map` 中 运行
`emmeans` for a gls model doesn't run inside `map`
这个问题的灵感来自, and related to and
我想将多个测试打包到一个工作流程中。
这适用于 glm
模型。
library(dplyr)
library(purrr)
library(emmeans)
library(nlme)
diamond_result <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup %>%
dplyr::mutate(models=map(data,~glm(price ~ x + y + z + clarity + color,data=.x)),
jt = map(models, ~emmeans::joint_tests(.x, data = .x$data)),
means=map(models,~emmeans::emmeans(.x,"color",data=.x$data)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise",infer = c(T,T))),
across(models:p_cont, stats::setNames, .$cut))
> diamond_result$jt
$Ideal
model term df1 df2 F.ratio p.value
x 1 Inf 611.626 <.0001
y 1 Inf 2.914 0.0878
z 1 Inf 100.457 <.0001
clarity 7 Inf 800.852 <.0001
color 6 Inf 256.796 <.0001
$Premium
model term df1 df2 F.ratio p.value
x 1 Inf 2074.371 <.0001
但是相同的语法不适用于 gls
模型,所以我在 emmeans()
步骤停止了。最后,我希望在 mutate
步骤中 joint_tests
、emmeans
和 contrast
。
diamonds_emm2 <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means=map(models,~emmeans::emmeans(.x,"clarity",data=.x$data)),
across(models:p_cont, setNames, .$cut))
Error: Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Run `rlang::last_error()` to see where the error occurred.
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
1. `%>%`(...)
18. base::.handleSimpleError(...)
19. dplyr:::h(simpleError(msg, call))
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
█
1. ├─`%>%`(...)
2. ├─dplyr::mutate(...)
3. ├─dplyr:::mutate.data.frame(...)
4. │ └─dplyr:::mutate_cols(.data, ...)
5. │ ├─base::withCallingHandlers(...)
6. │ └─mask$eval_all_mutate(dots[[i]])
7. ├─purrr::map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))
8. │ └─.f(.x[[i]], ...)
9. │ └─emmeans::emmeans(.x, "clarity", data = .x$data)
10. │ ├─base::do.call(ref_grid, args)
11. │ └─(function (object, at, cov.reduce = mean, cov.keep = get_emm_option("cov.keep"), ...
12. │ ├─emmeans::recover_data(object, data = as.data.frame(data), ...)
13. │ └─emmeans:::recover_data.gls(...)
14. │ └─emmeans:::recover_data.call(...)
15. │ ├─tbl[, vars, drop = FALSE]
16. │ └─base::`[.data.frame`(tbl, , vars, drop = FALSE)
17. │ └─base::stop("undefined columns selected")
18. └─base::.handleSimpleError(...)
19. └─dplyr:::h(simpleError(msg, call))
代码在这一步运行正常。
diamonds_emm <- diamonds %>%
group_by(cut) %>% nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)))
如何解决这个问题?谢谢。
更新:Ronak 的回答中的 map2
函数解决了 means
步骤中的问题,但它不会进行成对对比。我错过了什么?
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means = map2(data, models,~emmeans::emmeans(.y,"clarity",data=.x)),
p_cont = map2(means, ~emmeans::contrast(.y, "pairwise",infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut)) -> result
Error: Problem with `mutate()` input `p_cont`.
x object '.z' not found
ℹ Input `p_cont` is `map(means, ~emmeans::contrast(.y, "pairwise", infer = c(T, T)))`.
ℹ The error occurred in group 1: cut = "Fair".
在 p_cont
步给输入重新命名,例如 ~emmeans::contrast(.z, "pairwise", infer = c(T, T)))
没有解决问题。
使用 map2
在 emmeans
步骤中传递数据和模型。对于 contrasts
和 joint_tests
你可以使用 map
.
library(tidyverse)
library(emmeans)
library(nlme)
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x))) %>%
ungroup %>%
mutate(means = map2(data, models,~emmeans(.y,"clarity",data=.x)),
p_cont = map(means, contrast, "pairwise"),
joint_tests = map(means, joint_tests),
across(models:joint_tests, setNames, .$cut)) -> result
result
# cut data models means p_cont joint_tests
# <ord> <list> <named lis> <named lis> <named li> <named list>
#1 Ideal <tibble [21,551 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#2 Premium <tibble [13,791 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#3 Good <tibble [4,906 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#4 Very Good <tibble [12,082 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#5 Fair <tibble [1,610 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
如果你不坚持使用purrr::map
系列,我建议使用新的(dplyr 1.0.0)rowwise
style。它不那么令人困惑,因为您可以按原样使用 variable/column 名称,而无需选择正确的 map
函数并计算出 lambda 表示法。您只需要将函数调用包装在 list(...)
中。只有最后一次调用 across
需要在未分组的 data.frame
.
上调用
library(tidyverse)
library(emmeans)
library(nlme)
diamonds_emm_row <- diamonds %>%
nest_by(cut) %>%
dplyr::mutate(models = list(gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data = data)),
jt = list(joint_tests(models, data = data)),
means = list(emmeans::emmeans(models, "clarity", data = data)),
p_cont = list(emmeans::contrast(means, "pairwise", infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut))
diamonds_emm_row
#> # A tibble: 5 x 6
#> cut data models jt means p_cont
#> <ord> <list<tbl_df[,9]> <named lis> <named list> <named l> <named li>
#> 1 Fair [1,610 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 2 Good [4,906 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 3 Very Go… [12,082 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 4 Premium [13,791 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 5 Ideal [21,551 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
由 reprex package (v0.3.0)
于 2021 年 1 月 1 日创建
这与使用 purrr::map
产生的结果大致相同。 “或多或少”,因为 identical()
不会显示它,因为函数调用保存在属性(和其他地方)中,并且取决于您是否使用 {dplyr} 的 rowwise
样式或 map
lambda 表示法。
diamonds_emm_map <- diamonds %>%
nest_by(cut) %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
jt = map2(models, data, ~ joint_tests(.x, data = .y)),
means = map2(data, models, ~ emmeans::emmeans(.y, "clarity", data = .x)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise", infer = c(T,T))),
across(models:p_cont, setNames, .$cut))
map2(diamonds_emm_row, diamonds_emm_map, all.equal, check.attributes = FALSE)
#> $cut
#> [1] TRUE
#>
#> $data
#> [1] TRUE
#>
#> $models
#> [1] "Component \"Fair\": Component \"call\": target, current do not match when deparsed"
#> [2] "Component \"Good\": Component \"call\": target, current do not match when deparsed"
#> [3] "Component \"Very Good\": Component \"call\": target, current do not match when deparsed"
#> [4] "Component \"Premium\": Component \"call\": target, current do not match when deparsed"
#> [5] "Component \"Ideal\": Component \"call\": target, current do not match when deparsed"
#>
#> $jt
#> [1] TRUE
#>
#> $means
#> [1] TRUE
#>
#> $p_cont
#> [1] TRUE
由 reprex package (v0.3.0)
于 2021 年 1 月 1 日创建
这个问题的灵感来自
我想将多个测试打包到一个工作流程中。
这适用于 glm
模型。
library(dplyr)
library(purrr)
library(emmeans)
library(nlme)
diamond_result <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup %>%
dplyr::mutate(models=map(data,~glm(price ~ x + y + z + clarity + color,data=.x)),
jt = map(models, ~emmeans::joint_tests(.x, data = .x$data)),
means=map(models,~emmeans::emmeans(.x,"color",data=.x$data)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise",infer = c(T,T))),
across(models:p_cont, stats::setNames, .$cut))
> diamond_result$jt
$Ideal
model term df1 df2 F.ratio p.value
x 1 Inf 611.626 <.0001
y 1 Inf 2.914 0.0878
z 1 Inf 100.457 <.0001
clarity 7 Inf 800.852 <.0001
color 6 Inf 256.796 <.0001
$Premium
model term df1 df2 F.ratio p.value
x 1 Inf 2074.371 <.0001
但是相同的语法不适用于 gls
模型,所以我在 emmeans()
步骤停止了。最后,我希望在 mutate
步骤中 joint_tests
、emmeans
和 contrast
。
diamonds_emm2 <- diamonds %>%
group_by(cut) %>%
nest() %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means=map(models,~emmeans::emmeans(.x,"clarity",data=.x$data)),
across(models:p_cont, setNames, .$cut))
Error: Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Run `rlang::last_error()` to see where the error occurred.
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
1. `%>%`(...)
18. base::.handleSimpleError(...)
19. dplyr:::h(simpleError(msg, call))
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
█
1. ├─`%>%`(...)
2. ├─dplyr::mutate(...)
3. ├─dplyr:::mutate.data.frame(...)
4. │ └─dplyr:::mutate_cols(.data, ...)
5. │ ├─base::withCallingHandlers(...)
6. │ └─mask$eval_all_mutate(dots[[i]])
7. ├─purrr::map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))
8. │ └─.f(.x[[i]], ...)
9. │ └─emmeans::emmeans(.x, "clarity", data = .x$data)
10. │ ├─base::do.call(ref_grid, args)
11. │ └─(function (object, at, cov.reduce = mean, cov.keep = get_emm_option("cov.keep"), ...
12. │ ├─emmeans::recover_data(object, data = as.data.frame(data), ...)
13. │ └─emmeans:::recover_data.gls(...)
14. │ └─emmeans:::recover_data.call(...)
15. │ ├─tbl[, vars, drop = FALSE]
16. │ └─base::`[.data.frame`(tbl, , vars, drop = FALSE)
17. │ └─base::stop("undefined columns selected")
18. └─base::.handleSimpleError(...)
19. └─dplyr:::h(simpleError(msg, call))
代码在这一步运行正常。
diamonds_emm <- diamonds %>%
group_by(cut) %>% nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)))
如何解决这个问题?谢谢。
更新:Ronak 的回答中的 map2
函数解决了 means
步骤中的问题,但它不会进行成对对比。我错过了什么?
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
means = map2(data, models,~emmeans::emmeans(.y,"clarity",data=.x)),
p_cont = map2(means, ~emmeans::contrast(.y, "pairwise",infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut)) -> result
Error: Problem with `mutate()` input `p_cont`.
x object '.z' not found
ℹ Input `p_cont` is `map(means, ~emmeans::contrast(.y, "pairwise", infer = c(T, T)))`.
ℹ The error occurred in group 1: cut = "Fair".
在 p_cont
步给输入重新命名,例如 ~emmeans::contrast(.z, "pairwise", infer = c(T, T)))
没有解决问题。
使用 map2
在 emmeans
步骤中传递数据和模型。对于 contrasts
和 joint_tests
你可以使用 map
.
library(tidyverse)
library(emmeans)
library(nlme)
diamonds %>%
group_by(cut) %>%
nest() %>%
mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x))) %>%
ungroup %>%
mutate(means = map2(data, models,~emmeans(.y,"clarity",data=.x)),
p_cont = map(means, contrast, "pairwise"),
joint_tests = map(means, joint_tests),
across(models:joint_tests, setNames, .$cut)) -> result
result
# cut data models means p_cont joint_tests
# <ord> <list> <named lis> <named lis> <named li> <named list>
#1 Ideal <tibble [21,551 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#2 Premium <tibble [13,791 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#3 Good <tibble [4,906 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#4 Very Good <tibble [12,082 × … <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
#5 Fair <tibble [1,610 × 9… <gls> <emmGrid> <emmGrid> <summary_emm[,5] [1 ×…
如果你不坚持使用purrr::map
系列,我建议使用新的(dplyr 1.0.0)rowwise
style。它不那么令人困惑,因为您可以按原样使用 variable/column 名称,而无需选择正确的 map
函数并计算出 lambda 表示法。您只需要将函数调用包装在 list(...)
中。只有最后一次调用 across
需要在未分组的 data.frame
.
library(tidyverse)
library(emmeans)
library(nlme)
diamonds_emm_row <- diamonds %>%
nest_by(cut) %>%
dplyr::mutate(models = list(gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data = data)),
jt = list(joint_tests(models, data = data)),
means = list(emmeans::emmeans(models, "clarity", data = data)),
p_cont = list(emmeans::contrast(means, "pairwise", infer = c(T,T)))) %>%
ungroup %>%
mutate(across(models:p_cont, setNames, .$cut))
diamonds_emm_row
#> # A tibble: 5 x 6
#> cut data models jt means p_cont
#> <ord> <list<tbl_df[,9]> <named lis> <named list> <named l> <named li>
#> 1 Fair [1,610 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 2 Good [4,906 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 3 Very Go… [12,082 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 4 Premium [13,791 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
#> 5 Ideal [21,551 × 9] <gls> <summary_emm[,5] … <emmGrid> <emmGrid>
由 reprex package (v0.3.0)
于 2021 年 1 月 1 日创建这与使用 purrr::map
产生的结果大致相同。 “或多或少”,因为 identical()
不会显示它,因为函数调用保存在属性(和其他地方)中,并且取决于您是否使用 {dplyr} 的 rowwise
样式或 map
lambda 表示法。
diamonds_emm_map <- diamonds %>%
nest_by(cut) %>%
ungroup() %>%
dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,
weights = varIdent(form = ~ 1|color),
data =.x)),
jt = map2(models, data, ~ joint_tests(.x, data = .y)),
means = map2(data, models, ~ emmeans::emmeans(.y, "clarity", data = .x)),
p_cont = map(means, ~emmeans::contrast(.x, "pairwise", infer = c(T,T))),
across(models:p_cont, setNames, .$cut))
map2(diamonds_emm_row, diamonds_emm_map, all.equal, check.attributes = FALSE)
#> $cut
#> [1] TRUE
#>
#> $data
#> [1] TRUE
#>
#> $models
#> [1] "Component \"Fair\": Component \"call\": target, current do not match when deparsed"
#> [2] "Component \"Good\": Component \"call\": target, current do not match when deparsed"
#> [3] "Component \"Very Good\": Component \"call\": target, current do not match when deparsed"
#> [4] "Component \"Premium\": Component \"call\": target, current do not match when deparsed"
#> [5] "Component \"Ideal\": Component \"call\": target, current do not match when deparsed"
#>
#> $jt
#> [1] TRUE
#>
#> $means
#> [1] TRUE
#>
#> $p_cont
#> [1] TRUE
由 reprex package (v0.3.0)
于 2021 年 1 月 1 日创建