gls 模型的 `emmeans` 不在 `map` 中运行

Question

这个问题的灵感来自, and related to and

我想将多个测试打包到一个工作流程中。

这适用于 glm 模型。

library(dplyr)
library(purrr)
library(emmeans)
library(nlme)

diamond_result <- diamonds %>%
  group_by(cut) %>%
  nest() %>% 
  ungroup %>%
  dplyr::mutate(models=map(data,~glm(price ~ x + y + z + clarity + color,data=.x)),
         jt = map(models, ~emmeans::joint_tests(.x, data = .x$data)),
         means=map(models,~emmeans::emmeans(.x,"color",data=.x$data)),
         p_cont = map(means, ~emmeans::contrast(.x, "pairwise",infer = c(T,T))),
         across(models:p_cont, stats::setNames,  .$cut)) 
> diamond_result$jt
$Ideal
 model term df1 df2 F.ratio p.value
 x            1 Inf 611.626 <.0001 
 y            1 Inf   2.914 0.0878 
 z            1 Inf 100.457 <.0001 
 clarity      7 Inf 800.852 <.0001 
 color        6 Inf 256.796 <.0001 


$Premium
 model term df1 df2  F.ratio p.value
 x            1 Inf 2074.371 <.0001

但是相同的语法不适用于 gls 模型，所以我在 emmeans() 步骤停止了。最后，我希望在 mutate 步骤中 joint_tests、emmeans 和 contrast。

diamonds_emm2 <-  diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  ungroup() %>%
  dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x)),
                means=map(models,~emmeans::emmeans(.x,"clarity",data=.x$data)),
                across(models:p_cont, setNames,  .$cut))
    Error: Problem with `mutate()` input `means`.
    x undefined columns selected
    ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
    Run `rlang::last_error()` to see where the error occurred.
<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
  1. `%>%`(...)
 18. base::.handleSimpleError(...)
 19. dplyr:::h(simpleError(msg, call))

<error/dplyr:::mutate_error>
Problem with `mutate()` input `means`.
x undefined columns selected
ℹ Input `means` is `map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))`.
Backtrace:
     █
  1. ├─`%>%`(...)
  2. ├─dplyr::mutate(...)
  3. ├─dplyr:::mutate.data.frame(...)
  4. │ └─dplyr:::mutate_cols(.data, ...)
  5. │   ├─base::withCallingHandlers(...)
  6. │   └─mask$eval_all_mutate(dots[[i]])
  7. ├─purrr::map(models, ~emmeans::emmeans(.x, "clarity", data = .x$data))
  8. │ └─.f(.x[[i]], ...)
  9. │   └─emmeans::emmeans(.x, "clarity", data = .x$data)
 10. │     ├─base::do.call(ref_grid, args)
 11. │     └─(function (object, at, cov.reduce = mean, cov.keep = get_emm_option("cov.keep"), ...
 12. │       ├─emmeans::recover_data(object, data = as.data.frame(data), ...)
 13. │       └─emmeans:::recover_data.gls(...)
 14. │         └─emmeans:::recover_data.call(...)
 15. │           ├─tbl[, vars, drop = FALSE]
 16. │           └─base::`[.data.frame`(tbl, , vars, drop = FALSE)
 17. │             └─base::stop("undefined columns selected")
 18. └─base::.handleSimpleError(...)
 19.   └─dplyr:::h(simpleError(msg, call))

代码在这一步运行正常。

diamonds_emm <-  diamonds %>%
  group_by(cut) %>% nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x)))

如何解决这个问题？谢谢。

更新：Ronak 的回答中的 map2 函数解决了 means 步骤中的问题，但它不会进行成对对比。我错过了什么？

diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x)), 
         means = map2(data, models,~emmeans::emmeans(.y,"clarity",data=.x)),
         p_cont = map2(means, ~emmeans::contrast(.y, "pairwise",infer = c(T,T)))) %>%
  ungroup %>%
  mutate(across(models:p_cont, setNames,  .$cut)) -> result
Error: Problem with `mutate()` input `p_cont`.
x object '.z' not found
ℹ Input `p_cont` is `map(means, ~emmeans::contrast(.y, "pairwise", infer = c(T, T)))`.
ℹ The error occurred in group 1: cut = "Fair".

在 p_cont 步给输入重新命名，例如 ~emmeans::contrast(.z, "pairwise", infer = c(T, T))) 没有解决问题。

Answer 1

使用 map2 在 emmeans 步骤中传递数据和模型。对于 contrasts 和 joint_tests 你可以使用 map.

library(tidyverse)
library(emmeans)
library(nlme)

diamonds %>%
  group_by(cut) %>% 
  nest() %>%
  mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                              weights = varIdent(form = ~ 1|color),
                              data =.x))) %>%
  ungroup %>%
  mutate(means = map2(data, models,~emmeans(.y,"clarity",data=.x)),
         p_cont = map(means, contrast, "pairwise"), 
         joint_tests = map(means, joint_tests),
         across(models:joint_tests, setNames,  .$cut)) -> result

result

#   cut       data                models      means       p_cont     joint_tests           
#  <ord>     <list>              <named lis> <named lis> <named li> <named list>          
#1 Ideal     <tibble [21,551 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#2 Premium   <tibble [13,791 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#3 Good      <tibble [4,906 × 9… <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#4 Very Good <tibble [12,082 × … <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…
#5 Fair      <tibble [1,610 × 9… <gls>       <emmGrid>   <emmGrid>  <summary_emm[,5] [1 ×…

Answer 2

如果你不坚持使用purrr::map系列，我建议使用新的（dplyr 1.0.0）rowwise style。它不那么令人困惑，因为您可以按原样使用 variable/column 名称，而无需选择正确的 map 函数并计算出 lambda 表示法。您只需要将函数调用包装在 list(...) 中。只有最后一次调用 across 需要在未分组的 data.frame.

上调用

library(tidyverse)
library(emmeans)
library(nlme)

diamonds_emm_row <- diamonds %>%
  nest_by(cut) %>% 
  dplyr::mutate(models = list(gls(price ~ x + y + z + clarity,  
                                  weights = varIdent(form = ~ 1|color),
                                  data = data)),
                jt = list(joint_tests(models, data = data)),
                means = list(emmeans::emmeans(models, "clarity", data = data)),
                p_cont = list(emmeans::contrast(means, "pairwise", infer = c(T,T)))) %>% 
  ungroup %>% 
  mutate(across(models:p_cont, setNames, .$cut))

diamonds_emm_row
#> # A tibble: 5 x 6
#>   cut                   data models      jt                 means     p_cont    
#>   <ord>    <list<tbl_df[,9]> <named lis> <named list>       <named l> <named li>
#> 1 Fair           [1,610 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 2 Good           [4,906 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 3 Very Go…      [12,082 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 4 Premium       [13,791 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid> 
#> 5 Ideal         [21,551 × 9] <gls>       <summary_emm[,5] … <emmGrid> <emmGrid>

^{由 reprex package (v0.3.0)}

于 2021 年 1 月 1 日创建

这与使用 purrr::map 产生的结果大致相同。 “或多或少”，因为 identical() 不会显示它，因为函数调用保存在属性（和其他地方）中，并且取决于您是否使用 {dplyr} 的 rowwise样式或 map lambda 表示法。

diamonds_emm_map <- diamonds %>%
  nest_by(cut) %>% 
  ungroup() %>%
  dplyr::mutate(models=map(data,~gls(price ~ x + y + z + clarity,  
                                     weights = varIdent(form = ~ 1|color),
                                     data =.x)),
                jt = map2(models, data, ~ joint_tests(.x, data = .y)),
                means = map2(data, models, ~ emmeans::emmeans(.y, "clarity", data = .x)),
                p_cont = map(means, ~emmeans::contrast(.x, "pairwise", infer = c(T,T))),
                across(models:p_cont, setNames, .$cut))

map2(diamonds_emm_row, diamonds_emm_map, all.equal, check.attributes = FALSE)
#> $cut
#> [1] TRUE
#> 
#> $data
#> [1] TRUE
#> 
#> $models
#> [1] "Component \"Fair\": Component \"call\": target, current do not match when deparsed"     
#> [2] "Component \"Good\": Component \"call\": target, current do not match when deparsed"     
#> [3] "Component \"Very Good\": Component \"call\": target, current do not match when deparsed"
#> [4] "Component \"Premium\": Component \"call\": target, current do not match when deparsed"  
#> [5] "Component \"Ideal\": Component \"call\": target, current do not match when deparsed"    
#> 
#> $jt
#> [1] TRUE
#> 
#> $means
#> [1] TRUE
#> 
#> $p_cont
#> [1] TRUE

^{由 reprex package (v0.3.0)}

于 2021 年 1 月 1 日创建

gls 模型的 `emmeans` 不在 `map` 中 运行

`emmeans` for a gls model doesn't run inside `map`

r

dplyr

purrr

emmeans

gls 模型的 `emmeans` 不在 `map` 中运行