如何在 dplyr::across() 中使用返回多个值的函数？

Question

我想对多个列执行多项操作，我可以使用 dplyr::across() 来实现：

library(tidyverse)

# Toy data: x plus three derived "p" columns.
df <- tibble(x = 1:5, p1 = x * 2, p2 = x * 4, p3 = x * 5)

# Two named lambdas are applied to every "p" column; across() names the
# new columns <column>_<function name>.
r1 <- df %>%
  mutate(
    across(
      starts_with("p"),
      list(inf = ~ .x - 1, sup = ~ .x + 1)
    )
  )
r1
#> # A tibble: 5 x 10
#>       x    p1    p2    p3 p1_inf p1_sup p2_inf p2_sup p3_inf p3_sup
#>   <int> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1     1     2     4     5      1      3      3      5      4      6
#> 2     2     4     8    10      3      5      7      9      9     11
#> 3     3     6    12    15      5      7     11     13     14     16
#> 4     4     8    16    20      7      9     15     17     19     21
#> 5     5    10    20    25      9     11     19     21     24     26
names(r1)
#>  [1] "x"      "p1"     "p2"     "p3"     "p1_inf" "p1_sup" "p2_inf" "p2_sup"
#>  [9] "p3_inf" "p3_sup"

但是，如果函数需要进行大量计算，这种做法的可扩展性并不好，因为同样的计算会被执行两次。

相反，如果我可以用一个函数把需要计算的东西只计算一次，然后返回包含 2 个（或更多）结果的列表，那就太好了。

例如，考虑这个例子：

# Perform a (simulated) heavy calculation on x once and return 2 flavours
# of it together.
#
# x:     numeric vector (a whole column when called through across()).
# delay: seconds to sleep to simulate the expensive step; defaults to 1.
# Returns a data.frame with one row per element of x and two columns:
# `inf` (x2 - 10) and `sup` (x2 + 10).
f <- function(x, delay = 1) {
  x2 <- x # wow, such heavy, very calculate
  Sys.sleep(delay)
  data.frame(inf = x2 - 10, sup = x2 + 10)
}

# f() returns a data.frame, so every generated column is itself a
# data.frame (a packed column) instead of two flat columns.
r2 <- df %>%
  mutate(
    across(
      .cols = starts_with("p"),
      .fns = f,
      .names = "{.col}_{.fn}"
    )
  )
r2
#> # A tibble: 5 x 7
#>       x    p1    p2    p3 p1_1$inf  $sup p2_1$inf  $sup p3_1$inf  $sup
#>   <int> <dbl> <dbl> <dbl>    <dbl> <dbl>    <dbl> <dbl>    <dbl> <dbl>
#> 1     1     2     4     5       -8    12       -6    14       -5    15
#> 2     2     4     8    10       -6    14       -2    18        0    20
#> 3     3     6    12    15       -4    16        2    22        5    25
#> 4     4     8    16    20       -2    18        6    26       10    30
#> 5     5    10    20    25        0    20       10    30       15    35
names(r2)
#> [1] "x"    "p1"   "p2"   "p3"   "p1_1" "p2_1" "p3_1"
map_chr(r2, class)
#>            x           p1           p2           p3         p1_1         p2_1 
#>    "integer"    "numeric"    "numeric"    "numeric" "data.frame" "data.frame" 
#>         p3_1 
#> "data.frame"

由 reprex package (v2.0.1) 于 2021-10-25 创建

使用 rbind() 而不是 data.frame() 会得到相同的结果，但名称略有不同（p1_1$inf 变为 p1_1[,"inf"]），class 也不同（data.frame 变成 c("matrix", "array")）。

另外，当只使用单个函数时，{.fn} 是该函数的位置（序号），因此存在命名问题。

我也试过 unnest() 结果，但没有成功。

有没有办法在 across() 中使用这样一个函数，得到与我的第一个输出结构完全相同的结果？

Answer 1

也许这对你有帮助？

library(tidyverse)

# Builds the two derived columns for a single input column.
#
# x: the values of the column.
# y: the name of the column, used as the prefix of the output names.
# Returns a tibble with columns `<y>_inf` (x - 10) and `<y>_sup` (x + 10).
f <- function(x, y) {
  inf_name <- paste0(y, "_inf")
  sup_name <- paste0(y, "_sup")
  tibble(
    !!inf_name := x - 10,
    !!sup_name := x + 10
  )
}

# Apply f() to every "p" column (values + column name) and bind the
# resulting tibbles side by side.
df %>%
  select(starts_with("p")) %>%
  imap_dfc(f)

#  p1_inf p1_sup p2_inf p2_sup p3_inf p3_sup
#   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#1     -8     12     -6     14     -5     15
#2     -6     14     -2     18      0     20
#3     -4     16      2     22      5     25
#4     -2     18      6     26     10     30
#5      0     20     10     30     15     35

绑定到原始 df。

# Bind the derived columns to the complete original df, so that x and the
# source columns p1..p3 are all kept next to the new *_inf / *_sup columns.
bind_cols(
  df,
  imap_dfc(select(df, starts_with("p")), f)
)

Answer 2

实际上，由于繁重的计算已经完成，得到的只是嵌套的数据框，因此您只需将其转换为平面形式即可，也许几个 mutate() 加上 rename 就能做到？

# Flatten the packed data.frame columns produced by f() into plain columns.
# Columns are selected by name rather than by position, and across()
# arguments are spelled out in full (`.fns`, not the partially matched `.fn`).
r2 <- df %>%
  # one call to f() per "p" column; the results land in p1_1, p2_1, p3_1
  mutate(across(starts_with("p"), .fns = f, .names = "{.col}_{.fn}")) %>%
  # extract each component from the packed columns: all *_inf first ...
  mutate(
    across(ends_with("_1"), .fns = ~ .x[["inf"]], .names = "{.col}_inf")
  ) %>%
  # ... then all *_sup
  mutate(
    across(ends_with("_1"), .fns = ~ .x[["sup"]], .names = "{.col}_sup")
  ) %>%
  # p1_1_inf -> p1_inf, etc.
  rename_with(.fn = ~ gsub("_1_", "_", .x)) %>%
  # drop the packed helper columns p1_1, p2_1, p3_1
  select(-ends_with("_1"))

> r2
# A tibble: 5 x 10
      x    p1    p2    p3 p1_inf p2_inf p3_inf p1_sup p2_sup p3_sup
  <int> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1     2     4     5     -8     -6     -5     12     14     15
2     2     4     8    10     -6     -2      0     14     18     20
3     3     6    12    15     -4      2      5     16     22     25
4     4     8    16    20     -2      6     10     18     26     30
5     5    10    20    25      0     10     15     20     30     35

Answer 3

实际上，dplyr 的 Github 上的一个问题已经考虑到这一点：https://github.com/tidyverse/dplyr/issues/5563#issuecomment-721769342。

在那里，@romainfrancois 给出了一个非常有用的解决方案，即下面这个 unpackross() 函数：

library(tidyverse)
# Returns both derived values at once as a two-column tibble:
# `inf` (x - 10) and `sup` (x + 10).
f <- function(x) {
  tibble(inf = x - 10, sup = x + 10)
}
# across() variant for functions that return a data frame.
#
# ...: forwarded unchanged to across() (columns, functions, .names, ...).
# Every column produced by across() is then unpacked with tidyr::unpack(),
# joining the outer and inner column names with "_".
# Must be called where across() is valid, e.g. inside mutate().
unpackross <- function(...) {
  packed <- across(...)
  tidyr::unpack(packed, names(packed), names_sep = "_")
}

# Same toy data as in the question.
df <- tibble(x = 1:5, p1 = x * 2, p2 = x * 4, p3 = x * 5)

# f() runs once per "p" column; unpackross() turns each returned tibble
# into flat <column>_<fn index>_<inner name> columns.
r2 <- df %>%
  mutate(
    unpackross(starts_with("p"), f, .names = "{.col}_{.fn}")
  )
r2
#> # A tibble: 5 x 10
#>       x    p1    p2    p3 p1_1_inf p1_1_sup p2_1_inf p2_1_sup p3_1_inf p3_1_sup
#>   <int> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     1     2     4     5       -8       12       -6       14       -5       15
#> 2     2     4     8    10       -6       14       -2       18        0       20
#> 3     3     6    12    15       -4       16        2       22        5       25
#> 4     4     8    16    20       -2       18        6       26       10       30
#> 5     5    10    20    25        0       20       10       30       15       35
names(r2)
#>  [1] "x"        "p1"       "p2"       "p3"       "p1_1_inf" "p1_1_sup"
#>  [7] "p2_1_inf" "p2_1_sup" "p3_1_inf" "p3_1_sup"
map_chr(r2, class)
#>         x        p1        p2        p3  p1_1_inf  p1_1_sup  p2_1_inf  p2_1_sup 
#> "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
#>  p3_1_inf  p3_1_sup 
#> "numeric" "numeric"

由 reprex package (v2.0.1) 于 2021-10-26 创建

希望有一天 across() 会有一个 unpack 参数！（如果您同意，请在此为我的建议添加 +1）

如何在 dplyr::across() 中使用返回多个值的函数？

How to use a function that returns multiple values in dplyr::across()?

r

dplyr

across