无法在 R 的 across() 中使用 dplyr 编程语法

Cannot use dplyr programming syntax in across() in R

我想使用 dplyr 的编程语法（`!!` 与 `:=` 结合）为 `.fns` 参数中的函数动态命名，但失败了。代码如下：

# Question's original (failing) attempt: for each window size i, add a lagged
# copy and a right-aligned rolling mean of every column whose name contains
# "mean", naming the two functions dynamically from i.
# NOTE(review): only zoo and glue are attached in this snippet, yet %>%,
# group_by(), mutate(), across() and contains() are used -- presumably dplyr
# is loaded elsewhere; confirm.
library(zoo)
library(glue)

# Example data: four regions, one row per region, three "*_mean" columns.
aa = structure(list(region = c(1, 2, 3, 4), co_mean = c(5, 5, 5, 5
), o3_mean = c(5, 5, 5, 5), pm2.5_mean = c(5, 5, 5, 5)), row.names = c(NA, 
                                                                       -4L), class = c("tbl_df", "tbl", "data.frame"))


for (i in 1:3) {
  
# Function labels built from the loop index, e.g. "lag1" and "lag01".
fun_name_1 = glue('lag{i}')
fun_name_2 = glue('lag0{i}')
# aa is overwritten on every iteration, so later iterations also see the
# columns added by earlier ones.
aa = aa %>% group_by(region) %>% 
  mutate(across(.cols = contains('mean'), 
                # NOTE(review): this is where the reported error is raised.
                # Likely cause: list() here is base R's list(), which does not
                # process `!!name := value`; the later snippets in this file
                # name the list with setNames() instead.
                .fns = list(!!fun_name_1 := ~lag(., i), # the error occurs on this line
                            !!fun_name_2 := ~ rollmeanr(., i)),
                .names = '{.col}_{.fn}'))
# A bare name inside a for loop is evaluated but not auto-printed.
aa
}

不知道怎么解决

非常感谢任何帮助!

======更新========

我的新代码和新错误:

# Question's updated attempt: the functions are now passed as a list named via
# setNames(), but the loop still fails with the error quoted below the snippet.
library(zoo)
library(glue)

# Example data: four regions, one row per region, three "*_mean" columns.
aa = structure(list(region = c(1, 2, 3, 4), co_mean = c(5, 5, 5, 5
), o3_mean = c(5, 5, 5, 5), pm2.5_mean = c(5, 5, 5, 5)), row.names = c(NA, 
                                                                       -4L), class = c("tbl_df", "tbl", "data.frame"))


for (i in 1:3) {
 # i <- 1
  # Function labels built from the loop index, e.g. "lag1" and "lag01".
  fun_name_1 = glue('lag{i}')
  fun_name_2 = glue('lag0{i}')
  # NOTE(review): the pipeline result is neither assigned nor printed, so each
  # iteration's result is discarded and aa itself is never modified.
  aa %>%
    group_by(region) %>% 
    mutate(across(.cols = contains('mean'), 
                  # NOTE(review): every group has a single row; presumably for
                  # i > 1 rollmeanr() returns fewer values than the group has
                  # rows, which would explain the names/length mismatch in the
                  # error below -- confirm.
                  .fns = setNames(list(~lag(., i),
                                       ~ rollmeanr(., i)), c(fun_name_1, fun_name_2)),
                  .names = '{.col}_{.fn}'))
# A bare name inside a for loop is evaluated but not auto-printed.
aa
}

# Error: Problem with `mutate()` input `..1`.
# x 'names' attribute [6] must be the same length as the vector [5]
# i Input `..1` is `across(...)`.
# i The error occurred in group 1: region = 1.
# Run `rlang::last_error()` to see where the error occurred.

把函数作为命名 list 传入即可正常工作。先用 group_by 分组也是有意义的（前提是 OP 的原始数据每组有多行）。

# Single-iteration demo (window size 1): the across() functions get their
# names from the glue-built labels, attached to a plain list via setNames().
i <- 1
fun_name_1 <- glue("lag{i}")
fun_name_2 <- glue("lag0{i}")
aa %>%
  group_by(region) %>%
  mutate(
    across(
      .cols = contains("mean"),
      # One lag and one right-aligned rolling mean per selected column.
      .fns = setNames(
        list(~ lag(.x, i), ~ rollmeanr(.x, i)),
        c(fun_name_1, fun_name_2)
      ),
      # New columns are named <column>_<function label>.
      .names = "{.col}_{.fn}"
    )
  )

-输出

# A tibble: 4 x 10
# Groups:   region [4]
#  region co_mean o3_mean pm2.5_mean co_mean_lag1 co_mean_lag01 o3_mean_lag1 o3_mean_lag01 pm2.5_mean_lag1 pm2.5_mean_lag01
#   <dbl>   <dbl>   <dbl>      <dbl>        <dbl>         <dbl>        <dbl>         <dbl>           <dbl>            <dbl>
#1      1       5       5          5           NA             5           NA             5              NA                5
#2      2       5       5          5           NA             5           NA             5              NA                5
#3      3       5       5          5           NA             5           NA             5              NA                5
#4      4       5       5          5           NA             5           NA             5              NA                5

可以在 rollmeanr 中指定 fill 参数（用于填充结果中缺失位置的值，例如 NA），使输出与输入等长：
# Same transformation as the previous snippet (reuses i, fun_name_1 and
# fun_name_2 from it), but pads the rolling mean so its length matches the
# input. `fill` is the padding VALUE, not an on/off switch: use NA, because
# `fill = TRUE` would insert TRUE (coerced to 1) into the numeric columns.
aa %>%
  group_by(region) %>%
  mutate(
    across(
      .cols = contains("mean"),
      .fns = setNames(
        list(
          ~ lag(.x, i),
          ~ rollmeanr(.x, i, fill = NA)
        ),
        c(fun_name_1, fun_name_2)
      ),
      .names = "{.col}_{.fn}"
    )
  )

首先，我认为你的数据不应该分组：至少就你分享的数据而言，每组只有 1 行，在这种情况下计算 lag 值和滚动平均值是没有意义的。

您可以在 across 中使用 .names 获得适当的列名,并使用 map_dfc 将所有内容合并到一个数据框中。

library(dplyr)
library(purrr)
library(zoo)

# For each window size k in 1:3, compute a lag-k copy and a right-aligned
# rolling mean of width k (NA-padded) for every column whose name contains
# "mean". transmute() keeps only the newly created columns, and map_dfc()
# binds the three per-window results together column-wise.
map_dfc(1:3, function(k) {
  aa %>%
    transmute(
      across(
        .cols = contains("mean"),
        .fns = list(
          lag = ~ lag(.x, k),
          lag0 = ~ rollmeanr(.x, k, fill = NA)
        ),
        # Use across()'s documented {.fn}/{.col} placeholders, as the other
        # snippets here do. sprintf() only fills in %d, giving names such as
        # "lag_co_mean_1".
        .names = sprintf("{.fn}_{.col}_%d", k)
      )
    )
})

如果您在其他数据集上尝试,可以添加 group_by(Region)