如何在 r 中为 dplyr::distinct() 命令使用外部变量列表?

How does one use an external list of variables for dplyr::distinct() command in r?

如何在 r 中为 dplyr::distinct() 命令使用外部变量列表?

例如,我想使用下面这个外部变量列表,作为对 mtcars 数据集执行 distinct 命令的依据:

## builds external_list_of_vars_df
# ---- NOTE: one-column data.frame of variable names, converted straight to a tibble
external_list_of_vars_df <- as_tibble(
  data.frame(external_list_of_vars_df = c("gear", "carb", "am"))
)
# ---- NOTE: prints the tibble
external_list_of_vars_df
> external_list_of_vars_df
# A tibble: 3 × 1
  external_list_of_vars_df
  <chr>                   
1 gear                    
2 carb                    
3 am   

我可以用一种较繁琐的方法完成此任务,即手动输入所关注的变量:

> mtcars_distinct_df_long
# A tibble: 13 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
 2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
 3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
 4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
 5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
 6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
 7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
 8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
 9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

当我尝试使用简短的写法(直接引用外部变量列表)时,它不起作用:

## my short way to create mtcars_distinct_df_external, by using the external list of variables
# ---- NOTE: creates object
# ---- NOTE(review): vars(...) is handed to distinct() as a single expression, so it
# ---- appears to be evaluated as one new <quos> column instead of being read as the
# ---- column names gear, carb and am (see the 1 x 12 result printed below)
mtcars_distinct_df_external <- 
  as_tibble(
    mtcars %>% 
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
> mtcars_distinct_df_external
# A tibble: 1 × 12
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb `vars(external_list_of_vars_df$external_li…`
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <quos>                                      
1    21     6   160   110   3.9  2.62  16.5     0     1     4     4 external_list_of_vars_df$external_list_of_v…
> # ---- NOTE: does not work

这个任务可行吗?如果可以,怎么做?

提前致谢。



这是我用来生成示例的代码:

# how to use external list of vars for dplyr::distinct() command

## loads package(s)
# ---- NOTE: installs tidyverse only when it is missing, then always attaches it.
# ---- require() by itself returns FALSE instead of erroring, and install.packages()
# ---- does not attach the package, so on a fresh install as_tibble(), %>% and
# ---- distinct() below would not be found.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

## data for example
mtcars

## builds external_list_of_vars_df
# ---- NOTE: one-column data.frame of variable names, converted straight to a tibble
external_list_of_vars_df <- as_tibble(
  data.frame(external_list_of_vars_df = c("gear", "carb", "am"))
)
# ---- NOTE: prints the tibble
external_list_of_vars_df

## long way to create mtcars_distinct_df_long, with the variables typed out by hand
# ---- NOTE: de-duplicates on gear, carb and am (all other columns are kept),
# ---- then converts the result to a tibble
mtcars_distinct_df_long <- mtcars %>%
  distinct(gear, carb, am, .keep_all = TRUE) %>%
  as_tibble()
# ---- NOTE: prints the result
mtcars_distinct_df_long

## my short way to create mtcars_distinct_df_external, by using the external list of variables
# ---- NOTE: creates object
# ---- NOTE(review): vars(...) is handed to distinct() as a single expression, so it
# ---- appears to be evaluated as one new <quos> column instead of being read as the
# ---- column names gear, carb and am; the result is a 1 x 12 tibble
mtcars_distinct_df_external <- 
  as_tibble(
    mtcars %>% 
      distinct(vars(external_list_of_vars_df$external_list_of_vars_df), .keep_all = TRUE)
  )
# ---- NOTE: displays data
mtcars_distinct_df_external
# ---- NOTE: does not work
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Baseline with the columns typed out by hand: convert mtcars to a tibble, then
# de-duplicate on gear, carb and am while keeping every other column
as_tibble(mtcars) %>%
  distinct(gear, carb, am, .keep_all = TRUE)
#> # A tibble: 13 x 11
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21       6 160     110  3.9   2.62  16.5     0     1     4     4
#>  2  22.8     4 108      93  3.85  2.32  18.6     1     1     4     1
#>  3  21.4     6 258     110  3.08  3.22  19.4     1     0     3     1
#>  4  18.7     8 360     175  3.15  3.44  17.0     0     0     3     2
#>  5  14.3     8 360     245  3.21  3.57  15.8     0     0     3     4
#>  6  24.4     4 147.     62  3.69  3.19  20       1     0     4     2
#>  7  19.2     6 168.    123  3.92  3.44  18.3     1     0     4     4
#>  8  16.4     8 276.    180  3.07  4.07  17.4     0     0     3     3
#>  9  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 10  26       4 120.     91  4.43  2.14  16.7     0     1     5     2
#> 11  15.8     8 351     264  4.22  3.17  14.5     0     1     5     4
#> 12  19.7     6 145     175  3.62  2.77  15.5     0     1     5     6
#> 13  15       8 301     335  3.54  3.57  14.6     0     1     5     8

由 reprex 包 (v2.0.1) 于 2022-02-25 创建

有两种方法可以在 dplyr 的动词(函数)中使用保存变量名称的外部向量。

  1. 使用 across(all_of()):
library(dplyr)

# Character vector naming the columns that define a duplicate
external_list_of_vars <- c("gear", "carb", "am")

# all_of() turns the character vector into a column selection and across()
# passes those columns to distinct(); .keep_all = TRUE retains the other columns
distinct(
  mtcars,
  across(all_of(external_list_of_vars)),
  .keep_all = TRUE
)
  2. 使用整洁求值(tidy evaluation)——具体来说,是 unquote-splice(解引用拼接)运算符 !!!:
# syms() converts each name in the character vector to a symbol and !!! splices
# them into distinct() as separate arguments
distinct(mtcars, !!!syms(external_list_of_vars), .keep_all = TRUE)

我不清楚您的名称向量是否已经在数据框中,或者这是否只是您尝试解决问题的一部分。如果是前者,您可以将我代码中的 external_list_of_vars 替换为 external_list_of_vars_df$external_list_of_vars_df。