最终用户使用 do.call 到 运行 具有给定参数列表的函数

Use do.call to run a function with a given list of parameters by the end user

我有一个来自 TidyDensity 包的名为 tidy_normal() 的函数。每个参数只接受一个参数,例如:.mean = 1

我想要做的是构建一个函数,该函数将从用户那里接收带引号的字符串和参数列表,如下所示:

tidy_multi_dist(
  .tidy_dist = "tidy_normal", 
  .param_list = list(
    .n = 50, 
    .mean = c(-1, 0, 1), 
    .sd = 1, 
    .num_sims = 1)
  )

我目前拥有的是:

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  ) #%>%
    #group_by_all()
  
  func_parm_list <- as.list(df)

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::rowwise() %>%
    dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here

  #df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

我收到的错误消息是:

> df %>%
+     #dplyr::rowwise() %>%
+     dplyr::mutate(results = list(do.call(td, func_parm_list)))
Error in `dplyr::mutate()`:
! Problem while computing `results = list(do.call(td, func_parm_list))`.
i The error occurred in group 1: n = 500, .mean = -1, .sd = 1, sim = 1.
Caused by error in `tidy_normal()`:
! unused arguments (n = c(500, 500, 500), sim = c(1, 1, 1))
Run `rlang::last_error()` to see where the error occurred.

因为我不知道用户将要进入哪个发行版,所以我希望这个函数是动态的,并使用 do.call 而不是为每种可能性做出明确的 rlang::call2

我不确定如何从这里继续,因为一切都失败了,我想是因为我的 do.call 是错误的。

函数参数名称应与 'df' 的列名称匹配,即如果我们在函数

中查看 'df' 的输出
> df
# A tibble: 3 × 4
      n .mean   .sd   sim
  <int> <dbl> <dbl> <int>
1    50    -1     1     1
2    50     0     1     1
3    50     1     1     1

tidynormal 的参数是

> formalArgs(tidy_normal)
[1] ".n"        ".mean"     ".sd"       ".num_sims"

在下面的代码中,列名被更改为匹配 formalArgs 并使用 pmap 来应用函数 rowwise(这比 rowwise)

...
names(df) <- formalArgs(td)
...
dff <- df %>% mutate(result = purrr::pmap(cur_data(), match.fun(td)))
...

我们可能需要将功能更改为

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  ) #%>%
    #group_by_all()
  
  #func_parm_list <- as.list(df)

 names(df) <- formalArgs(td)
  # Run call on the grouped df ----
  #dff <- df %>%
  #  dplyr::rowwise() %>%
  #  dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here

 dff <- df %>% mutate(results = purrr::pmap(cur_data(), match.fun(td)))
 
  #df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

-测试

> out <- tidy_multi_dist(
+   .tidy_dist = "tidy_normal", 
+   .param_list = list(
+     .n = 50, 
+     .mean = c(-1, 0, 1), 
+     .sd = 1, 
+     .num_sims = 1)
+   )
> out
# A tibble: 150 × 8
   sim_number dist_name             x      y    dx       dy         p       q
   <fct>      <fct>             <int>  <dbl> <dbl>    <dbl>     <dbl>   <dbl>
 1 1          Gaussian c(-1, 1)     1 -0.879 -4.90 0.000211 0         -Inf   
 2 1          Gaussian c(-1, 1)     2 -1.70  -4.74 0.000585 0           -3.05
 3 1          Gaussian c(-1, 1)     3 -1.72  -4.59 0.00142  0           -2.74
 4 1          Gaussian c(-1, 1)     4 -0.577 -4.43 0.00306  0           -2.54
 5 1          Gaussian c(-1, 1)     5 -1.87  -4.28 0.00583  0           -2.39
 6 1          Gaussian c(-1, 1)     6 -0.779 -4.13 0.00990  0           -2.27
 7 1          Gaussian c(-1, 1)     7  0.342 -3.97 0.0151   5.73e-300   -2.16
 8 1          Gaussian c(-1, 1)     8 -2.28  -3.82 0.0212   1.12e-268   -2.07
 9 1          Gaussian c(-1, 1)     9 -0.875 -3.66 0.0278   4.06e-239   -1.98
10 1          Gaussian c(-1, 1)    10 -1.77  -3.51 0.0350   2.70e-211   -1.90
# … with 140 more rows

> str(out)
tibble [150 × 8] (S3: tbl_df/tbl/data.frame)
 $ sim_number: Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
 $ dist_name : Factor w/ 3 levels "Gaussian c(-1, 1)",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ x         : int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
 $ y         : num [1:150] -1.901 -3.809 -1.186 -2.821 -0.666 ...
 $ dx        : num [1:150] -5.08 -4.93 -4.78 -4.63 -4.48 ...
 $ dy        : num [1:150] 0.000212 0.000573 0.001367 0.002882 0.005368 ...
 $ p         : num [1:150] 0 0 0 0 0 ...
 $ q         : num [1:150] -Inf -3.05 -2.74 -2.54 -2.39 ...
 - attr(*, "all")=List of 10
  ..$ class      : chr [1:3] "tbl_df" "tbl" "data.frame"
  ..$ row.names  : int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
  ..$ names      : chr [1:7] "sim_number" "x" "y" "dx" ...
  ..$ .mean      : num -1
  ..$ .sd        : num 1
  ..$ .n         : int 50
  ..$ .num_sims  : int 1
  ..$ tibble_type: chr "tidy_gaussian"
  ..$ ps         : num [1:50] -50 -48 -46 -44 -42 -40 -38 -36 -34 -32 ...
  ..$ qs         : num [1:50] 0 0.0204 0.0408 0.0612 0.0816 ...
 - attr(*, "tbl")= chr "tidy_multi_tibble"