在 dplyr 的 across() 中引用保存在变量里的列名

refer to column name from variable in across in dplyr

给定参考列 z,我想使用 dplyr 将每一列转换为:

x = log(x) - log(z)

我希望 z 以字符串的形式给出,或者更好的是,以被引用的表达式(quoted expression)的形式给出(例如来自用户输入——这一切都发生在一个函数内部)。

这是我试过的方法:

library(dplyr)

# Example data: three integer columns; `z` is the reference column.
m <- data.frame(x = 1:5, y = 11:15, z = 21:25)
# Name of the reference column, held as a string.
denom <- "z"

这个有效:

# Baseline: the reference column `z` is written out literally in the lambda.
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(z))
  ))

这失败了:

# Fails on purpose: `rlang::sym(denom)` only builds a symbol object; without
# `!!` it is never injected, so `log()` is called on the symbol itself.
m %>%
        mutate(across(x:z ,
                list(~ log(.) - log(rlang::sym(denom)))))

# Error: Problem with `mutate()` input `..1`.
# ℹ `..1 = across(x:z, list(~log(.) - log(rlang::sym(denom))))`.
# ✖ non-numeric argument to mathematical function
# Run `rlang::last_error()` to see where the error occurred.

这也失败了:

# Fails on purpose: `!!denom` injects the string value itself, so the call
# becomes `log("z")` (visible in the error output) rather than `log(z)`.
m %>%
        mutate(across(x:z ,
                list(~ log(.) - log(!!denom) )))

# Error: Problem with `mutate()` input `..1`.
# ℹ `..1 = across(x:z, list(~log(.) - log("z")))`.
# ✖ non-numeric argument to mathematical function
# Run `rlang::last_error()` to see where the error occurred.
# >                 #list(~ log(.) - log(rlang::sym(denom)))))

利用 rlang 中的 .data 代词你可以这样做:

library(dplyr)

# Sample data, plus the reference column's name stored as a string.
m <- data.frame(x = 1:5, y = 11:15, z = 21:25)
denom <- "z"

# `.data[[denom]]` fetches the column whose name is stored in `denom`.
m %>%
  mutate(across(x:z, list(~ log(.x) - log(.data[[denom]]))))
#>   x  y  z       x_1        y_1 z_1
#> 1 1 11 21 -3.044522 -0.6466272   0
#> 2 2 12 22 -2.397895 -0.6061358   0
#> 3 3 13 23 -2.036882 -0.5705449   0
#> 4 4 14 24 -1.791759 -0.5389965   0
#> 5 5 15 25 -1.609438 -0.5108256   0

我不确定这是不是一种好的写法,但你可以这样做:

library(dplyr)

# Turn the string into a symbol and inject it with `!!`, so the lambda ends
# up referring to the column itself. `as.symbol()` is the same function as
# `as.name()`.
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(!!as.symbol(denom)))
  ))

你也可以使用get():

# `get(denom)` looks up the object named by the string; inside `mutate()` that
# finds the column of that name. `.cols` is passed explicitly because calling
# `across()` without it is deprecated since dplyr 1.1.0; `everything()` is the
# selection the original call relied on implicitly.
m %>%
  mutate(across(everything(), list(~ log(.) - log(get(denom)))))

如果情况比简单地选择一列更复杂,可以使用 eval 配合 str2lang(或 parse)。如果 denom 本身已经是一个表达式(expression),则可以直接在 eval 中使用:
# The reference is given as a string; parse it to a call/symbol and evaluate
# it inside the lambda.
denom <- "z"
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(eval(str2lang(denom))))
  ))
#  x  y  z       x_1        y_1 z_1
#1 1 11 21 -3.044522 -0.6466272   0
#2 2 12 22 -2.397895 -0.6061358   0
#3 3 13 23 -2.036882 -0.5705449   0
#4 4 14 24 -1.791759 -0.5389965   0
#5 5 15 25 -1.609438 -0.5108256   0

# The reference is already an expression object, so `eval()` can take it
# directly.
denom <- expression(z)
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(eval(denom)))
  ))
#  x  y  z       x_1        y_1 z_1
#1 1 11 21 -3.044522 -0.6466272   0
#2 2 12 22 -2.397895 -0.6061358   0
#3 3 13 23 -2.036882 -0.5705449   0
#4 4 14 24 -1.791759 -0.5389965   0
#5 5 15 25 -1.609438 -0.5108256   0

# Hard-coded equivalent, for comparison with the two variants above.
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(z))
  ))
#  x  y  z       x_1        y_1 z_1
#1 1 11 21 -3.044522 -0.6466272   0
#2 2 12 22 -2.397895 -0.6061358   0
#3 3 13 23 -2.036882 -0.5705449   0
#4 4 14 24 -1.791759 -0.5389965   0
#5 5 15 25 -1.609438 -0.5108256   0

更复杂:

# The reference is an arbitrary expression supplied as a string.
denom <- "x + y"
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(eval(str2lang(denom))))
  ))
#  x  y  z       x_1         y_1       z_1
#1 1 11 21 -2.484907 -0.08701138 0.5596158
#2 2 12 22 -1.945910 -0.15415068 0.4519851
#3 3 13 23 -1.673976 -0.20763936 0.3629055
#4 4 14 24 -1.504077 -0.25131443 0.2876821
#5 5 15 25 -1.386294 -0.28768207 0.2231436

# Same reference, supplied as an expression object.
denom <- expression(x + y)
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(eval(denom)))
  ))
#  x  y  z       x_1         y_1       z_1
#1 1 11 21 -2.484907 -0.08701138 0.5596158
#2 2 12 22 -1.945910 -0.15415068 0.4519851
#3 3 13 23 -1.673976 -0.20763936 0.3629055
#4 4 14 24 -1.504077 -0.25131443 0.2876821
#5 5 15 25 -1.386294 -0.28768207 0.2231436

# Hard-coded equivalent, for comparison with the two variants above.
m %>%
  mutate(across(
    x:z,
    list(~ log(.x) - log(x + y))
  ))
#  x  y  z       x_1         y_1       z_1
#1 1 11 21 -2.484907 -0.08701138 0.5596158
#2 2 12 22 -1.945910 -0.15415068 0.4519851
#3 3 13 23 -1.673976 -0.20763936 0.3629055
#4 4 14 24 -1.504077 -0.25131443 0.2876821
#5 5 15 25 -1.386294 -0.28768207 0.2231436