利用 dplyr 中 across() 内的函数处理成对的列
Utilizing functions within across() in dplyr to work with paired-columns
set.seed(3)
library(dplyr)
# Example data: four measures, each recorded twice (`_1` / `_2`) for three
# AD groups. The rpois() calls stay in their original order so the seeded
# values are unchanged.
x <- tibble(
  Measure = c("Height", "Weight", "Width", "Length"),
  AD1_1 = rpois(4, 10),
  AD1_2 = rpois(4, 9),
  AD2_1 = rpois(4, 10),
  AD2_2 = rpois(4, 9),
  AD3_1 = rpois(4, 10),
  AD3_2 = rpois(4, 9)
)
假设我有这样的数据。我希望对每个 AD 运行一个函数,把下划线后数字不同的两列(_1 与 _2)配成一对,得到 AD1fun、AD2fun、AD3fun。
我不想逐个写成:
# Element-wise difference between the two members of a column pair.
fun <- function(x, y) {
  x - y
}
# Manual approach: one mutate() argument per AD pair. The data set defined
# above is `x`, and every pair is spelled out because a literal `...` is not
# valid in a top-level call.
x %>%
  mutate(
    AD1fun = fun(AD1_1, AD1_2),
    AD2fun = fun(AD2_1, AD2_2),
    AD3fun = fun(AD3_1, AD3_2)
  )
而是可以这样写:
# Apply `fun` to all `_1` columns and all `_2` columns at once: each across()
# call yields a data frame, and the result columns take their names from the
# first one (`<col>_minus`).
x_minus <- x %>%
  mutate(
    fun(
      across(ends_with("_1"), .names = "{.col}_minus"),
      across(ends_with("_2"))
    )
  ) %>%
  # Drop the pair index (AD1_1_minus -> AD1_minus). The backslash has to be
  # doubled inside an R string literal; "\d" alone is a parse error.
  rename_with(~ sub("_\\d+", "", .), ends_with("_minus"))
可以得到如下结果:
# A tibble: 4 x 10
Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_minus AD2_minus AD3_minus
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 Height 6 10 10 3 12 8 -4 7 4
2 Weight 8 9 13 6 14 7 -1 7 7
3 Width 10 9 11 5 12 8 1 6 4
4 Length 8 9 8 7 8 13 -1 1 -5
但是,如果我们定义的函数不是简单的算术运算,例如:
# Label each pair of values as agreeing or not.
# Conditions are checked in order and the first match wins:
#   - equal values                      -> "Agree"
#   - exactly one of the two is zero    -> "Disagreement"
#   - values differ by at most 1        -> "Agree"
#   - anything else                     -> "Disagree"
# Returns the character vector produced by case_when().
fun <- function(x, y) {
  case_when(
    x == y ~ "Agree",
    x == 0 & y != 0 ~ "Disagreement",
    x != 0 & y == 0 ~ "Disagreement",
    abs(x - y) <= 1 ~ "Agree",
    TRUE ~ "Disagree"
  )
}
# Same pattern as the subtraction example, but with the case_when() based
# `fun`. NOTE: per the surrounding discussion this call is expected to fail,
# because `fun` receives two whole data frames rather than column vectors.
x_case <- x %>%
  mutate(
    fun(
      across(ends_with("_1"), .names = "{.col}_case"),
      across(ends_with("_2"))
    )
  ) %>%
  # The backslash has to be doubled inside an R string literal.
  rename_with(~ sub("_\\d+", "", .), ends_with("_case"))
就会报错。原因引用如下:
This procedure essentially means that you compare two datasets: one
with variables ending with _1 and one with _2. It is, thus, the same
as dat %>% select(ends_with("_1")) - dat %>% select(ends_with("_2")).
And as these are lists, you cannot compare them that way
既然如此,怎样才能在 across() 中使用这样的函数?
我们可以用 across 遍历列名以 "_1" 结尾(ends_with)的列,然后用 cur_column() 取得当前列名,把后缀替换为 _2,再用 get 取出对应列的值,并将当前列与对应的 _2 列一起作为参数传给 fun。
library(dplyr)
library(stringr)
# For every `_1` column, fetch its `_2` partner by name and pass both to
# `fun`; results are stored as `<col>_case`.
x %>%
  mutate(
    across(
      ends_with("_1"),
      ~ fun(.x, get(str_replace(cur_column(), "_1$", "_2"))),
      .names = "{.col}_case"
    )
  )
-输出
# A tibble: 4 x 10
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_1_case AD2_1_case AD3_1_case
# <chr> <int> <int> <int> <int> <int> <int> <chr> <chr> <chr>
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree
另一个选择是使用 split.default/map。这里我们先把数据集按列名的共同前缀拆分成一个由 data.frame 组成的 list,然后用 map/reduce 把 fun 应用到 list 的每个元素上,最后用 bind_cols 把结果绑定回原始数据集。
library(purrr)
# Split the value columns into one data frame per AD prefix, reduce each pair
# with `fun`, then attach the results to the original data.
x %>%
  select(-Measure) %>%
  # Group columns by name without the trailing pair index; the backslash has
  # to be doubled inside an R string literal.
  split.default(str_remove(names(.), "_\\d+$")) %>%
  map_dfr(reduce, fun) %>%
  # rename_with() replaces the superseded rename_all(); it renames every
  # column by default.
  rename_with(~ str_c(., "_case")) %>%
  bind_cols(x, .)
-输出
# A tibble: 4 x 10
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_case AD2_case AD3_case
# <chr> <int> <int> <int> <int> <int> <int> <chr> <chr> <chr>
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree
至于 OP 的方法,问题在于 fun 没有经过 Vectorize 向量化。如果先用 Vectorize 包装,它就可以应用于多组成对的列。
# OP's approach, with `fun` wrapped in Vectorize() so it is applied to one
# column pair at a time instead of to two whole data frames.
x %>%
  mutate(
    Vectorize(fun)(
      across(ends_with("_1"), .names = "{.col}_minus"),
      across(ends_with("_2"))
    )
  ) %>%
  # Flatten the result into ordinary columns of a data.frame.
  do.call(data.frame, .) %>%
  # Recover the clean column names. Backslashes have to be doubled inside an R
  # string literal; rename_with() replaces the superseded rename_at().
  rename_with(
    ~ str_extract(., "AD\\d+_\\d+_minus"),
    contains("minus")
  )
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_1_minus AD2_1_minus AD3_1_minus
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree
set.seed(3)
library(dplyr)
# Example data: four measures, each recorded twice (`_1` / `_2`) for three
# AD groups. The rpois() calls stay in their original order so the seeded
# values are unchanged.
x <- tibble(
  Measure = c("Height", "Weight", "Width", "Length"),
  AD1_1 = rpois(4, 10),
  AD1_2 = rpois(4, 9),
  AD2_1 = rpois(4, 10),
  AD2_2 = rpois(4, 9),
  AD3_1 = rpois(4, 10),
  AD3_2 = rpois(4, 9)
)
假设我有这样的数据。我希望对每个 AD 运行一个函数,把下划线后数字不同的两列(_1 与 _2)配成一对,得到 AD1fun、AD2fun、AD3fun。
我不想逐个写成:
# Element-wise difference between the two members of a column pair.
fun <- function(x, y) {
  x - y
}
# Manual approach: one mutate() argument per AD pair. The data set defined
# above is `x`, and every pair is spelled out because a literal `...` is not
# valid in a top-level call.
x %>%
  mutate(
    AD1fun = fun(AD1_1, AD1_2),
    AD2fun = fun(AD2_1, AD2_2),
    AD3fun = fun(AD3_1, AD3_2)
  )
# Apply `fun` to all `_1` columns and all `_2` columns at once: each across()
# call yields a data frame, and the result columns take their names from the
# first one (`<col>_minus`).
x_minus <- x %>%
  mutate(
    fun(
      across(ends_with("_1"), .names = "{.col}_minus"),
      across(ends_with("_2"))
    )
  ) %>%
  # Drop the pair index (AD1_1_minus -> AD1_minus). The backslash has to be
  # doubled inside an R string literal; "\d" alone is a parse error.
  rename_with(~ sub("_\\d+", "", .), ends_with("_minus"))
可以得到如下结果:
# A tibble: 4 x 10
Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_minus AD2_minus AD3_minus
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 Height 6 10 10 3 12 8 -4 7 4
2 Weight 8 9 13 6 14 7 -1 7 7
3 Width 10 9 11 5 12 8 1 6 4
4 Length 8 9 8 7 8 13 -1 1 -5
但是,如果我们定义的函数不是简单的算术运算,例如:
# Label each pair of values as agreeing or not.
# Conditions are checked in order and the first match wins:
#   - equal values                      -> "Agree"
#   - exactly one of the two is zero    -> "Disagreement"
#   - values differ by at most 1        -> "Agree"
#   - anything else                     -> "Disagree"
# Returns the character vector produced by case_when().
fun <- function(x, y) {
  case_when(
    x == y ~ "Agree",
    x == 0 & y != 0 ~ "Disagreement",
    x != 0 & y == 0 ~ "Disagreement",
    abs(x - y) <= 1 ~ "Agree",
    TRUE ~ "Disagree"
  )
}
# Same pattern as the subtraction example, but with the case_when() based
# `fun`. NOTE: per the surrounding discussion this call is expected to fail,
# because `fun` receives two whole data frames rather than column vectors.
x_case <- x %>%
  mutate(
    fun(
      across(ends_with("_1"), .names = "{.col}_case"),
      across(ends_with("_2"))
    )
  ) %>%
  # The backslash has to be doubled inside an R string literal.
  rename_with(~ sub("_\\d+", "", .), ends_with("_case"))
就会报错。原因引用如下:
This procedure essentially means that you compare two datasets: one with variables ending with _1 and one with _2. It is, thus, the same as dat %>% select(ends_with("_1")) - dat %>% select(ends_with("_2")). And as these are lists, you cannot compare them that way
既然如此,怎样才能在 across() 中使用这样的函数?
我们可以用 across 遍历列名以 "_1" 结尾(ends_with)的列,然后用 cur_column() 取得当前列名,把后缀替换为 _2,再用 get 取出对应列的值,并将当前列与对应的 _2 列一起作为参数传给 fun。
library(dplyr)
library(stringr)
# For every `_1` column, fetch its `_2` partner by name and pass both to
# `fun`; results are stored as `<col>_case`.
x %>%
  mutate(
    across(
      ends_with("_1"),
      ~ fun(.x, get(str_replace(cur_column(), "_1$", "_2"))),
      .names = "{.col}_case"
    )
  )
-输出
# A tibble: 4 x 10
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_1_case AD2_1_case AD3_1_case
# <chr> <int> <int> <int> <int> <int> <int> <chr> <chr> <chr>
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree
另一个选择是使用 split.default/map。这里我们先把数据集按列名的共同前缀拆分成一个由 data.frame 组成的 list,然后用 map/reduce 把 fun 应用到 list 的每个元素上,最后用 bind_cols 把结果绑定回原始数据集。
library(purrr)
# Split the value columns into one data frame per AD prefix, reduce each pair
# with `fun`, then attach the results to the original data.
x %>%
  select(-Measure) %>%
  # Group columns by name without the trailing pair index; the backslash has
  # to be doubled inside an R string literal.
  split.default(str_remove(names(.), "_\\d+$")) %>%
  map_dfr(reduce, fun) %>%
  # rename_with() replaces the superseded rename_all(); it renames every
  # column by default.
  rename_with(~ str_c(., "_case")) %>%
  bind_cols(x, .)
-输出
# A tibble: 4 x 10
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_case AD2_case AD3_case
# <chr> <int> <int> <int> <int> <int> <int> <chr> <chr> <chr>
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree
至于 OP 的方法,问题在于 fun 没有经过 Vectorize 向量化。如果先用 Vectorize 包装,它就可以应用于多组成对的列。
# OP's approach, with `fun` wrapped in Vectorize() so it is applied to one
# column pair at a time instead of to two whole data frames.
x %>%
  mutate(
    Vectorize(fun)(
      across(ends_with("_1"), .names = "{.col}_minus"),
      across(ends_with("_2"))
    )
  ) %>%
  # Flatten the result into ordinary columns of a data.frame.
  do.call(data.frame, .) %>%
  # Recover the clean column names. Backslashes have to be doubled inside an R
  # string literal; rename_with() replaces the superseded rename_at().
  rename_with(
    ~ str_extract(., "AD\\d+_\\d+_minus"),
    contains("minus")
  )
# Measure AD1_1 AD1_2 AD2_1 AD2_2 AD3_1 AD3_2 AD1_1_minus AD2_1_minus AD3_1_minus
#1 Height 6 10 10 3 12 8 Disagree Disagree Disagree
#2 Weight 8 9 13 6 14 7 Agree Disagree Disagree
#3 Width 10 9 11 5 12 8 Agree Disagree Disagree
#4 Length 8 9 8 7 8 13 Agree Agree Disagree