如何在 R 中借助中间结果(accumulate 或 reduce)用 mutate 创建新列
How to mutate new column with intermediate results, accumulate or reduce, in R
我有一个数据集 my_df
具有以下结构(dput
添加在问题的末尾)
> my_df
group_id other_id case
1 1 1 add
2 1 1 add
3 1 11 add
4 1 1 replace
5 1 11 replace
6 1 1 replace
7 1 10 add
8 1 10 replace
9 2 2 add
10 2 10 add
11 2 10 replace
12 2 2 replace
13 2 3 add
14 2 3 replace
我想做的(以 tidyverse 的方式)是创建一个新列,比如 collection
,在按 group_id 分组(group_by)之后,根据以下两个条件逐行累积 other_id:
如果 case == 'add'
,则把当前行的 other_id 追加到该列上一行的值之后;
如果 case == 'replace'
,则从上一行已计算(累积)的值中去掉当前行的 other_id,即把它替换为 ""
(无)。
我想要的结果是这样的
> result
group_id other_id case collection
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace
显然,每个组的末尾都会是空值,因为 my_df 已经按这样的顺序排列(arranged/sorted)好了。
我尝试了 accumulate
和 reduce
,但只能为 case == 'add'
的行生成/累积值,无法在下面这个管道中应用 str_replace
。此外,我希望在 case == 'add'
时把 other_id
的值粘贴到 collection
中,但只与此前累积下来的值拼接,无论这些值所在的行属于哪种 case(见结果中的第 7 行和第 13 行)。
我尝试的语法只部分起作用
library(tidyverse)
# Asker's attempt, kept as posted; it only partly works.
# accumulate() pastes every other_id of the group onto the running string,
# including the ones on "replace" rows. case_when() then only chooses, per row,
# between showing that running string ("add") and the "?" placeholder
# ("replace"), so nothing is ever removed from the accumulated value.
my_df %>% group_by(group_id) %>%
mutate(collection = case_when(case == "add" ~ accumulate(other_id, paste, sep=", "),
case == "replace" ~ "?"))
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case collection
<chr> <chr> <chr> <chr>
1 1 1 add 1
2 1 1 add 1, 1
3 1 11 add 1, 1, 11
4 1 1 replace ?
5 1 11 replace ?
6 1 1 replace ?
7 1 10 add 1, 1, 11, 1, 11, 1, 10
8 1 10 replace ?
9 2 2 add 2
10 2 10 add 2, 10
11 2 10 replace ?
12 2 2 replace ?
13 2 3 add 2, 10, 10, 2, 3
14 2 3 replace ?
先在此致谢。
样本输入是
# Sample input: 14 rows in two groups, every column stored as character.
my_df <- data.frame(
  group_id = rep(c("1", "2"), times = c(8L, 6L)),
  other_id = c(
    "1", "1", "11", "1", "11", "1", "10", "10",
    "2", "10", "10", "2", "3", "3"
  ),
  case = c(
    "add", "add", "add", "replace", "replace", "replace", "add", "replace",
    "add", "add", "replace", "replace", "add", "replace"
  ),
  stringsAsFactors = FALSE
)
这里有一个可能性,使用 accumulate2
:
# One accumulation step for a comma-terminated id list such as "1,11,".
#
# cur:  the running string built so far ("" when nothing is collected).
# new:  the id carried by the current row.
# case: "add" appends `new` (plus a trailing comma); any other value removes
#       the first occurrence of `new` from `cur`.
#
# Returns the updated string. Removal compares whole ids, so removing "1"
# never eats part of "11" or "21", and ids are not interpreted as regexes.
f <- function(cur, new, case) {
  if (case == "add") {
    return(paste0(cur, new, ","))
  }
  # strsplit() drops the empty piece after the trailing comma and yields
  # character(0) for "", so `items` holds exactly the collected ids.
  items <- strsplit(cur, ",", fixed = TRUE)[[1]]
  hit <- match(new, items)
  if (!is.na(hit)) {
    items <- items[-hit]
  }
  if (length(items) == 0L) {
    ""
  } else {
    paste0(paste(items, collapse = ","), ",")
  }
}
# Fold other_id/case through f() separately for each group_id, so a group
# that does not end empty cannot leak its ids into the next group.
# .init = "" seeds the fold and [-1] drops that seed again; unlist() flattens
# the list returned by accumulate2() into a character column.
my_df %>%
  group_by(group_id) %>%
  mutate(collection = unlist(accumulate2(other_id, case, f, .init = "")[-1])) %>%
  ungroup() %>%
  # back to a plain data.frame, the class my_df started with
  as.data.frame()
group_id other_id case collection
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace
我和@Cettt 有同样的想法——使用 accumulate2
。这是一个使用正则表达式处理尾随逗号的选项。
# One accumulation step for a ", "-separated id list without a trailing
# separator, e.g. "1, 11".
#
# acc:      the running string built so far ("" when nothing is collected).
# other_id: the id carried by the current row.
# case:     "add" appends `other_id`; any other value removes its first
#           occurrence from `acc`.
#
# Returns the updated string. Ids are compared as whole, literal tokens, so
# an id containing regex metacharacters (".", "+", ...) is handled correctly.
addOrRemove <- function(acc, other_id, case) {
  if (case == "add") {
    # Scalar condition, so plain if/else rather than vectorised ifelse().
    if (acc == "") other_id else paste(acc, other_id, sep = ", ")
  } else {
    # strsplit() gives character(0) for "", which pastes back to "".
    items <- strsplit(acc, ", ", fixed = TRUE)[[1]]
    hit <- match(other_id, items)
    if (!is.na(hit)) {
      items <- items[-hit]
    }
    paste(items, collapse = ", ")
  }
}
# Fold each group's rows through addOrRemove(). Seeding with .init = "" sends
# every row's case -- including the first row of a group -- through the
# function, instead of assuming the first row is always an "add"; [-1] then
# drops the seed so the result lines up with the rows.
my_df %>%
  group_by(group_id) %>%
  mutate(
    collection = unlist(
      accumulate2(other_id, case, addOrRemove, .init = "")[-1]
    )
  )
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case collection
<chr> <chr> <chr> <chr>
1 1 1 add "1"
2 1 1 add "1, 1"
3 1 11 add "1, 1, 11"
4 1 1 replace "1, 11"
5 1 11 replace "1"
6 1 1 replace ""
7 1 10 add "10"
8 1 10 replace ""
9 2 2 add "2"
10 2 10 add "2, 10"
11 2 10 replace "2"
12 2 2 replace ""
13 2 3 add "3"
14 2 3 replace ""
我终于做到了,不需要通过事先定义的自定义函数来完成
# Same accumulation with the step written inline as an anonymous function:
# acc is the running "id,id," string, id the current other_id, op the current
# case. .init = "" seeds each group and [-1] drops that seed again.
my_df %>%
  group_by(group_id) %>%
  mutate(
    new = unlist(
      accumulate2(
        other_id, case,
        function(acc, id, op) {
          if (op == "add") {
            paste0(acc, id, ",")
          } else {
            # (^|,) anchors the match to the start of an id, so removing "1"
            # cannot cut the tail off "11," or "21,"; "\\1" puts the
            # preceding comma (if any) back.
            sub(paste0("(^|,)", id, ","), "\\1", acc)
          }
        },
        .init = ""
      )[-1]
    )
  )
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case new
<chr> <chr> <chr> <chr>
1 1 1 add "1,"
2 1 1 add "1,1,"
3 1 11 add "1,1,11,"
4 1 1 replace "1,11,"
5 1 11 replace "1,"
6 1 1 replace ""
7 1 10 add "10,"
8 1 10 replace ""
9 2 2 add "2,"
10 2 10 add "2,10,"
11 2 10 replace "2,"
12 2 2 replace ""
13 2 3 add "3,"
14 2 3 replace ""
同样在基础 R 中:
# Base R version: for each group_id, fold over that group's row indices with
# Reduce(), starting from "" so nothing about the first row is hard-coded.
# local() keeps the helper and the scratch vector out of the workspace.
my_df$collcetion <- local({
  # One fold step: `acc` is the running "id,id," string, `i` a row index.
  step <- function(acc, i) {
    id <- my_df$other_id[i]
    if (my_df$case[i] == "add") {
      return(paste0(acc, id, ","))
    }
    # Remove the first whole-id match; a regex-free comparison means "1"
    # never matches inside "11" or "21".
    items <- strsplit(acc, ",", fixed = TRUE)[[1]]
    hit <- match(id, items)
    if (!is.na(hit)) {
      items <- items[-hit]
    }
    if (length(items) == 0L) {
      ""
    } else {
      paste0(paste(items, collapse = ","), ",")
    }
  }

  out <- character(nrow(my_df))
  for (rows in split(seq_len(nrow(my_df)), my_df$group_id)) {
    # accumulate = TRUE also returns the seed as element 1; drop it.
    out[rows] <- Reduce(step, rows, init = "", accumulate = TRUE)[-1]
  }
  out
})
my_df
group_id other_id case collcetion
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace
我有一个数据集 my_df
具有以下结构(dput
添加在问题的末尾)
> my_df
group_id other_id case
1 1 1 add
2 1 1 add
3 1 11 add
4 1 1 replace
5 1 11 replace
6 1 1 replace
7 1 10 add
8 1 10 replace
9 2 2 add
10 2 10 add
11 2 10 replace
12 2 2 replace
13 2 3 add
14 2 3 replace
我想做的(以 tidyverse 的方式)是创建一个新列,比如 collection
,在按 group_id 分组(group_by)之后,根据以下两个条件逐行累积 other_id:
如果 case == 'add'
,则把当前行的 other_id 追加到该列上一行的值之后;
如果 case == 'replace'
,则从上一行已计算(累积)的值中去掉当前行的 other_id,即把它替换为 ""
(无)。
我想要的结果是这样的
> result
group_id other_id case collection
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace
显然,每个组的末尾都会是空值,因为 my_df 已经按这样的顺序排列(arranged/sorted)好了。
我尝试了 accumulate
和 reduce
,但只能为 case == 'add'
的行生成/累积值,无法在下面这个管道中应用 str_replace
。此外,我希望在 case == 'add'
时把 other_id
的值粘贴到 collection
中,但只与此前累积下来的值拼接,无论这些值所在的行属于哪种 case(见结果中的第 7 行和第 13 行)。
我尝试的语法只部分起作用
library(tidyverse)
# Asker's attempt, kept as posted; it only partly works.
# accumulate() pastes every other_id of the group onto the running string,
# including the ones on "replace" rows. case_when() then only chooses, per row,
# between showing that running string ("add") and the "?" placeholder
# ("replace"), so nothing is ever removed from the accumulated value.
my_df %>% group_by(group_id) %>%
mutate(collection = case_when(case == "add" ~ accumulate(other_id, paste, sep=", "),
case == "replace" ~ "?"))
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case collection
<chr> <chr> <chr> <chr>
1 1 1 add 1
2 1 1 add 1, 1
3 1 11 add 1, 1, 11
4 1 1 replace ?
5 1 11 replace ?
6 1 1 replace ?
7 1 10 add 1, 1, 11, 1, 11, 1, 10
8 1 10 replace ?
9 2 2 add 2
10 2 10 add 2, 10
11 2 10 replace ?
12 2 2 replace ?
13 2 3 add 2, 10, 10, 2, 3
14 2 3 replace ?
先在此致谢。
样本输入是
# Sample input: 14 rows in two groups, every column stored as character.
my_df <- data.frame(
  group_id = rep(c("1", "2"), times = c(8L, 6L)),
  other_id = c(
    "1", "1", "11", "1", "11", "1", "10", "10",
    "2", "10", "10", "2", "3", "3"
  ),
  case = c(
    "add", "add", "add", "replace", "replace", "replace", "add", "replace",
    "add", "add", "replace", "replace", "add", "replace"
  ),
  stringsAsFactors = FALSE
)
这里有一个可能性,使用 accumulate2
:
# One accumulation step for a comma-terminated id list such as "1,11,".
#
# cur:  the running string built so far ("" when nothing is collected).
# new:  the id carried by the current row.
# case: "add" appends `new` (plus a trailing comma); any other value removes
#       the first occurrence of `new` from `cur`.
#
# Returns the updated string. Removal compares whole ids, so removing "1"
# never eats part of "11" or "21", and ids are not interpreted as regexes.
f <- function(cur, new, case) {
  if (case == "add") {
    return(paste0(cur, new, ","))
  }
  # strsplit() drops the empty piece after the trailing comma and yields
  # character(0) for "", so `items` holds exactly the collected ids.
  items <- strsplit(cur, ",", fixed = TRUE)[[1]]
  hit <- match(new, items)
  if (!is.na(hit)) {
    items <- items[-hit]
  }
  if (length(items) == 0L) {
    ""
  } else {
    paste0(paste(items, collapse = ","), ",")
  }
}
# Fold other_id/case through f() separately for each group_id, so a group
# that does not end empty cannot leak its ids into the next group.
# .init = "" seeds the fold and [-1] drops that seed again; unlist() flattens
# the list returned by accumulate2() into a character column.
my_df %>%
  group_by(group_id) %>%
  mutate(collection = unlist(accumulate2(other_id, case, f, .init = "")[-1])) %>%
  ungroup() %>%
  # back to a plain data.frame, the class my_df started with
  as.data.frame()
group_id other_id case collection
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace
我和@Cettt 有同样的想法——使用 accumulate2
。这是一个使用正则表达式处理尾随逗号的选项。
# One accumulation step for a ", "-separated id list without a trailing
# separator, e.g. "1, 11".
#
# acc:      the running string built so far ("" when nothing is collected).
# other_id: the id carried by the current row.
# case:     "add" appends `other_id`; any other value removes its first
#           occurrence from `acc`.
#
# Returns the updated string. Ids are compared as whole, literal tokens, so
# an id containing regex metacharacters (".", "+", ...) is handled correctly.
addOrRemove <- function(acc, other_id, case) {
  if (case == "add") {
    # Scalar condition, so plain if/else rather than vectorised ifelse().
    if (acc == "") other_id else paste(acc, other_id, sep = ", ")
  } else {
    # strsplit() gives character(0) for "", which pastes back to "".
    items <- strsplit(acc, ", ", fixed = TRUE)[[1]]
    hit <- match(other_id, items)
    if (!is.na(hit)) {
      items <- items[-hit]
    }
    paste(items, collapse = ", ")
  }
}
# Fold each group's rows through addOrRemove(). Seeding with .init = "" sends
# every row's case -- including the first row of a group -- through the
# function, instead of assuming the first row is always an "add"; [-1] then
# drops the seed so the result lines up with the rows.
my_df %>%
  group_by(group_id) %>%
  mutate(
    collection = unlist(
      accumulate2(other_id, case, addOrRemove, .init = "")[-1]
    )
  )
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case collection
<chr> <chr> <chr> <chr>
1 1 1 add "1"
2 1 1 add "1, 1"
3 1 11 add "1, 1, 11"
4 1 1 replace "1, 11"
5 1 11 replace "1"
6 1 1 replace ""
7 1 10 add "10"
8 1 10 replace ""
9 2 2 add "2"
10 2 10 add "2, 10"
11 2 10 replace "2"
12 2 2 replace ""
13 2 3 add "3"
14 2 3 replace ""
我终于做到了,不需要通过事先定义的自定义函数来完成
# Same accumulation with the step written inline as an anonymous function:
# acc is the running "id,id," string, id the current other_id, op the current
# case. .init = "" seeds each group and [-1] drops that seed again.
my_df %>%
  group_by(group_id) %>%
  mutate(
    new = unlist(
      accumulate2(
        other_id, case,
        function(acc, id, op) {
          if (op == "add") {
            paste0(acc, id, ",")
          } else {
            # (^|,) anchors the match to the start of an id, so removing "1"
            # cannot cut the tail off "11," or "21,"; "\\1" puts the
            # preceding comma (if any) back.
            sub(paste0("(^|,)", id, ","), "\\1", acc)
          }
        },
        .init = ""
      )[-1]
    )
  )
# A tibble: 14 x 4
# Groups: group_id [2]
group_id other_id case new
<chr> <chr> <chr> <chr>
1 1 1 add "1,"
2 1 1 add "1,1,"
3 1 11 add "1,1,11,"
4 1 1 replace "1,11,"
5 1 11 replace "1,"
6 1 1 replace ""
7 1 10 add "10,"
8 1 10 replace ""
9 2 2 add "2,"
10 2 10 add "2,10,"
11 2 10 replace "2,"
12 2 2 replace ""
13 2 3 add "3,"
14 2 3 replace ""
同样在基础 R 中:
# Base R version: for each group_id, fold over that group's row indices with
# Reduce(), starting from "" so nothing about the first row is hard-coded.
# local() keeps the helper and the scratch vector out of the workspace.
my_df$collcetion <- local({
  # One fold step: `acc` is the running "id,id," string, `i` a row index.
  step <- function(acc, i) {
    id <- my_df$other_id[i]
    if (my_df$case[i] == "add") {
      return(paste0(acc, id, ","))
    }
    # Remove the first whole-id match; a regex-free comparison means "1"
    # never matches inside "11" or "21".
    items <- strsplit(acc, ",", fixed = TRUE)[[1]]
    hit <- match(id, items)
    if (!is.na(hit)) {
      items <- items[-hit]
    }
    if (length(items) == 0L) {
      ""
    } else {
      paste0(paste(items, collapse = ","), ",")
    }
  }

  out <- character(nrow(my_df))
  for (rows in split(seq_len(nrow(my_df)), my_df$group_id)) {
    # accumulate = TRUE also returns the seed as element 1; drop it.
    out[rows] <- Reduce(step, rows, init = "", accumulate = TRUE)[-1]
  }
  out
})
my_df
group_id other_id case collcetion
1 1 1 add 1,
2 1 1 add 1,1,
3 1 11 add 1,1,11,
4 1 1 replace 1,11,
5 1 11 replace 1,
6 1 1 replace
7 1 10 add 10,
8 1 10 replace
9 2 2 add 2,
10 2 10 add 2,10,
11 2 10 replace 2,
12 2 2 replace
13 2 3 add 3,
14 2 3 replace