将两个不同组的 cumsum 的结果分成两个不同的列?
Separating the results from cumsum on two different groups into two different columns?
我正在尝试为两个单独的组求出累计总和,并将这些总和分别列在单独的列中。
这是数据框,根据时间排序:
time group value
0 A 0
0 B 0
0 A 0
1 A 0
1 B 1
1 B 0
2 B 1
2 A 1
2 A 1
2 A -1
3 A 0
3 B 1
这是我目前用来按组计算 cumsum 并创建 cumsum 列的代码:
# Running total of value within each group; ave() returns the results in the
# original row order, so they can be stored directly as a new column.
df[["cumsum"]] <- ave(df[["value"]], df[["group"]], FUN = cumsum)
time group value cumsum
0 A 0 0
0 B 0 0
0 A 0 0
1 A 0 0
1 B 1 1
1 B 0 1
2 B 1 2
2 A 1 1
2 A 1 2
2 A -1 1
3 A 0 1
3 B 1 3
如何将结果分成两列,一列用于 A,一列用于 B?或者,是否有可能找到有条件的 cumsum?无论哪种方式,我都希望结果如下所示:
time group value cumsum_A cumsum_B
0 A 0 0 0
0 B 0 0 0
0 A 0 0 0
1 A 0 0 0
1 B 1 0 1
1 B 0 0 1
2 B 1 0 2
2 A 1 1 2
2 A 1 2 2
2 A -1 1 2
3 A 0 1 2
3 B 1 1 3
谢谢!
您可以先找出 unique 值,然后使用 sapply/lapply 循环遍历它们,有条件地为每个值计算 cumsum。
# One conditional running total per distinct group, stored as cumsum_<group>.
unique_val <- unique(df$group)

# Rows outside group `x` are multiplied by FALSE (i.e. 0), so they add nothing
# and the running total simply carries forward on those rows.
running_total_for <- function(x) {
  belongs <- df$group == x
  cumsum(belongs * df$value)
}

df[paste0("cumsum_", unique_val)] <- lapply(unique_val, running_total_for)
df
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
下面是使用 table 和 colCumsums 的方案:
library(matrixStats)
# Row-by-group indicator table: cell (i, g) is 1 when row i belongs to group g.
ind <- table(seq_len(nrow(df1)), df1$group)
# Take the new column names from the table itself. table() orders its columns
# by sorted group level (and includes unused factor levels), whereas unique()
# returns first-appearance order, so naming from unique() could label the
# columns with the wrong group or produce a length mismatch.
nm1 <- paste0("cumsum_", colnames(ind))
# df1$value is recycled down each column, so every indicator column is scaled
# by the row's value before the column-wise cumulative sum.
df1[nm1] <- colCumsums(ind * df1$value)
df1
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
或者另一种选择是 model.matrix
# Same idea with model.matrix(): `~ group - 1` drops the intercept so each
# group gets its own indicator column, which is then scaled by value and
# accumulated column-wise. The result is printed, not stored in df1.
onehot <- model.matrix(~ group - 1, data = df1)
colCumsums(onehot * df1$value)
或者结合使用 model.matrix 和 tidyverse:
library(tidyverse)
# Step 1: indicator columns per group (named groupA, groupB, ...), as a tibble.
indicator <- as_tibble(model.matrix(~ group - 1, df1))
# Step 2: scale every indicator column by value and take its running total.
running <- mutate_all(indicator, ~ cumsum(. * df1$value))
# Step 3: swap the "group" prefix for "cumsum" (giving cumsumA, cumsumB, ...).
running <- rename_all(running, ~ str_replace(., "group", "cumsum"))
# Step 4: show the original columns side by side with the new ones.
bind_cols(df1, running)
# time group value cumsumA cumsumB
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
或者使用 count 和 spread:
# Alternative built on count() + spread(): create a per-row indicator column
# for every group, then accumulate value inside each of those columns.
# The result is printed only; df1 itself is not reassigned.
df1 %>%
# rn records each row's position so rows can be told apart after counting.
mutate(rn = row_number()) %>%
# One row per (group, rn) pair; the count lands in column n.
dplyr::count(group, rn) %>%
# Prefix the group labels so the spread columns are named cumsumA, cumsumB, ...
mutate(group = str_c("cumsum", group)) %>%
# Widen: one column per group, filled with 0 where the row is not in the group.
spread(group, n, fill = 0) %>%
# -1 skips the first column (presumably rn, the id column kept by spread();
# verify) and turns each indicator column into a running total of value.
# NOTE(review): this relies on the rows being back in rn order here - confirm.
mutate_at(-1, ~ cumsum(. * df1$value)) %>%
# The helper row index is no longer needed.
select(-rn) %>%
bind_cols(df1, .)
数据
# Sample data: 12 rows ordered by time, two groups (A/B) and integer values.
df1 <- data.frame(
  time = rep(0:3, times = c(3L, 3L, 4L, 2L)),
  group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
  value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L),
  # Keep group as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
您还可以使用 if_else,在某行不属于所需组时将 value 替换为 0,如下所示。这里并非必须使用 dplyr(可以改用 base::ifelse 并避免使用 mutate)。
library(tidyverse)
# Sample data: 12 rows ordered by time, two groups (A/B) and integer values.
df1 <- data.frame(
  time = rep(0:3, times = c(3L, 3L, 4L, 2L)),
  group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
  value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L),
  stringsAsFactors = FALSE
)
# For each target group, replace the other group's values with 0L before
# accumulating; 0L keeps both if_else() branches integer, like value.
# The result is printed only; df1 is not reassigned.
mutate(
  df1,
  cumsum_A = cumsum(if_else(group == "A", value, 0L)),
  cumsum_B = cumsum(if_else(group == "B", value, 0L))
)
#> time group value cumsum_A cumsum_B
#> 1 0 A 0 0 0
#> 2 0 B 0 0 0
#> 3 0 A 0 0 0
#> 4 1 A 0 0 0
#> 5 1 B 1 0 1
#> 6 1 B 0 0 1
#> 7 2 B 1 0 2
#> 8 2 A 1 1 2
#> 9 2 A 1 2 2
#> 10 2 A -1 1 2
#> 11 3 A 0 1 2
#> 12 3 B 1 1 3
由 reprex package (v0.3.0) 于 2019-06-25 创建
我正在尝试为两个单独的组求出累计总和,并将这些总和分别列在单独的列中。
这是数据框,根据时间排序:
time group value
0 A 0
0 B 0
0 A 0
1 A 0
1 B 1
1 B 0
2 B 1
2 A 1
2 A 1
2 A -1
3 A 0
3 B 1
这是我目前用来按组计算 cumsum 并创建 cumsum 列的代码:
# Running total of value within each group; ave() returns the results in the
# original row order, so they can be stored directly as a new column.
df[["cumsum"]] <- ave(df[["value"]], df[["group"]], FUN = cumsum)
time group value cumsum
0 A 0 0
0 B 0 0
0 A 0 0
1 A 0 0
1 B 1 1
1 B 0 1
2 B 1 2
2 A 1 1
2 A 1 2
2 A -1 1
3 A 0 1
3 B 1 3
如何将结果分成两列,一列用于 A,一列用于 B?或者,是否有可能找到有条件的 cumsum?无论哪种方式,我都希望结果如下所示:
time group value cumsum_A cumsum_B
0 A 0 0 0
0 B 0 0 0
0 A 0 0 0
1 A 0 0 0
1 B 1 0 1
1 B 0 0 1
2 B 1 0 2
2 A 1 1 2
2 A 1 2 2
2 A -1 1 2
3 A 0 1 2
3 B 1 1 3
谢谢!
您可以先找出 unique 值,然后使用 sapply/lapply 循环遍历它们,有条件地为每个值计算 cumsum。
# One conditional running total per distinct group, stored as cumsum_<group>.
unique_val <- unique(df$group)

# Rows outside group `x` are multiplied by FALSE (i.e. 0), so they add nothing
# and the running total simply carries forward on those rows.
running_total_for <- function(x) {
  belongs <- df$group == x
  cumsum(belongs * df$value)
}

df[paste0("cumsum_", unique_val)] <- lapply(unique_val, running_total_for)
df
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
下面是使用 table 和 colCumsums 的方案:
library(matrixStats)
# Row-by-group indicator table: cell (i, g) is 1 when row i belongs to group g.
ind <- table(seq_len(nrow(df1)), df1$group)
# Take the new column names from the table itself. table() orders its columns
# by sorted group level (and includes unused factor levels), whereas unique()
# returns first-appearance order, so naming from unique() could label the
# columns with the wrong group or produce a length mismatch.
nm1 <- paste0("cumsum_", colnames(ind))
# df1$value is recycled down each column, so every indicator column is scaled
# by the row's value before the column-wise cumulative sum.
df1[nm1] <- colCumsums(ind * df1$value)
df1
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
或者另一种选择是 model.matrix
# Same idea with model.matrix(): `~ group - 1` drops the intercept so each
# group gets its own indicator column, which is then scaled by value and
# accumulated column-wise. The result is printed, not stored in df1.
onehot <- model.matrix(~ group - 1, data = df1)
colCumsums(onehot * df1$value)
或者结合使用 model.matrix 和 tidyverse:
library(tidyverse)
# Step 1: indicator columns per group (named groupA, groupB, ...), as a tibble.
indicator <- as_tibble(model.matrix(~ group - 1, df1))
# Step 2: scale every indicator column by value and take its running total.
running <- mutate_all(indicator, ~ cumsum(. * df1$value))
# Step 3: swap the "group" prefix for "cumsum" (giving cumsumA, cumsumB, ...).
running <- rename_all(running, ~ str_replace(., "group", "cumsum"))
# Step 4: show the original columns side by side with the new ones.
bind_cols(df1, running)
# time group value cumsumA cumsumB
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
或者使用 count 和 spread:
# Alternative built on count() + spread(): create a per-row indicator column
# for every group, then accumulate value inside each of those columns.
# The result is printed only; df1 itself is not reassigned.
df1 %>%
# rn records each row's position so rows can be told apart after counting.
mutate(rn = row_number()) %>%
# One row per (group, rn) pair; the count lands in column n.
dplyr::count(group, rn) %>%
# Prefix the group labels so the spread columns are named cumsumA, cumsumB, ...
mutate(group = str_c("cumsum", group)) %>%
# Widen: one column per group, filled with 0 where the row is not in the group.
spread(group, n, fill = 0) %>%
# -1 skips the first column (presumably rn, the id column kept by spread();
# verify) and turns each indicator column into a running total of value.
# NOTE(review): this relies on the rows being back in rn order here - confirm.
mutate_at(-1, ~ cumsum(. * df1$value)) %>%
# The helper row index is no longer needed.
select(-rn) %>%
bind_cols(df1, .)
数据
# Sample data: 12 rows ordered by time, two groups (A/B) and integer values.
df1 <- data.frame(
  time = rep(0:3, times = c(3L, 3L, 4L, 2L)),
  group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
  value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L),
  # Keep group as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
您还可以使用 if_else,在某行不属于所需组时将 value 替换为 0,如下所示。这里并非必须使用 dplyr(可以改用 base::ifelse 并避免使用 mutate)。
library(tidyverse)
# Sample data: 12 rows ordered by time, two groups (A/B) and integer values.
df1 <- data.frame(
  time = rep(0:3, times = c(3L, 3L, 4L, 2L)),
  group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
  value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L),
  stringsAsFactors = FALSE
)
# For each target group, replace the other group's values with 0L before
# accumulating; 0L keeps both if_else() branches integer, like value.
# The result is printed only; df1 is not reassigned.
mutate(
  df1,
  cumsum_A = cumsum(if_else(group == "A", value, 0L)),
  cumsum_B = cumsum(if_else(group == "B", value, 0L))
)
#> time group value cumsum_A cumsum_B
#> 1 0 A 0 0 0
#> 2 0 B 0 0 0
#> 3 0 A 0 0 0
#> 4 1 A 0 0 0
#> 5 1 B 1 0 1
#> 6 1 B 0 0 1
#> 7 2 B 1 0 2
#> 8 2 A 1 1 2
#> 9 2 A 1 2 2
#> 10 2 A -1 1 2
#> 11 3 A 0 1 2
#> 12 3 B 1 1 3
由 reprex package (v0.3.0) 于 2019-06-25 创建