通过包含字符串的变量跨行进行变异或汇总
mutate or summarise across rows by variable containing string
我想创建一个新数据 table,它是包含字符串的变量的行的总和。作为菜鸟,我一直在尝试使用新的 dplyr across
将其保留在 tidyverse 中。非常感谢帮助。
dat<- data.frame("Image" = c(1,2,3,4),
"A" = c(1,2,3,4),
"A:B"= c(5,6,7,8),
"A:B:C"= c(9,10,11,12))
获取包含“A”、“B”或“C”的变量行的总和。
datsums<- data.frame("Image" = c(1,2,3,4),
"Asum"= c(15,18,21,24),
"Bsum"=c(14,16,18,20),
"Csum"=c(9,10,11,12))
我使用较新的 dplyr 动词不成功:
datsums<- dat %>% summarise(across(str_detect("A")), sum, .names ="Asum",
across(str_detect("B")), sum, .names="Bsum",
across(str_detect("C")), sum, .names"Csum")
因为你想要按行求和,你可以使用:
library(dplyr)
dat %>%
transmute(Asum = rowSums(select(., contains('A', ignore.case = FALSE))),
Bsum = rowSums(select(., contains('B', ignore.case = FALSE))),
Csum = rowSums(select(., contains('C', ignore.case = FALSE))))
或者对于许多变量使用:
cols <- c('A', 'B', 'C')
purrr::map_dfc(cols, ~dat %>%
transmute(!!paste0(.x, 'sum') :=
rowSums(select(., contains(.x, ignore.case = FALSE)))))
# Asum Bsum Csum
#1 15 14 9
#2 18 16 10
#3 21 18 11
#4 24 20 12
使用 rowwise
和 c_across
:
library(tidyverse)
dat %>%
rowwise() %>%
summarise(
Asum = sum(c_across(contains("A"))),
Bsum = sum(c_across(contains("B"))),
Csum = sum(c_across(contains("C")))
)
Returns:
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 4 x 3
Asum Bsum Csum
<dbl> <dbl> <dbl>
1 16 14 9
2 20 16 10
3 24 18 11
4 28 20 12
要向原始 data.frame 添加列,请使用 mutate
而不是 summarise
:
dat %>%
rowwise() %>%
mutate(
Asum = sum(c_across(contains("A"))),
Bsum = sum(c_across(contains("B"))),
Csum = sum(c_across(contains("C")))
)
# A tibble: 4 x 7
# Rowwise:
Image A A.B A.B.C Asum Bsum Csum
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 5 9 16 14 9
2 2 2 6 10 20 16 10
3 3 3 7 11 24 18 11
4 4 4 8 12 28 20 12
使用pivot_longer
和pivot_wider
library(tidyverse)
dat %>%
pivot_longer(-Image) %>%
separate_rows(name, sep = "\.") %>%
pivot_wider(Image,
names_from = name,
values_from = value,
values_fn = sum,
names_prefix = "sum")
#> # A tibble: 4 x 4
#> Image sumA sumB sumC
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 15 14 9
#> 2 2 18 16 10
#> 3 3 21 18 11
#> 4 4 24 20 12
由 reprex package (v0.3.0)
于 2020-12-07 创建
我想创建一个新数据 table,它是包含字符串的变量的行的总和。作为菜鸟,我一直在尝试使用新的 dplyr across
将其保留在 tidyverse 中。非常感谢帮助。
dat<- data.frame("Image" = c(1,2,3,4),
"A" = c(1,2,3,4),
"A:B"= c(5,6,7,8),
"A:B:C"= c(9,10,11,12))
获取包含“A”、“B”或“C”的变量行的总和。
datsums<- data.frame("Image" = c(1,2,3,4),
"Asum"= c(15,18,21,24),
"Bsum"=c(14,16,18,20),
"Csum"=c(9,10,11,12))
我使用较新的 dplyr 动词不成功:
datsums<- dat %>% summarise(across(str_detect("A")), sum, .names ="Asum",
across(str_detect("B")), sum, .names="Bsum",
across(str_detect("C")), sum, .names"Csum")
因为你想要按行求和,你可以使用:
library(dplyr)
dat %>%
transmute(Asum = rowSums(select(., contains('A', ignore.case = FALSE))),
Bsum = rowSums(select(., contains('B', ignore.case = FALSE))),
Csum = rowSums(select(., contains('C', ignore.case = FALSE))))
或者对于许多变量使用:
cols <- c('A', 'B', 'C')
purrr::map_dfc(cols, ~dat %>%
transmute(!!paste0(.x, 'sum') :=
rowSums(select(., contains(.x, ignore.case = FALSE)))))
# Asum Bsum Csum
#1 15 14 9
#2 18 16 10
#3 21 18 11
#4 24 20 12
使用 rowwise
和 c_across
:
library(tidyverse)
dat %>%
rowwise() %>%
summarise(
Asum = sum(c_across(contains("A"))),
Bsum = sum(c_across(contains("B"))),
Csum = sum(c_across(contains("C")))
)
Returns:
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 4 x 3
Asum Bsum Csum
<dbl> <dbl> <dbl>
1 16 14 9
2 20 16 10
3 24 18 11
4 28 20 12
要向原始 data.frame 添加列,请使用 mutate
而不是 summarise
:
dat %>%
rowwise() %>%
mutate(
Asum = sum(c_across(contains("A"))),
Bsum = sum(c_across(contains("B"))),
Csum = sum(c_across(contains("C")))
)
# A tibble: 4 x 7
# Rowwise:
Image A A.B A.B.C Asum Bsum Csum
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 5 9 16 14 9
2 2 2 6 10 20 16 10
3 3 3 7 11 24 18 11
4 4 4 8 12 28 20 12
使用pivot_longer
和pivot_wider
library(tidyverse)
dat %>%
pivot_longer(-Image) %>%
separate_rows(name, sep = "\.") %>%
pivot_wider(Image,
names_from = name,
values_from = value,
values_fn = sum,
names_prefix = "sum")
#> # A tibble: 4 x 4
#> Image sumA sumB sumC
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 15 14 9
#> 2 2 18 16 10
#> 3 3 21 18 11
#> 4 4 24 20 12
由 reprex package (v0.3.0)
于 2020-12-07 创建