通过包含字符串的变量跨行进行变异或汇总

mutate or summarise across rows by variable containing string

我想创建一个新的数据表,其中每一列是名称包含某个字符串的那些变量的按行总和。作为新手,我一直在尝试用 dplyr 新增的 across 来实现,以便代码保持在 tidyverse 之内。非常感谢任何帮助。

# Example input: one identifier column plus three measurement columns.
# data.frame() repairs the non-syntactic names, so the last two columns
# end up as `A.B` and `A.B.C`.
dat <- data.frame(
  Image = c(1, 2, 3, 4),
  A = c(1, 2, 3, 4),
  `A:B` = c(5, 6, 7, 8),
  `A:B:C` = c(9, 10, 11, 12)
)

目标是得到列名中包含“A”、“B”或“C”的变量的按行总和:

# Desired result: for every row, the total of the columns whose name
# contains "A", "B" or "C" respectively.
datsums <- data.frame(
  Image = c(1, 2, 3, 4),
  Asum = c(15, 18, 21, 24),
  Bsum = c(14, 16, 18, 20),
  Csum = c(9, 10, 11, 12)
)

我使用较新的 dplyr 动词不成功:

# NOTE(review): this is the asker's non-working attempt, kept verbatim.
# Problems visible in the code:
#   * str_detect("A") is called with a single argument, so no pattern is
#     supplied to it;
#   * every across(...) is closed right after str_detect(...), so `sum` and
#     `.names` are handed to summarise() instead of across();
#   * `.names"Csum"` on the last line lacks the `=`, so the call does not parse.
datsums<- dat %>% summarise(across(str_detect("A")), sum, .names ="Asum",
across(str_detect("B")), sum, .names="Bsum",
across(str_detect("C")), sum, .names"Csum")

因为你想要按行求和,你可以使用:

library(dplyr)

# Build one row-total column per letter. The match is case-sensitive so that
# the lowercase `a` in `Image` does not pull that column into the "A" total.
transmute(
  dat,
  Asum = rowSums(select(dat, contains("A", ignore.case = FALSE))),
  Bsum = rowSums(select(dat, contains("B", ignore.case = FALSE))),
  Csum = rowSums(select(dat, contains("C", ignore.case = FALSE)))
)

或者对于许多变量使用:

# Letters to total; each one yields a column named "<letter>sum".
cols <- c("A", "B", "C")
purrr::map_dfc(cols, function(letter) {
  # Columns of `dat` whose name contains this letter (case-sensitive).
  matched <- select(dat, contains(letter, ignore.case = FALSE))
  # One-column data frame holding the row totals, named dynamically.
  transmute(dat, !!paste0(letter, "sum") := rowSums(matched))
})

#  Asum Bsum Csum
#1   15   14    9
#2   18   16   10
#3   21   18   11
#4   24   20   12

使用 rowwise 和 c_across:

library(tidyverse)

# Row-wise totals of the columns whose name contains "A", "B" or "C".
# contains() ignores case by default, so "A" would also match the `a` in
# `Image`; ignore.case = FALSE keeps the identifier column out of the sums.
dat %>% 
  rowwise() %>% 
  summarise(
    Asum = sum(c_across(contains("A", ignore.case = FALSE))),
    Bsum = sum(c_across(contains("B", ignore.case = FALSE))),
    Csum = sum(c_across(contains("C", ignore.case = FALSE)))
  )

Returns:

`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 4 x 3
   Asum  Bsum  Csum
  <dbl> <dbl> <dbl>
1    15    14     9
2    18    16    10
3    21    18    11
4    24    20    12

要向原始 data.frame 添加列,请使用 mutate 而不是 summarise

# Append the row-wise totals to the original columns.
# contains() ignores case by default, so "A" would also match the `a` in
# `Image`; ignore.case = FALSE keeps the identifier column out of the sums.
dat %>% 
  rowwise() %>% 
  mutate(
    Asum = sum(c_across(contains("A", ignore.case = FALSE))),
    Bsum = sum(c_across(contains("B", ignore.case = FALSE))),
    Csum = sum(c_across(contains("C", ignore.case = FALSE)))
  ) %>% 
  # Drop the row-wise grouping so later verbs operate on whole columns again.
  ungroup()
# A tibble: 4 x 7
  Image     A   A.B A.B.C  Asum  Bsum  Csum
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1     5     9    15    14     9
2     2     2     6    10    18    16    10
3     3     3     7    11    21    18    11
4     4     4     8    12    24    20    12

使用 pivot_longer 和 pivot_wider:

library(tidyverse)

# Reshape to long form, split each column name into its letters, then pivot
# back to wide form while summing every value that belongs to the same letter.
dat %>% 
  pivot_longer(-Image) %>% 
  # The column names are dot-separated (A.B, A.B.C). The dot is a regex
  # metacharacter, so it is escaped, and the backslash is doubled because a
  # single "\." is not a valid escape inside an R string.
  separate_rows(name, sep = "\\.") %>% 
  # id_cols is passed by name; newer tidyr deprecates passing it by position.
  pivot_wider(id_cols = Image,
              names_from = name,
              values_from = value, 
              values_fn = sum, 
              names_prefix = "sum")
#> # A tibble: 4 x 4
#>   Image  sumA  sumB  sumC
#>   <dbl> <dbl> <dbl> <dbl>
#> 1     1    15    14     9
#> 2     2    18    16    10
#> 3     3    21    18    11
#> 4     4    24    20    12

由 reprex package (v0.3.0) 于 2020-12-07 创建