如何在 R dplyr/tidyr 的单个命令中多次使用 dplyr::summarize?

How to use dplyr::summarize multiple times in a single command in R dplyr/tidyr?

我有一个由物种 1、2、3 和 4 组成的群落。我想用 dplyr 计算物种 i 的丰度与其余物种合并丰度之间的协方差,并且对每个物种都这样计算一次。只对一个物种使用 summarise 时可以正常运行,但当我把多个 summarise 串联在一起时就会报错。有什么建议吗?

# Simulated community survey: 3 sites x 2 months ("J", "F") x 3 quadrants,
# i.e. 18 rows, each holding the abundance of four species.
set.seed(111) # fixed seed so the sampled abundances are reproducible
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)

df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)

# For each focal species, the combined abundance of the other three species.
df$sp2.3.4 <- df$sp2 + df$sp3 + df$sp4 # no sp1
df$sp3.4.1 <- df$sp3 + df$sp4 + df$sp1 # no sp2
df$sp1.2.4 <- df$sp1 + df$sp2 + df$sp4 # no sp3
df$sp1.2.3 <- df$sp1 + df$sp2 + df$sp3 # no sp4

library(tidyr)

# Reshape to long format in two passes so that every pairing of a single
# species with a combined-abundance column appears as its own row.
# pivot_longer() replaces the superseded gather(). It emits rows grouped by
# original row rather than stacked column by column, so the row order differs
# from gather(), but the set of rows and the column layout are the same.

# Pass 1: one row per original row x single species.
df.long <- pivot_longer(
  df,
  cols = c(sp1, sp2, sp3, sp4),
  names_to = "species",
  values_to = "abundance"
)

# Pass 2: one row per (row, species) x combined-abundance column.
df.long <- pivot_longer(
  df.long,
  cols = c(sp2.3.4, sp3.4.1, sp1.2.4, sp1.2.3),
  names_to = "species.covar",
  values_to = "abundance.covar"
)

# pivot_longer() returns a tibble; convert back so df.long remains a plain
# data.frame, as it was with gather().
df.long <- as.data.frame(df.long)

# The name columns are character vectors; store them as factors.
df.long$species <- factor(df.long$species)
df.long$species.covar <- factor(df.long$species.covar)

library(dplyr)

# Per month x site: correlation between the abundance of sp1 and the combined
# abundance of the other three species (sp2.3.4), returned as a data.frame.
agg.cov <- as.data.frame(
  dplyr::summarise(
    group_by(df.long, month, site),
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    )
  )
)



  # Attempt to add one correlation column per species by chaining four
  # summarise() calls. This does NOT work: the first summarise() returns only
  # the grouping columns (month, site) plus covar.species1, so the columns
  # `abundance`, `species`, `abundance.covar` and `species.covar` are gone by
  # the time the second summarise() runs (see the error printed below).
  agg.cov <- df.long%>% 
  group_by(month,site)%>% 
  dplyr::summarise(covar.species1 = cor(abundance[species=="sp1"],abundance.covar[species.covar=="sp2.3.4"]))%>%
  dplyr::summarise(covar.species2 = cor(abundance[species=="sp2"],abundance.covar[species.covar=="sp3.4.1"]))%>%
  dplyr::summarise(covar.species3 = cor(abundance[species=="sp3"],abundance.covar[species.covar=="sp1.2.4"]))%>%
  dplyr::summarise(covar.species4 = cor(abundance[species=="sp4"],abundance.covar[species.covar=="sp1.2.3"]))%>%
  as.data.frame()

Error: Problem with `summarise()` column `covar.species2`.
ℹ `covar.species2 = cor(...)`.
x object 'abundance.covar' not found
ℹ The error occurred in group 1: month = "F".

我没能运行您的示例,但一个可能的解决方案(我的猜测)是在同一次 summarise() 调用中计算全部四列:

# Compute all four per-species correlations inside a single summarise() call,
# so every expression still sees the long-format columns of its group.
agg.cov <- df.long %>%
  group_by(month, site) %>%
  dplyr::summarise(
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    ),
    covar.species2 = cor(
      abundance[species == "sp2"],
      abundance.covar[species.covar == "sp3.4.1"]
    ),
    covar.species3 = cor(
      abundance[species == "sp3"],
      abundance.covar[species.covar == "sp1.2.4"]
    ),
    covar.species4 = cor(
      abundance[species == "sp4"],
      abundance.covar[species.covar == "sp1.2.3"]
    )
  ) %>%
  as.data.frame()

下面三种方法应该有效

  1. 基于 map 的方法
library(dplyr)
library(stringr)
library(purrr)
# Species columns to analyse: every column of df whose name starts with "sp".
# NOTE: assumes the only "sp*" columns in df are the raw per-species columns.
nm1 <- names(df)[startsWith(names(df), "sp")]

# For each focal species, correlate its abundance with the row-wise summed
# abundance of all other species within each month x site group. This gives
# one summary table per species; the tables are then joined on the grouping
# keys into a single wide result, which is printed.
map(nm1, function(focal) {
  others <- setdiff(nm1, focal)
  df %>%
    # Row-wise total of the non-focal species. It is computed before grouping,
    # so the deprecated cur_data() helper is not needed; all_of() makes the
    # use of an external character vector explicit.
    mutate(.others_total = rowSums(select(., all_of(others)))) %>%
    group_by(month, site) %>%
    summarise(
      !!str_c("covar_species", "_", focal) :=
        cor(.data[[focal]], .others_total),
      .groups = "drop"
    )
}) %>%
  # Join on explicit keys rather than relying on an implicit natural join.
  reduce(function(x, y) left_join(x, y, by = c("month", "site")))

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

  2. 使用 pivot_longer 重塑
library(tidyr)
# Long-format approach: tag each original row, stack the species columns,
# derive "sum of the other species" within each original row, then correlate
# per month x site x species and spread the results back to one column per
# species.
df %>%
  mutate(row_id = row_number()) %>%
  pivot_longer(
    cols = starts_with("sp"),
    names_to = "sp",
    values_to = "abund"
  ) %>%
  group_by(row_id) %>%
  # Total of the row minus the focal species = combined abundance of the rest.
  mutate(others = sum(abund) - abund) %>%
  ungroup() %>%
  mutate(sp = str_c("covar_species_", sp)) %>%
  group_by(month, site, sp) %>%
  summarise(value = cor(abund, others), .groups = "drop") %>%
  pivot_wider(names_from = sp, values_from = value)

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

  3. 使用 across
# across() approach: add a row-wise total over all species columns, then, per
# month x site group, correlate each species with (total - that species).
df %>%
  # `.` is the piped-in data frame; this mutate() is ungrouped, so selecting
  # from it is equivalent to the deprecated cur_data() and works on both old
  # and new dplyr versions.
  mutate(Sum = rowSums(select(., starts_with("sp")))) %>%
  group_by(month, site) %>%
  summarise(
    across(
      starts_with("sp"),
      ~ cor(.x, Sum - .x),
      .names = "covar_species_{.col}"
    ),
    .groups = "drop"
  )

-输出

# A tibble: 6 x 6
  month  site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
  <chr> <dbl>             <dbl>             <dbl>             <dbl>             <dbl>
1 F         1             0.479             0.987            -0.170            -0.980
2 F         2            -0.858            -0.454            -0.160             0.359
3 F         3            -0.999            -1.00             -0.933            NA    
4 J         1            -0.945            -0.963            NA                 0.596
5 J         2            -0.516            -0.148            -0.792             0.629
6 J         3             0.277            -0.591            -0.702             0.277

数据

# Example data: 3 sites x 2 months ("J", "F") x 3 quadrants, i.e. 18 rows,
# each holding the abundance of four species.
set.seed(111) # fixed seed so the sampled abundances are reproducible
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)

df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)