如何在 R dplyr/tidyr 的单个命令中多次使用 dplyr::summarize?
How to use dplyr::summarize multiple times in a single command in R dplyr/ tidyr?
我有一个由物种 1、2、3 和 4 组成的群落。我想用 dplyr 计算物种 i 与其余物种的合并丰度之间的协方差,并且想对每个物种组合都这样做。只对一个物种计算时 dplyr 可以正常运行,但当我尝试把多个 summarise 串在一起时就不行了。有什么建议吗?
# Simulated community survey: 3 sites x 2 months ("J", "F") x 3 quadrants
# gives 18 rows. The fixed seed makes the sampled abundances reproducible.
set.seed(111)
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
# The abundances are drawn in the order sp1..sp4; reordering these calls would
# change the simulated values because they share one random stream.
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)
df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)
# Leave-one-out totals: summed abundance of the three other species.
df$sp2.3.4 <- df$sp2 + df$sp3 + df$sp4 # no sp1
df$sp3.4.1 <- df$sp3 + df$sp4 + df$sp1 # no sp2
df$sp1.2.4 <- df$sp1 + df$sp2 + df$sp4 # no sp3
df$sp1.2.3 <- df$sp1 + df$sp2 + df$sp3 # no sp4
library(tidyr)
# Stack the four single-species columns into species/abundance pairs.
df.long <- gather(
  df,
  key = "species", value = "abundance",
  sp1, sp2, sp3, sp4
)
# Stack the four leave-one-out totals the same way, so every species row is
# repeated once per combined-abundance column.
df.long <- gather(
  df.long,
  key = "species.covar", value = "abundance.covar",
  sp2.3.4, sp3.4.1, sp1.2.4, sp1.2.3
)
# Store both key columns as factors.
df.long[c("species", "species.covar")] <- lapply(
  df.long[c("species", "species.covar")],
  factor
)
library(dplyr)
# Correlate sp1 with the pooled abundance of the other three species,
# producing one value per month x site group.
agg.cov <- as.data.frame(
  dplyr::summarise(
    group_by(df.long, month, site),
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    )
  )
)
# Attempt to compute all four correlations by chaining summarise() calls.
# This fails as written: the second summarise() receives the output of the
# first one, not df.long, so the column it refers to is no longer available
# (see the 'abundance.covar' not found error reported below).
agg.cov <- df.long%>%
group_by(month,site)%>%
dplyr::summarise(covar.species1 = cor(abundance[species=="sp1"],abundance.covar[species.covar=="sp2.3.4"]))%>%
dplyr::summarise(covar.species2 = cor(abundance[species=="sp2"],abundance.covar[species.covar=="sp3.4.1"]))%>%
dplyr::summarise(covar.species3 = cor(abundance[species=="sp3"],abundance.covar[species.covar=="sp1.2.4"]))%>%
dplyr::summarise(covar.species4 = cor(abundance[species=="sp4"],abundance.covar[species.covar=="sp1.2.3"]))%>%
as.data.frame()
Error: Error: Problem with `summarise()` column `covar.species2`.
ℹ `covar.species2 = cor(...)`.
x object 'abundance.covar' not found
ℹ The error occurred in group 1: month = "F".
我无法运行您的示例,但一个可能的解决方案(我猜)是把所有汇总列写在同一个 summarise() 调用里:
# One summarise() call can return several columns: pass one named expression
# per species instead of chaining separate summarise() calls.
agg.cov <- df.long %>%
  group_by(month, site) %>%
  dplyr::summarise(
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    ),
    covar.species2 = cor(
      abundance[species == "sp2"],
      abundance.covar[species.covar == "sp3.4.1"]
    ),
    covar.species3 = cor(
      abundance[species == "sp3"],
      abundance.covar[species.covar == "sp1.2.4"]
    ),
    covar.species4 = cor(
      abundance[species == "sp4"],
      abundance.covar[species.covar == "sp1.2.3"]
    )
  ) %>%
  as.data.frame()
下面三种方法应该有效:
- 基于 map
library(dplyr)
library(stringr)
library(purrr)
# All columns of df whose names start with "sp".
# NOTE(review): this assumes df holds only the raw species columns
# (sp1..sp4), not the leave-one-out totals such as sp2.3.4 -- confirm.
nm1 <- names(df)[startsWith(names(df), "sp")]
# For each species, correlate its abundance with the row-wise total of the
# remaining species within every month x site group, then join the per-species
# results into one wide table.
map(nm1, ~ df %>%
  group_by(month, site) %>%
  summarise(
    !!str_c("covar_species", "_", .x) :=
      cor(
        .data[[.x]],
        # all_of() marks the names as an external character vector; passing a
        # bare vector to select() is ambiguous and deprecated in tidyselect.
        rowSums(select(cur_data(), all_of(setdiff(nm1, .x))))
      ),
    .groups = "drop"
  )) %>%
  # Explicit join keys instead of relying on left_join()'s natural-join guess.
  reduce(~ left_join(.x, .y, by = c("month", "site")))
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
- 用 pivot_longer 重塑
library(tidyr)
# Long-format route: tag each original row, compute "row total minus own
# value" for every species, correlate within month x site x species, and
# finally spread the species back into columns.
df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(cols = starts_with("sp"), names_to = "sp") %>%
  group_by(rn) %>%
  mutate(
    newvalue = sum(value) - value,
    sp = str_c("covar_species_", sp)
  ) %>%
  group_by(month, site, sp) %>%
  summarise(value = cor(value, newvalue), .groups = "drop") %>%
  pivot_wider(names_from = sp, values_from = value)
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
- 使用 across
# Wide-format route: add the row total over all "sp" columns once, then let
# across() correlate every species with "total minus itself" per month x site.
df %>%
  mutate(Sum = rowSums(select(cur_data(), starts_with("sp")))) %>%
  group_by(month, site) %>%
  summarise(
    across(
      starts_with("sp"),
      ~ cor(.x, Sum - .x),
      .names = "covar_species_{.col}"
    ),
    .groups = "drop"
  )
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
数据
# Example data used by the answers: 3 sites x 2 months ("J", "F") x 3 quadrants
# gives 18 rows. The fixed seed makes the sampled abundances reproducible.
set.seed(111)
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
# The abundances are drawn in the order sp1..sp4; reordering these calls would
# change the simulated values because they share one random stream.
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)
df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)
我有一个由物种 1、2、3 和 4 组成的群落。我想用 dplyr 计算物种 i 与其余物种的合并丰度之间的协方差,并且想对每个物种组合都这样做。只对一个物种计算时 dplyr 可以正常运行,但当我尝试把多个 summarise 串在一起时就不行了。有什么建议吗?
# Simulated community survey: 3 sites x 2 months ("J", "F") x 3 quadrants
# gives 18 rows. The fixed seed makes the sampled abundances reproducible.
set.seed(111)
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
# The abundances are drawn in the order sp1..sp4; reordering these calls would
# change the simulated values because they share one random stream.
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)
df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)
# Leave-one-out totals: summed abundance of the three other species.
df$sp2.3.4 <- df$sp2 + df$sp3 + df$sp4 # no sp1
df$sp3.4.1 <- df$sp3 + df$sp4 + df$sp1 # no sp2
df$sp1.2.4 <- df$sp1 + df$sp2 + df$sp4 # no sp3
df$sp1.2.3 <- df$sp1 + df$sp2 + df$sp3 # no sp4
library(tidyr)
# Stack the four single-species columns into species/abundance pairs.
df.long <- gather(
  df,
  key = "species", value = "abundance",
  sp1, sp2, sp3, sp4
)
# Stack the four leave-one-out totals the same way, so every species row is
# repeated once per combined-abundance column.
df.long <- gather(
  df.long,
  key = "species.covar", value = "abundance.covar",
  sp2.3.4, sp3.4.1, sp1.2.4, sp1.2.3
)
# Store both key columns as factors.
df.long[c("species", "species.covar")] <- lapply(
  df.long[c("species", "species.covar")],
  factor
)
library(dplyr)
# Correlate sp1 with the pooled abundance of the other three species,
# producing one value per month x site group.
agg.cov <- as.data.frame(
  dplyr::summarise(
    group_by(df.long, month, site),
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    )
  )
)
# Attempt to compute all four correlations by chaining summarise() calls.
# This fails as written: the second summarise() receives the output of the
# first one, not df.long, so the column it refers to is no longer available
# (see the 'abundance.covar' not found error reported below).
agg.cov <- df.long%>%
group_by(month,site)%>%
dplyr::summarise(covar.species1 = cor(abundance[species=="sp1"],abundance.covar[species.covar=="sp2.3.4"]))%>%
dplyr::summarise(covar.species2 = cor(abundance[species=="sp2"],abundance.covar[species.covar=="sp3.4.1"]))%>%
dplyr::summarise(covar.species3 = cor(abundance[species=="sp3"],abundance.covar[species.covar=="sp1.2.4"]))%>%
dplyr::summarise(covar.species4 = cor(abundance[species=="sp4"],abundance.covar[species.covar=="sp1.2.3"]))%>%
as.data.frame()
Error: Error: Problem with `summarise()` column `covar.species2`.
ℹ `covar.species2 = cor(...)`.
x object 'abundance.covar' not found
ℹ The error occurred in group 1: month = "F".
我无法运行您的示例,但一个可能的解决方案(我猜)是把所有汇总列写在同一个 summarise() 调用里:
# One summarise() call can return several columns: pass one named expression
# per species instead of chaining separate summarise() calls.
agg.cov <- df.long %>%
  group_by(month, site) %>%
  dplyr::summarise(
    covar.species1 = cor(
      abundance[species == "sp1"],
      abundance.covar[species.covar == "sp2.3.4"]
    ),
    covar.species2 = cor(
      abundance[species == "sp2"],
      abundance.covar[species.covar == "sp3.4.1"]
    ),
    covar.species3 = cor(
      abundance[species == "sp3"],
      abundance.covar[species.covar == "sp1.2.4"]
    ),
    covar.species4 = cor(
      abundance[species == "sp4"],
      abundance.covar[species.covar == "sp1.2.3"]
    )
  ) %>%
  as.data.frame()
下面三种方法应该有效:
- 基于 map
library(dplyr)
library(stringr)
library(purrr)
# All columns of df whose names start with "sp".
# NOTE(review): this assumes df holds only the raw species columns
# (sp1..sp4), not the leave-one-out totals such as sp2.3.4 -- confirm.
nm1 <- names(df)[startsWith(names(df), "sp")]
# For each species, correlate its abundance with the row-wise total of the
# remaining species within every month x site group, then join the per-species
# results into one wide table.
map(nm1, ~ df %>%
  group_by(month, site) %>%
  summarise(
    !!str_c("covar_species", "_", .x) :=
      cor(
        .data[[.x]],
        # all_of() marks the names as an external character vector; passing a
        # bare vector to select() is ambiguous and deprecated in tidyselect.
        rowSums(select(cur_data(), all_of(setdiff(nm1, .x))))
      ),
    .groups = "drop"
  )) %>%
  # Explicit join keys instead of relying on left_join()'s natural-join guess.
  reduce(~ left_join(.x, .y, by = c("month", "site")))
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
- 用 pivot_longer 重塑
library(tidyr)
# Long-format route: tag each original row, compute "row total minus own
# value" for every species, correlate within month x site x species, and
# finally spread the species back into columns.
df %>%
  mutate(rn = row_number()) %>%
  pivot_longer(cols = starts_with("sp"), names_to = "sp") %>%
  group_by(rn) %>%
  mutate(
    newvalue = sum(value) - value,
    sp = str_c("covar_species_", sp)
  ) %>%
  group_by(month, site, sp) %>%
  summarise(value = cor(value, newvalue), .groups = "drop") %>%
  pivot_wider(names_from = sp, values_from = value)
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
- 使用 across
# Wide-format route: add the row total over all "sp" columns once, then let
# across() correlate every species with "total minus itself" per month x site.
df %>%
  mutate(Sum = rowSums(select(cur_data(), starts_with("sp")))) %>%
  group_by(month, site) %>%
  summarise(
    across(
      starts_with("sp"),
      ~ cor(.x, Sum - .x),
      .names = "covar_species_{.col}"
    ),
    .groups = "drop"
  )
-输出
# A tibble: 6 x 6
month site covar_species_sp1 covar_species_sp2 covar_species_sp3 covar_species_sp4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 F 1 0.479 0.987 -0.170 -0.980
2 F 2 -0.858 -0.454 -0.160 0.359
3 F 3 -0.999 -1.00 -0.933 NA
4 J 1 -0.945 -0.963 NA 0.596
5 J 2 -0.516 -0.148 -0.792 0.629
6 J 3 0.277 -0.591 -0.702 0.277
数据
# Example data used by the answers: 3 sites x 2 months ("J", "F") x 3 quadrants
# gives 18 rows. The fixed seed makes the sampled abundances reproducible.
set.seed(111)
month <- rep(c("J", "J", "J", "F", "F", "F"), times = 3)
site <- rep(c(1, 2, 3), each = 6)
quadrant <- rep(c(1, 2, 3), times = 6)
# The abundances are drawn in the order sp1..sp4; reordering these calls would
# change the simulated values because they share one random stream.
sp1 <- sample(0:20, 18, replace = TRUE)
sp2 <- sample(0:15, 18, replace = TRUE)
sp3 <- sample(0:10, 18, replace = TRUE)
sp4 <- sample(0:4, 18, replace = TRUE)
df <- data.frame(month, site, quadrant, sp1, sp2, sp3, sp4)