汇总 NA,并按组计数
summarize NA, count BY GROUP
我有这个测试数据框:
# Reproducible example data: 20 rows in five plant groups (`plant_sp`),
# each row holding an `sp_rich` value of 0, 1, or NA.
test_df <- data.frame(
  plant_sp = rep(
    paste0("plant_", 1:5),
    times = c(2L, 2L, 5L, 6L, 5L)
  ),
  sp_rich = c(
    1, 1,                # plant_1
    NA, 1,               # plant_2
    NA, 1, 0, 0, NA,     # plant_3
    0, 0, 1, 0, 0, 1,    # plant_4
    0, NA, NA, 0, NA     # plant_5
  ),
  stringsAsFactors = FALSE
)
我想创建一个新的数据框,从该数据中提取数据:
统计每个组中“1”的个数和 NA 的个数(例如在组 plant_1 中,有 2 个“1”和 0 个“NA”)。
你能帮帮我吗?
谢谢
伊多
这应该有效
library(dplyr)
# Per plant group: `count` is the number of positive, non-missing
# `sp_rich` values; `miss` is the number of NA values.
test_df %>%
  group_by(plant_sp) %>%
  summarise(
    # Comparisons against NA yield NA and are dropped by na.rm.
    count = sum(sp_rich > 0, na.rm = TRUE),
    miss = sum(is.na(sp_rich))
  )
# A tibble: 5 x 3
plant_sp count miss
<chr> <int> <int>
1 plant_1 2 0
2 plant_2 1 1
3 plant_3 1 2
4 plant_4 2 0
5 plant_5 0 3
与 @latlio 的答案非常接近,但使用了数据重塑(reshape):
library(dplyr)
library(tidyr)
#Code
# Reshaping approach: label every row as "miss" (NA) or "count" (non-NA),
# tally the labels within each plant group, then spread the labels into
# columns.
# NOTE: `count` here is the number of non-missing rows (zeros included),
# not the number of positive values.
new <- test_df %>%
  mutate(Var = if_else(is.na(sp_rich), "miss", "count")) %>%
  group_by(plant_sp, Var) %>%
  summarise(N = n()) %>%
  pivot_wider(names_from = Var, values_from = N) %>%
  # Group/label combinations that never occur come out as NA; show them as 0.
  replace(is.na(.), 0)
输出:
# A tibble: 5 x 3
# Groups: plant_sp [5]
plant_sp count miss
<chr> <int> <int>
1 plant_1 2 0
2 plant_2 1 1
3 plant_3 3 2
4 plant_4 6 0
5 plant_5 2 3
一个 data.table 方案:
library(data.table)

# setDT() converts test_df to a data.table by reference (no copy is made),
# so test_df itself is a data.table after this call.
# Per plant group: `count` is the number of positive, non-missing `sp_rich`
# values; `miss` is the number of NA values.
setDT(test_df)[
  ,
  .(
    count = sum(sp_rich > 0, na.rm = TRUE),
    miss = sum(is.na(sp_rich))
  ),
  by = plant_sp
]
plant_sp count miss
1: plant_1 2 0
2: plant_2 1 1
3: plant_3 1 2
4: plant_4 2 0
5: plant_5 0 3
我有这个测试数据框:
# Reproducible example data: 20 rows in five plant groups (`plant_sp`),
# each row holding an `sp_rich` value of 0, 1, or NA.
test_df <- data.frame(
  plant_sp = rep(
    paste0("plant_", 1:5),
    times = c(2L, 2L, 5L, 6L, 5L)
  ),
  sp_rich = c(
    1, 1,                # plant_1
    NA, 1,               # plant_2
    NA, 1, 0, 0, NA,     # plant_3
    0, 0, 1, 0, 0, 1,    # plant_4
    0, NA, NA, 0, NA     # plant_5
  ),
  stringsAsFactors = FALSE
)
我想创建一个新的数据框,从该数据中提取数据:
统计每个组中“1”的个数和 NA 的个数(例如在组 plant_1 中,有 2 个“1”和 0 个“NA”)。
你能帮帮我吗? 谢谢 伊多
这应该有效
library(dplyr)
# Per plant group: `count` is the number of positive, non-missing
# `sp_rich` values; `miss` is the number of NA values.
test_df %>%
  group_by(plant_sp) %>%
  summarise(
    # Comparisons against NA yield NA and are dropped by na.rm.
    count = sum(sp_rich > 0, na.rm = TRUE),
    miss = sum(is.na(sp_rich))
  )
# A tibble: 5 x 3
plant_sp count miss
<chr> <int> <int>
1 plant_1 2 0
2 plant_2 1 1
3 plant_3 1 2
4 plant_4 2 0
5 plant_5 0 3
与 @latlio 的答案非常接近,但使用了数据重塑(reshape):
library(dplyr)
library(tidyr)
#Code
# Reshaping approach: label every row as "miss" (NA) or "count" (non-NA),
# tally the labels within each plant group, then spread the labels into
# columns.
# NOTE: `count` here is the number of non-missing rows (zeros included),
# not the number of positive values.
new <- test_df %>%
  mutate(Var = if_else(is.na(sp_rich), "miss", "count")) %>%
  group_by(plant_sp, Var) %>%
  summarise(N = n()) %>%
  pivot_wider(names_from = Var, values_from = N) %>%
  # Group/label combinations that never occur come out as NA; show them as 0.
  replace(is.na(.), 0)
输出:
# A tibble: 5 x 3
# Groups: plant_sp [5]
plant_sp count miss
<chr> <int> <int>
1 plant_1 2 0
2 plant_2 1 1
3 plant_3 3 2
4 plant_4 6 0
5 plant_5 2 3
一个 data.table 方案:
library(data.table)

# setDT() converts test_df to a data.table by reference (no copy is made),
# so test_df itself is a data.table after this call.
# Per plant group: `count` is the number of positive, non-missing `sp_rich`
# values; `miss` is the number of NA values.
setDT(test_df)[
  ,
  .(
    count = sum(sp_rich > 0, na.rm = TRUE),
    miss = sum(is.na(sp_rich))
  ),
  by = plant_sp
]
plant_sp count miss
1: plant_1 2 0
2: plant_2 1 1
3: plant_3 1 2
4: plant_4 2 0
5: plant_5 0 3