使用 R 聚合和汇总数据集信息
Aggregate and summarise dataset information with R
我有一个繁殖生产力(breeding productivity)数据集:
df1
# Nest.box Obs.type individual.number Clutch
# 1 Nest1 Egg 1 First
# 2 Nest1 Egg 2 First
# 3 Nest1 Egg 3 First
# 4 Nest2 Egg 1 First
# 5 Nest2 Egg 2 First
# 6 Nest2 Egg 1 First
# 7 Nest1 Chick 1 First
# 8 Nest1 Chick 2 First
# 9 Nest2 Chick 1 First
# 10 Nest2 Chick 2 First
# 11 Nest2 Chick 1 Second
# 12 Nest2 Chick 2 Second
我想按 Nest.box 和 Clutch 对这些数据进行汇总(显示每个巢箱、每一窝的最大卵数和最大雏鸟数)。
想要的输出是这样的:
output
# Nest.box Clutch Eggs Chicks
# 1 Nest1 First 3 2
# 2 Nest2 First 2 2
# 3 Nest2 Second NA 2
data.table 解决方案:
library(data.table)

# Count the distinct individuals of each observation type per nest box and
# clutch. The data set in this document is `df1`; the bare name `df` is not
# defined here and, in a fresh session, resolves to the stats::df() function,
# which setDT() rejects.
# as.data.table() works on a copy, whereas setDT() would convert `df1` itself
# to a data.table by reference and so change it for the other snippets.
# Groups without any matching observation get a count of 0 (not NA).
as.data.table(df1)[, .(
  Eggs = uniqueN(individual.number[Obs.type == "Egg"]),
  Chicks = uniqueN(individual.number[Obs.type == "Chick"])
), by = .(Nest.box, Clutch)]
# Nest.box Clutch Eggs Chicks
# 1: Nest1 First 3 2
# 2: Nest2 First 2 2
# 3: Nest2 Second 0 2
编辑
还有一种可能性:转换为宽格式
# Alternative: reshape to wide format, counting the distinct individuals per
# observation type for every nest box / clutch combination.
# Uses `df1` (the data set defined in this document) instead of the undefined
# `df`, and as.data.table() so that `df1` is not converted by reference.
dcast(
  as.data.table(df1),
  Nest.box + Clutch ~ Obs.type,
  value.var = "individual.number",
  fun.aggregate = uniqueN
)
# Nest.box Clutch Chick Egg
# 1: Nest1 First 2 3
# 2: Nest2 First 2 2
# 3: Nest2 Second 2 0
Tidyverse 方法是
# The pipeline needs dplyr (group_by, summarise) and tidyr (pivot_wider);
# attach both so the snippet runs on its own.
library(dplyr)
library(tidyr)

# Take the largest individual number per nest box, clutch and observation
# type, then spread the observation types into separate columns.
# Uses `df1` (the data set defined in this document) instead of the undefined
# `df`, and names the id columns explicitly rather than by position.
# Combinations with no observations are filled with NA by pivot_wider().
df1 %>%
  group_by(Nest.box, Clutch, Obs.type) %>%
  summarise(max = max(individual.number)) %>%
  pivot_wider(
    id_cols = c(Nest.box, Clutch),
    names_from = Obs.type,
    values_from = max
  )
# A tibble: 3 x 4
# Groups: Nest.box, Clutch [3]
Nest.box Clutch Chick Egg
<chr> <chr> <dbl> <dbl>
1 Nest1 First 2 3
2 Nest2 First 2 2
3 Nest2 Second 2 NA
对于 data.frame,这一切都可以在 tidyr::pivot_wider() 内一步完成:
library(tidyr)

# Go straight from long to wide: one row per nest box and clutch, one column
# per observation type. When a cell receives several observations, keep the
# largest individual number.
pivot_wider(
  data = df1,
  id_cols = c(Nest.box, Clutch),
  names_from = Obs.type,
  values_from = individual.number,
  values_fn = max
)
#> # A tibble: 3 × 4
#> Nest.box Clutch Egg Chick
#> <chr> <chr> <dbl> <dbl>
#> 1 Nest1 First 3 2
#> 2 Nest2 First 2 2
#> 3 Nest2 Second NA 2
创建于 2022-04-01,由 reprex package (v2.0.1) 生成
数据
# Example data: one row per observed egg or chick, recording the nest box,
# the observation type, the individual's number and the clutch.
df1 <- tibble::tibble(
  Nest.box = rep(
    c("Nest1", "Nest2", "Nest1", "Nest2"),
    times = c(3, 3, 2, 4)
  ),
  Obs.type = rep(c("Egg", "Chick"), each = 6),
  individual.number = c(1, 2, 3, 1, 2, 1, 1, 2, 1, 2, 1, 2),
  Clutch = rep(c("First", "Second"), times = c(10, 2))
)
我有一个繁殖生产力(breeding productivity)数据集:
df1
# Nest.box Obs.type individual.number Clutch
# 1 Nest1 Egg 1 First
# 2 Nest1 Egg 2 First
# 3 Nest1 Egg 3 First
# 4 Nest2 Egg 1 First
# 5 Nest2 Egg 2 First
# 6 Nest2 Egg 1 First
# 7 Nest1 Chick 1 First
# 8 Nest1 Chick 2 First
# 9 Nest2 Chick 1 First
# 10 Nest2 Chick 2 First
# 11 Nest2 Chick 1 Second
# 12 Nest2 Chick 2 Second
我想按 Nest.box 和 Clutch 对这些数据进行汇总(显示每个巢箱、每一窝的最大卵数和最大雏鸟数)。
想要的输出是这样的:
output
# Nest.box Clutch Eggs Chicks
# 1 Nest1 First 3 2
# 2 Nest2 First 2 2
# 3 Nest2 Second NA 2
data.table 解决方案:
library(data.table)

# Count the distinct individuals of each observation type per nest box and
# clutch. The data set in this document is `df1`; the bare name `df` is not
# defined here and, in a fresh session, resolves to the stats::df() function,
# which setDT() rejects.
# as.data.table() works on a copy, whereas setDT() would convert `df1` itself
# to a data.table by reference and so change it for the other snippets.
# Groups without any matching observation get a count of 0 (not NA).
as.data.table(df1)[, .(
  Eggs = uniqueN(individual.number[Obs.type == "Egg"]),
  Chicks = uniqueN(individual.number[Obs.type == "Chick"])
), by = .(Nest.box, Clutch)]
# Nest.box Clutch Eggs Chicks
# 1: Nest1 First 3 2
# 2: Nest2 First 2 2
# 3: Nest2 Second 0 2
编辑
还有一种可能性:转换为宽格式
# Alternative: reshape to wide format, counting the distinct individuals per
# observation type for every nest box / clutch combination.
# Uses `df1` (the data set defined in this document) instead of the undefined
# `df`, and as.data.table() so that `df1` is not converted by reference.
dcast(
  as.data.table(df1),
  Nest.box + Clutch ~ Obs.type,
  value.var = "individual.number",
  fun.aggregate = uniqueN
)
# Nest.box Clutch Chick Egg
# 1: Nest1 First 2 3
# 2: Nest2 First 2 2
# 3: Nest2 Second 2 0
Tidyverse 方法是
# The pipeline needs dplyr (group_by, summarise) and tidyr (pivot_wider);
# attach both so the snippet runs on its own.
library(dplyr)
library(tidyr)

# Take the largest individual number per nest box, clutch and observation
# type, then spread the observation types into separate columns.
# Uses `df1` (the data set defined in this document) instead of the undefined
# `df`, and names the id columns explicitly rather than by position.
# Combinations with no observations are filled with NA by pivot_wider().
df1 %>%
  group_by(Nest.box, Clutch, Obs.type) %>%
  summarise(max = max(individual.number)) %>%
  pivot_wider(
    id_cols = c(Nest.box, Clutch),
    names_from = Obs.type,
    values_from = max
  )
# A tibble: 3 x 4
# Groups: Nest.box, Clutch [3]
Nest.box Clutch Chick Egg
<chr> <chr> <dbl> <dbl>
1 Nest1 First 2 3
2 Nest2 First 2 2
3 Nest2 Second 2 NA
对于 data.frame,这一切都可以在 tidyr::pivot_wider() 内一步完成:
library(tidyr)

# Go straight from long to wide: one row per nest box and clutch, one column
# per observation type. When a cell receives several observations, keep the
# largest individual number.
pivot_wider(
  data = df1,
  id_cols = c(Nest.box, Clutch),
  names_from = Obs.type,
  values_from = individual.number,
  values_fn = max
)
#> # A tibble: 3 × 4
#> Nest.box Clutch Egg Chick
#> <chr> <chr> <dbl> <dbl>
#> 1 Nest1 First 3 2
#> 2 Nest2 First 2 2
#> 3 Nest2 Second NA 2
创建于 2022-04-01,由 reprex package (v2.0.1) 生成
数据
# Example data: one row per observed egg or chick, recording the nest box,
# the observation type, the individual's number and the clutch.
df1 <- tibble::tibble(
  Nest.box = rep(
    c("Nest1", "Nest2", "Nest1", "Nest2"),
    times = c(3, 3, 2, 4)
  ),
  Obs.type = rep(c("Egg", "Chick"), each = 6),
  individual.number = c(1, 2, 3, 1, 2, 1, 1, 2, 1, 2, 1, 2),
  Clutch = rep(c("First", "Second"), times = c(10, 2))
)