使用 R 聚合和汇总数据集信息

Aggregate and summarise dataset information with R

我有一个鸟类繁殖生产力(breeding productivity)数据集:

df1
    # Nest.box Obs.type individual.number Clutch
    # 1 Nest1 Egg 1 First
    # 2 Nest1 Egg 2 First
    # 3 Nest1 Egg 3 First
    # 4 Nest2 Egg 1 First
    # 5 Nest2 Egg 2 First
    # 6 Nest2 Egg 1 First
    # 7 Nest1 Chick 1 First
    # 8 Nest1 Chick 2 First
    # 9 Nest2 Chick 1 First
    # 10 Nest2 Chick 2 First
    # 11 Nest2 Chick 1 Second
    # 12 Nest2 Chick 2 Second

我想按 Nest.box 和 Clutch(窝次)分组来汇总这些数据,即显示每个 Nest.box、每个 Clutch 的最大蛋数和最大雏鸟数。

想要的输出是这样的:

output
        # Nest.box Clutch Eggs Chicks
        # 1 Nest1 First 3 2
        # 2 Nest2 First 2 2
        # 3 Nest2 Second NA 2

data.table解决方案

library(data.table)

# Count the distinct egg and chick individuals per nest box and clutch.
# The example data set is `df1`; the earlier reference to `df` pointed at an
# object that is never created here (it normally resolves to the function
# stats::df, which setDT() rejects).
# as.data.table() works on a copy, so `df1` itself is left unchanged for the
# other snippets.
# Note: uniqueN() of an empty selection is 0, so a clutch without any egg
# records is reported as 0 rather than NA.
as.data.table(df1)[
  ,
  .(
    Eggs = uniqueN(individual.number[Obs.type == "Egg"]),
    Chicks = uniqueN(individual.number[Obs.type == "Chick"])
  ),
  by = .(Nest.box, Clutch)
]

#    Nest.box Clutch Eggs Chicks
# 1:    Nest1  First    3      2
# 2:    Nest2  First    2      2
# 3:    Nest2 Second    0      2

编辑

还有一种可能性:转换为宽格式

# Wide-format alternative: one row per nest box / clutch, one column per
# Obs.type, each cell holding the number of distinct individual.number values.
# Uses `df1` (the example data set) instead of the undefined `df`, and a
# data.table copy so `df1` is not converted in place.
dcast(
  as.data.table(df1),
  Nest.box + Clutch ~ Obs.type,
  value.var = "individual.number",
  fun.aggregate = uniqueN
)
#    Nest.box Clutch Chick Egg
# 1:    Nest1  First     2   3
# 2:    Nest2  First     2   2
# 3:    Nest2 Second     2   0

Tidyverse 方法是

library(dplyr)
library(tidyr)

# Largest individual.number per nest box / clutch / observation type, then
# spread the observation types into their own columns.
# Uses `df1` (the example data set) instead of the undefined `df`.
df1 %>%
  group_by(Nest.box, Clutch, Obs.type) %>%
  # "drop_last" is summarise()'s default here; stating it keeps the result
  # grouped by Nest.box and Clutch and silences the regrouping message.
  summarise(max = max(individual.number), .groups = "drop_last") %>%
  pivot_wider(
    # Name the id columns instead of relying on their positions (1:2).
    id_cols = c(Nest.box, Clutch),
    names_from = Obs.type,
    values_from = max
  )

# A tibble: 3 x 4
# Groups:   Nest.box, Clutch [3]
  Nest.box Clutch Chick   Egg
  <chr>    <chr>  <dbl> <dbl>
1 Nest1    First      2     3
2 Nest2    First      2     2
3 Nest2    Second     2    NA

对于 data.frame,这一切都可以在 tidyr::pivot_wider() 内一步完成:

library(tidyr)

# Reshape directly to wide format: one row per nest box / clutch, one column
# per Obs.type. values_fn = max reduces the several rows that fall into the
# same cell to their largest individual.number.
pivot_wider(
  data = df1,
  names_from = Obs.type,
  values_from = individual.number,
  id_cols = c(Nest.box, Clutch),
  values_fn = max
)
#> # A tibble: 3 × 4
#>   Nest.box Clutch   Egg Chick
#>   <chr>    <chr>  <dbl> <dbl>
#> 1 Nest1    First      3     2
#> 2 Nest2    First      2     2
#> 3 Nest2    Second    NA     2

创建于 2022-04-01,由 reprex package (v2.0.1) 生成

数据

# Example data: one row per observed egg or chick, defined column by column.
# Rows 1-6 are eggs and rows 7-12 are chicks; only the last two rows belong
# to the second clutch.
df1 <- tibble::tibble(
  Nest.box = rep(
    c("Nest1", "Nest2", "Nest1", "Nest2"),
    times = c(3, 3, 2, 4)
  ),
  Obs.type = rep(c("Egg", "Chick"), each = 6),
  individual.number = c(1, 2, 3, 1, 2, 1, 1, 2, 1, 2, 1, 2),
  Clutch = rep(c("First", "Second"), times = c(10, 2))
)