动物个体观察结果的汇总和交叉制表
Aggregation and crosstabulation of observations of animal individuals
我正在对 30 年的动物个体观察进行基本汇总,我有一个原始数据文件,其中每个动物个体观察一行,其中个体名称和观察时间是参数。
这个脚本给出了一个长table格式,我需要一个长格式:
table(dis2$who, lubridate::year(dis2$tid))
这是数据:
> dput(dis2)
structure(list(who = c("KA-3", "KA-4", "KA-4", "KA-5", "KA-6",
"KA-6", "KA-7", "KA-7", "KA-8", "KA-8", "KA-12", "KA-12", "KA-12",
"KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-6",
"KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6",
"KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-7", "KB-7", "KB-7",
"KB-7", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11",
"KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KC-1",
"KC-1", "KC-1", "KC-3", "KC-3", "KE-1", "KE-1", "KE-1", "KE-1",
"KE-1", "KE-1", "KE-3", "KF-1", "KF-1", "KF-1", "KF-1", "KF-1",
"KG-6", "KG-6", "KG-6", "KH-2", "KH-2", "KH-2", "KH-3", "KH-3",
"KH-4", "KH-4", "KH-5", "KH-5", "KH-6", "KH-6", "KH-7", "KH-7",
"KH-7", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1",
"KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1"
), film = c(14, 10, 14, 18, 10, 18, 14, 14, 10, 14, 15, 18, 19,
20, 30, 37, 47, 48, 48, 49, 23, 23, 30, 32, 35, 38, 41, 43, 47,
48, 48, 48, 49, 49, 36, 37, 43, 49, 36, 36, 36, 36, 37, 38, 39,
42, 43, 47, 48, 49, 50, 51, 998, 50, 50, 73, 73, 73, 74, 74,
74, 74, 8, 8, 8, 8, 52, 88, 90, 91, 84, 84, 92, 92, 93, 92, 104,
92, 93, 84, 93, 93, 93, 94, 999, 999, 95, 95, 96, 96, 97, 97,
97, 98, 98, 98, 99, 99, 102), frame = c(36, 15, 34, 16, 18, 15,
35, 37, 13, 35, 27, 18, 14, 17, 24, 17, 11, 22, 24, 34, 29, 31,
25, 15, 18, 12, 5, 23, 19, 9, 10, 14, 16, 17, 37, 15, 8, 14,
29, 33, 35, 37, 18, 22, 22, 33, 10, 8, 21, 36, 30, 8, 1, 2, 3,
2, 19, 36, 3, 5, 21, 8, 4, 8, 11, 14, 2, 19, 16, 16, 24, 26,
6, 21, 19, 27, 8, 30, 26, 25, 30, 20, 33, 19, 1, 2, 1, 24, 19,
22, 15, 17, 19, 5, 9, 27, 1, 8, 1), tid = structure(c(570067200,
570067200, 570067200, 570412800, 570067200, 570412800, 570067200,
570067200, 570067200, 570067200, 570067200, 570412800, 570412800,
633744000, 634435200, 634608000, 635126400, 635126400, 635126400,
635126400, 633744000, 633744000, 634435200, 634435200, 634521600,
634694400, 635040000, 635040000, 635126400, 635126400, 635126400,
635126400, 635126400, 635126400, 634608000, 634608000, 635040000,
635126400, 634608000, 634608000, 634608000, 634608000, 634608000,
634694400, 635040000, 635040000, 635040000, 635126400, 635126400,
635126400, 640483200, 640483200, 640483200, 640483200, 640483200,
659664000, 659664000, 659664000, 659664000, 659664000, 659664000,
659664000, 569721600, 569721600, 569721600, 569721600, 656035200,
668476800, 668476800, 668476800, 668476800, 668476800, 668476800,
668476800, 668476800, 668476800, 668736000, 668476800, 668476800,
668476800, 668476800, 668476800, 668476800, 668476800, 575424000,
655948800, 668563200, 668563200, 668563200, 668563200, 668563200,
668563200, 668563200, 668563200, 668563200, 668563200, 668563200,
668563200, 668649600), class = c("POSIXct", "POSIXt"), tzone = "UTC")),
class = "data.frame", row.names = c(NA,-99L))
我们可以用 'frame' 的 sum
做一个 group_by
'year' 和 'who' 和 summarise
,然后重塑成 'wide' 格式为 pivot_wider
library(dplyr)
library(tidyr)
library(lubridate)
df3 %>%
group_by(who, year = year(tid)) %>%
summarise(frame = sum(frame)) %>%
pivot_wider(names_from = year, values_from = frame)
如果我们只需要频率
df3 %>%
count(who, year = year(tid)) %>%
pivot_wider(names_from = year, values_from = n, values_fill = list(n = 0))
# A tibble: 13 x 7
# who `1992` `1993` `1994` `1995` `1996` `1991`
# <chr> <int> <int> <int> <int> <int> <int>
# 1 NP-3 9 3 4 4 1 0
# 2 NQ-1 17 0 0 0 0 0
# 3 NQ-2 5 1 0 0 0 0
# 4 NQ-5 2 0 0 0 0 0
# 5 NR-3 3 0 0 0 0 0
# 6 NT-1 1 2 3 0 0 0
# 7 NT-6 2 0 0 0 0 0
# 8 NU-1 16 0 0 0 0 0
# 9 NU-2 6 0 0 0 0 0
#10 NU-3 11 6 0 0 0 0
#11 NU-5 17 0 0 0 0 0
#12 NU-6 14 8 0 0 0 3
#13 NU-9 4 0 0 0 0 0
也许只使用 table
?
table(df3$who, lubridate::year(df3$tid))
#>
#> 1991 1992 1993 1994 1995 1996
#> NP-3 0 9 3 4 4 1
#> NQ-1 0 17 0 0 0 0
#> NQ-2 0 5 1 0 0 0
#> NQ-5 0 2 0 0 0 0
#> NR-3 0 3 0 0 0 0
#> NT-1 0 1 2 3 0 0
#> NT-6 0 2 0 0 0 0
#> NU-1 0 16 0 0 0 0
#> NU-2 0 6 0 0 0 0
#> NU-3 0 11 6 0 0 0
#> NU-5 0 17 0 0 0 0
#> NU-6 3 14 8 0 0 0
#> NU-9 0 4 0 0 0 0
编辑
更新修改后的数据:
table(dis2$who, lubridate::year(dis2$tid))
#>
#> 1988 1990 1991
#> KA-12 3 0 0
#> KA-3 1 0 0
#> KA-4 2 0 0
#> KA-5 1 0 0
#> KA-6 2 0 0
#> KA-7 2 0 0
#> KA-8 2 0 0
#> KB-11 0 12 0
#> KB-4 0 7 0
#> KB-6 0 14 0
#> KB-7 0 4 0
#> KC-1 0 3 0
#> KC-3 0 2 0
#> KE-1 0 6 0
#> KE-3 0 1 0
#> KF-1 4 1 0
#> KG-6 0 0 3
#> KH-2 0 0 3
#> KH-3 0 0 2
#> KH-4 0 0 2
#> KH-5 0 0 2
#> KH-6 0 0 2
#> KH-7 0 0 3
#> KI-1 1 1 13
我正在对 30 年的动物个体观察进行基本汇总,我有一个原始数据文件,其中每个动物个体观察一行,其中个体名称和观察时间是参数。
这个脚本给出了一个长table格式,我需要一个长格式:
table(dis2$who, lubridate::year(dis2$tid))
这是数据:
> dput(dis2)
structure(list(who = c("KA-3", "KA-4", "KA-4", "KA-5", "KA-6",
"KA-6", "KA-7", "KA-7", "KA-8", "KA-8", "KA-12", "KA-12", "KA-12",
"KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-4", "KB-6",
"KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-6",
"KB-6", "KB-6", "KB-6", "KB-6", "KB-6", "KB-7", "KB-7", "KB-7",
"KB-7", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11",
"KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KB-11", "KC-1",
"KC-1", "KC-1", "KC-3", "KC-3", "KE-1", "KE-1", "KE-1", "KE-1",
"KE-1", "KE-1", "KE-3", "KF-1", "KF-1", "KF-1", "KF-1", "KF-1",
"KG-6", "KG-6", "KG-6", "KH-2", "KH-2", "KH-2", "KH-3", "KH-3",
"KH-4", "KH-4", "KH-5", "KH-5", "KH-6", "KH-6", "KH-7", "KH-7",
"KH-7", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1",
"KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1", "KI-1"
), film = c(14, 10, 14, 18, 10, 18, 14, 14, 10, 14, 15, 18, 19,
20, 30, 37, 47, 48, 48, 49, 23, 23, 30, 32, 35, 38, 41, 43, 47,
48, 48, 48, 49, 49, 36, 37, 43, 49, 36, 36, 36, 36, 37, 38, 39,
42, 43, 47, 48, 49, 50, 51, 998, 50, 50, 73, 73, 73, 74, 74,
74, 74, 8, 8, 8, 8, 52, 88, 90, 91, 84, 84, 92, 92, 93, 92, 104,
92, 93, 84, 93, 93, 93, 94, 999, 999, 95, 95, 96, 96, 97, 97,
97, 98, 98, 98, 99, 99, 102), frame = c(36, 15, 34, 16, 18, 15,
35, 37, 13, 35, 27, 18, 14, 17, 24, 17, 11, 22, 24, 34, 29, 31,
25, 15, 18, 12, 5, 23, 19, 9, 10, 14, 16, 17, 37, 15, 8, 14,
29, 33, 35, 37, 18, 22, 22, 33, 10, 8, 21, 36, 30, 8, 1, 2, 3,
2, 19, 36, 3, 5, 21, 8, 4, 8, 11, 14, 2, 19, 16, 16, 24, 26,
6, 21, 19, 27, 8, 30, 26, 25, 30, 20, 33, 19, 1, 2, 1, 24, 19,
22, 15, 17, 19, 5, 9, 27, 1, 8, 1), tid = structure(c(570067200,
570067200, 570067200, 570412800, 570067200, 570412800, 570067200,
570067200, 570067200, 570067200, 570067200, 570412800, 570412800,
633744000, 634435200, 634608000, 635126400, 635126400, 635126400,
635126400, 633744000, 633744000, 634435200, 634435200, 634521600,
634694400, 635040000, 635040000, 635126400, 635126400, 635126400,
635126400, 635126400, 635126400, 634608000, 634608000, 635040000,
635126400, 634608000, 634608000, 634608000, 634608000, 634608000,
634694400, 635040000, 635040000, 635040000, 635126400, 635126400,
635126400, 640483200, 640483200, 640483200, 640483200, 640483200,
659664000, 659664000, 659664000, 659664000, 659664000, 659664000,
659664000, 569721600, 569721600, 569721600, 569721600, 656035200,
668476800, 668476800, 668476800, 668476800, 668476800, 668476800,
668476800, 668476800, 668476800, 668736000, 668476800, 668476800,
668476800, 668476800, 668476800, 668476800, 668476800, 575424000,
655948800, 668563200, 668563200, 668563200, 668563200, 668563200,
668563200, 668563200, 668563200, 668563200, 668563200, 668563200,
668563200, 668649600), class = c("POSIXct", "POSIXt"), tzone = "UTC")),
class = "data.frame", row.names = c(NA,-99L))
我们可以用 'frame' 的 sum
做一个 group_by
'year' 和 'who' 和 summarise
,然后重塑成 'wide' 格式为 pivot_wider
library(dplyr)
library(tidyr)
library(lubridate)
df3 %>%
group_by(who, year = year(tid)) %>%
summarise(frame = sum(frame)) %>%
pivot_wider(names_from = year, values_from = frame)
如果我们只需要频率
df3 %>%
count(who, year = year(tid)) %>%
pivot_wider(names_from = year, values_from = n, values_fill = list(n = 0))
# A tibble: 13 x 7
# who `1992` `1993` `1994` `1995` `1996` `1991`
# <chr> <int> <int> <int> <int> <int> <int>
# 1 NP-3 9 3 4 4 1 0
# 2 NQ-1 17 0 0 0 0 0
# 3 NQ-2 5 1 0 0 0 0
# 4 NQ-5 2 0 0 0 0 0
# 5 NR-3 3 0 0 0 0 0
# 6 NT-1 1 2 3 0 0 0
# 7 NT-6 2 0 0 0 0 0
# 8 NU-1 16 0 0 0 0 0
# 9 NU-2 6 0 0 0 0 0
#10 NU-3 11 6 0 0 0 0
#11 NU-5 17 0 0 0 0 0
#12 NU-6 14 8 0 0 0 3
#13 NU-9 4 0 0 0 0 0
也许只使用 table
?
table(df3$who, lubridate::year(df3$tid))
#>
#> 1991 1992 1993 1994 1995 1996
#> NP-3 0 9 3 4 4 1
#> NQ-1 0 17 0 0 0 0
#> NQ-2 0 5 1 0 0 0
#> NQ-5 0 2 0 0 0 0
#> NR-3 0 3 0 0 0 0
#> NT-1 0 1 2 3 0 0
#> NT-6 0 2 0 0 0 0
#> NU-1 0 16 0 0 0 0
#> NU-2 0 6 0 0 0 0
#> NU-3 0 11 6 0 0 0
#> NU-5 0 17 0 0 0 0
#> NU-6 3 14 8 0 0 0
#> NU-9 0 4 0 0 0 0
编辑
更新修改后的数据:
table(dis2$who, lubridate::year(dis2$tid))
#>
#> 1988 1990 1991
#> KA-12 3 0 0
#> KA-3 1 0 0
#> KA-4 2 0 0
#> KA-5 1 0 0
#> KA-6 2 0 0
#> KA-7 2 0 0
#> KA-8 2 0 0
#> KB-11 0 12 0
#> KB-4 0 7 0
#> KB-6 0 14 0
#> KB-7 0 4 0
#> KC-1 0 3 0
#> KC-3 0 2 0
#> KE-1 0 6 0
#> KE-3 0 1 0
#> KF-1 4 1 0
#> KG-6 0 0 3
#> KH-2 0 0 3
#> KH-3 0 0 2
#> KH-4 0 0 2
#> KH-5 0 0 2
#> KH-6 0 0 2
#> KH-7 0 0 3
#> KI-1 1 1 13