汇总R中列表中的数据

Summarising data in a list in R

我有很多数据框都包含在一个名为 1a1 的列表中,列表中的名称是收集数据的日期,例如

 names(1a1)
[1] "Jan4" "Jan5" "Jan6" "Jan7" "Jan8" "Jan9" "Jan10"

列表中的所有数据框都具有相同的格式

例如

 dput(Jan4)
structure(list(Species = c("bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti"), Pit.tag = c("01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD77C", "01103FD77C", "01103FD77C", "01103FD77C", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD77C", "01103FD77C", "01103FD77C", 
"01103FD77C", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103F9F29", 
"01103F9F29", "01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", 
"01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", "01103F9F29", 
"01103F9F29"), Date = c("04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021"), Time = c("08:01:41", 
"08:01:42", "08:01:42", "08:03:09", "08:03:09", "08:03:10", "08:02:57", 
"08:02:57", "08:02:58", "08:03:36", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:02:57", "08:02:57", "08:02:58", 
"08:03:36", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:00:43", "08:00:44", "08:00:44", 
"08:01:39", "08:01:39", "08:01:40", "08:01:40", "08:02:54", "08:02:54", 
"08:02:55", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:02:26", "08:02:26", "08:03:37", 
"08:03:38")), row.names = c(NA, -92L), class = "data.frame")

我想做的是创建一个新的数据框来汇总这些数据,从中可以看到每个个体(Pit.tag)被记录了多少次(创建一个名为 No_of_visits 的新变量),以及它所属的物种(Species)

例如

Pit.tag     Species   No_of_visits
01103FD6EF   bluti    47

我可以通过使用以下代码的变体获得我需要的大部分信息:
Visitsbypit<-sapply(tapply(1a1$`Jan4`$Species, 1a1$`Jan4`$Pit.tag, length), unique)
Vistsbyspecies<-sapply(tapply(1a1$`Jan4`$Pit.tag, 1a1$`Jan4`$Species, length), unique)


Visitsbypit
01103F9776 01103FA8DD 01103FC9DE 
        10        133        255

Vistsbyspecies
greti bluti 
   10   388

注意:上面的这些数据与我在此处提供的数据不匹配,它们来自另一个数据框

这也不能真正让我得到我想要的东西:Vistsbyspecies 只是每个物种的总访问量,并没有关联到 Pit.tag 记录。我想把 Visitsbypit 中的 Pit.tag 记录关联到它们对应的物种

但我无法迈出下一步,把这些信息整合在一起。我也在想,一定有更好的方法可以直接对整个列表完成这项工作,而不必逐个指定,例如 1a1$`Jan4`$Species;在我看来,那样做违背了将数据框放入列表并使用 sapply/tapply 的目的

你想要这个吗?

library(dplyr)

Jan4 %>% count(Species, Pit.tag)

#  Species    Pit.tag  n
#1   bluti 01103FD6EF 42
#2   bluti 01103FD77C  8
#3   greti 01103F9F29 12
#4   greti 0700EDADB8 30

要应用于数据框列表,请使用 lapply 或 purrr::map:

lapply(`1a1`, function(x) x %>% count(Species, Pit.tag))