确定两个分组变量的每个组合中计数和总和最高的因素

Identify the factors with the highest counts and sums within each combination of two grouping variables

对于这项研究,我们记录了每个 100 m² 圆形样地内所有高度 > 1.5 m 且直径 > 1.8 cm 的树的种类和胸径 (dbh)。共采样了 100 多个圆形样地。

我想在四个径级 (diameter classes) 中找出最具代表性的树种。这四个径级是幼树 (2.5-8 cm)、杆材 (>8-18 cm)、成熟木 (>18-28 cm) 和大树 (>28 cm)。

如果可能的话,我想要一个例子,说明如何分别按密度(每个径级中各树种的株数)和体积(胸高断面积,basal area)选出每个径级中最具代表性的树种。

我已经在 crossvalidated.com 上发布了关于哪种方法(密度或体积)更合适的问题...

https://stats.stackexchange.com/q/148734/57117

请注意,每个样本站点的名称 (location) 不遵循任何模式。这些名称是根据它们在原始数据集(本数据即从中子采样而来)中的位置分配的。此外,如果某个站点的某个径级内没有任何树种,结果中用 NA 表示即可。

这是一个示例数据集,其中采样了 15 个位置并测量了 1,200 棵树。我的数据与以下示例数据非常相似。

# Simulated tree inventory: 1,200 stems, each with a plot label (location),
# a diameter at breast height (dbh, in cm) and a species code.
tree.species <- c("PSME", "PIEN", "LAOC", "POTR", "SALIX")

tree.diameters <- data.frame(
  # Plot label = letter prefix + number. Only 4 prefixes are drawn; paste0()
  # recycles them across the 1,200 sampled numbers.
  location = paste0(
    sample(LETTERS[c(2, 4, 6, 8)], 4, replace = TRUE),
    sample(seq(6, 250, 57), 1200, replace = TRUE)
  ),
  # Units in cm. A mix of a fixed value and several random distributions.
  dbh = c(
    rep(3.81, 200),
    rnorm(350, mean = 6.32, sd = 1.5),
    rnorm(50, mean = 75, sd = 6),
    runif(550, min = 20, max = 100),
    rnorm(50, mean = 150, sd = 2.3)
  ),
  species = factor(sample(tree.species, 1200, replace = TRUE))
)

# Add basal area (m sq. per ha).
# One stem's cross-section is pi * (dbh / 2)^2 in cm^2; dividing by 10000
# converts cm^2 to m^2, and multiplying by 100 scales a 100 m^2 plot to 1 ha.
# The earlier expression combined (dbh / 2)^2 with a further division by 4,
# applying the diameter-to-radius conversion twice, so every value was 4x too
# small. Output printed from the old expression is 4x smaller than this.
tree.diameters$basal.area <-
  100 * pi * (tree.diameters$dbh / 2)^2 / 10000

# Order the data by location, then dbh, then species.
tree.diameters <- tree.diameters[
  order(tree.diameters$location, tree.diameters$dbh, tree.diameters$species),
]

> head(tree.diameters, n=15)
    location      dbh species basal.area
426     B120 3.303363    PSME 0.02142607
358     B120 3.657538   SALIX 0.02626682
450     B120 3.667190    PSME 0.02640565
150     B120 3.810000    PIEN 0.02850230
94      B120 3.810000    POTR 0.02850230
10      B120 3.810000    PSME 0.02850230
90      B120 3.810000    PSME 0.02850230
18      B120 3.810000   SALIX 0.02850230
134     B120 3.810000   SALIX 0.02850230
194     B120 3.810000   SALIX 0.02850230
274     B120 3.979974   SALIX 0.03110214
290     B120 5.345510   SALIX 0.05610586
310     B120 5.480217    POTR 0.05896921
254     B120 5.625061   SALIX 0.06212757
478     B120 5.852126    LAOC 0.06724456

我一直在尝试按密度 (density) 进行选择,一般做法类似于下面针对 location == "B120" 和 location == "B177" 的代码,但我不确定如何对所有位置进行迭代,并将全部结果放入一个列表或 data.frame。我也不确定如何针对体积 (basal.area) 计算相同的结果。

> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh < 8,
+                                select=species)))))
[1] "SALIX"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh > 28, 
+                                select=species)))))
[1] "PIEN"
> #Location "B120"
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh < 8,
+                                select=species)))))
[1] "SALIX"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh > 28, 
+                                select=species)))))
[1] "PIEN"
> 
> #Location "B177"
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh < 8,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh > 28, 
+                                select=species)))))
[1] "PSME"

我希望输出类似于这样(除了为每个 class 填充 vol.i)。

location den.sap den.pole den.mat den.lrg vol.sap vol.pole vol.mat vol.lrg
B120     SALIX   POTR     POTR    PIEN    ?       ?        ?       ?
B177     POTR    POTR     POTR    PSME    ?       ?        ?       ?

这个问题涉及的内容很多,但它描述得很详细,而且提供了一个展示 dplyr 和 tidyr 包的好机会。下面开始。

首先按大小对树进行分类:

library(dplyr) ; library(tidyr)
# Label every tree with a diameter class derived from dbh (cm).
# Right-closed bins give the cut-offs dbh <= 8 ("sapling"), <= 18 ("pole"),
# <= 28 ("mature"); anything larger is "large". The result is kept as a
# character column rather than a factor.
tree.diameters <- mutate(
  tree.diameters,
  size = as.character(cut(
    dbh,
    breaks = c(-Inf, 8, 18, 28, Inf),
    labels = c("sapling", "pole", "mature", "large"),
    include.lowest = TRUE
  ))
)

计算每个 location/species/size 组合的计数和总体积:

# One row per location / species / size class, holding the summed basal
# area (vol) and the number of stems (count) in that combination.
treesummary <- summarise(
  group_by(tree.diameters, location, species, size),
  vol = sum(basal.area),
  count = n()
)

最后,为每个 location/size 组合选出计数最多的树种,并将这个长表展开为宽格式。对体积重复同样的步骤,然后将两个结果表连接在一起。

# Keep the first row of each location/size group in an already-sorted
# summary, drop the metric columns, spread the size classes into columns
# and prepend `prefix` to every column name except the first ("location").
widen_top_species <- function(ranked, prefix) {
  top <- slice(ranked, 1)
  wide <- spread(select(top, -count, -vol), size, species)
  names(wide) <- c("location", paste0(prefix, names(wide)[-1]))
  wide
}

# Summary sorted so the highest count / highest volume row comes first.
ranked.by.count <- arrange(group_by(treesummary, location, size), -count)
ranked.by.vol <- arrange(group_by(treesummary, location, size), -vol)

# One row per location: top species per size class by density (den.*)
# and by basal area (vol.*).
result <- inner_join(
  widen_top_species(ranked.by.count, "den."),
  widen_top_species(ranked.by.vol, "vol."),
  by = "location"
)

结果如下:

result

   location den.large den.mature den.pole den.sapling vol.large vol.mature vol.pole vol.sapling
1      B120      LAOC       LAOC     PIEN        LAOC      LAOC       LAOC     PSME        POTR
2      B177      PSME       PIEN     LAOC        POTR      PSME       PIEN     LAOC        POTR
3      B234      POTR       PIEN     LAOC        POTR      PSME       PIEN     LAOC        POTR
4        B6     SALIX       PIEN     PSME        POTR      PSME       PIEN     PSME        POTR
5       B63     SALIX       PIEN     PIEN        PIEN     SALIX       PIEN     PIEN        PIEN
6      F120      PSME       POTR     POTR        PIEN      PSME       PSME     POTR        PIEN
7      F177      PIEN       POTR    SALIX       SALIX      PIEN       POTR    SALIX       SALIX
8      F234      PIEN       PIEN     LAOC       SALIX      PIEN       PIEN     LAOC       SALIX
9        F6      LAOC      SALIX     PIEN        PSME      PSME      SALIX     PIEN        PSME
10      F63      PIEN      SALIX     POTR        PIEN      PIEN      SALIX     POTR        PIEN