将紧凑的字母显示结果添加到数据框

Question

我有一些数据显示了对一系列树种的处理效果，我正在对每个树种执行单向 anova 的 RESULT ~ TREATMENT。使用 dplyr，我创建了一个按物种分组的处理方式的新数据框，以及标准误差和偏差。我想将压缩字母显示结果添加到同一数据框中。

这里有一些可以玩的虚拟数据

library(dplyr)
library(tidyverse)
library(ggplot2)

# Generate species 
species <- rep (c("Oak", "Elm", "Ash"), each = 10)

# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)

# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))


# Combine into a sinlge dataframe
  data <- data.frame (SPECIES = rep(species, 2), TREATMENT = c(dose_1, dose_2), RESULT = result_1)

# Consolidate into a new data frame for ggplot and add errors
dat <- data %>%
  group_by(SPECIES, TREATMENT) %>%
  summarise( 
    n=n(),
    mean=mean(RESULT),
    sd=sd(RESULT)
  ) %>%
  mutate( se=sd/sqrt(n))  %>%
  mutate( ic=se * qt((1-0.05)/2 + .5, n-1))

# Plot the results
ggplot(dat, aes(x= reorder(SPECIES, -mean), y = mean, fill = TREATMENT))+
  geom_bar(position = 'dodge', stat = 'identity')+
  geom_errorbar(aes(ymin = mean-se, ymax = mean+se), position = position_dodge(.9), width = 0.2)

现在我想从Tukey测试每个物种的处理效果中获得紧凑的字母显示结果，并将其添加到dat

# First perform anova tests for treatment effects for each species
df_aov <- data %>%
  dplyr::group_by(SPECIES) %>%
  tidyr::nest() %>%
  dplyr::mutate(.data = .,
                aov_results = data %>% purrr::map(.x = ., .f = ~ summary(aov(RESULT ~ TREATMENT, data = .))))

# Inspect the results
df_aov$aov_results[[1]]
df_aov$aov_results[[2]]
df_aov$aov_results[[3]]

此时我只能对每个物种的anova结果进行Tukey测试，像这样

# Tukey's test
tukey <- TukeyHSD(df_aov$aov_results[[1]])

# compact letter display
cld <- multcompLetters4(df_aov$aov_results[[1]], tukey)

我希望能够对所有物种执行 Tukey 批量测试，并将紧凑的字母显示结果添加到 dat data frame我正在使用 ggplot。 dat 最后应该看起来像这样

dat

# A tibble: 6 x 8
# Groups:   SPECIES [3]
  SPECIES TREATMENT     n  mean    sd    se    ic   cld
  <chr>   <chr>     <int> <dbl> <dbl> <dbl> <dbl>  <chr>
1 Ash     Ctrl         10  7.17 0.556 0.176 0.398    a
2 Ash     L            10  2.78 0.454 0.143 0.324    b
3 Elm     Ctrl         10 15.0  0.653 0.206 0.467    a
4 Elm     L            10  2.52 0.468 0.148 0.335    b
5 Oak     Ctrl         10 10.5  1.13  0.357 0.808    a
6 Oak     L            10  3.66 0.895 0.283 0.640    b

然后我想像这样在ggplot中添加一行代码，这样我就可以在误差线上方的每个物种中添加紧凑的字母显示结果。

 [first part of the plot code] + 
        geom_text(aes(label = cld, y = mean + se), vjust = -0.5, position = position_dodge(0.9),size = 3)

Answer 1

从您示例的数据帧 data 开始，您可以执行以下操作：

创建具有描述性统计数据的数据框（如您的示例所示）

library(multcompView)
library(dplyr)

df_summary <-
  data %>%
  group_by(SPECIES, TREATMENT) %>%
  summarise(n = n(),
            sd = sd(RESULT, na.rm = TRUE),
            mean=mean(RESULT)
            ) %>%
  mutate(se = sd/sqrt(n),
         ic = se * qt((1-0.05)/2 + .5, n-1)
         )

使用 Tukey 统计数据创建数据框（注意 rowwise 的使用）：

df_tukey <- 
  data %>%
  group_by(SPECIES) %>%
  nest %>%
  rowwise %>% ## !important
  mutate(aov_result = list(aov(RESULT ~ TREATMENT, data = data)),
         tukey = list(TukeyHSD(aov_result)),
         cld = list(multcompLetters4(aov_result, tukey)$TREATMENT$Letters )
         ) %>%
  unnest(cld) %>%
  select(SPECIES, LETTER = cld) %>%
  mutate(TREATMENT = names(LETTER))

加入两者：

df_summary %>%
  left_join(df_tukey)

Answer 2

使用 emmeans and multcomp 您可以很容易地获得 CLD 并通过嵌套和取消嵌套数据框的组合实现您正在寻找的内容：

library(dplyr)
library(ggplot2)
library(emmeans)
library(multcomp)
library(tidyr)

# Generate species 
species <- rep (c("Oak", "Elm", "Ash"), each = 10)

# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)

# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))


# Combine into a sinlge dataframe
data <- data.frame (species = rep(species, 2), treatment = c(dose_1, dose_2), result = result_1)

df_aov <- data %>%
  dplyr::group_by(species) %>%
  tidyr::nest() %>%
  rowwise() %>% 
  dplyr::mutate(aov_results = list(aov(result ~ treatment, data = data)), 
         emm = list(emmeans::emmeans(aov_results, "treatment", type= "response")), 
         cld = list(multcomp::cld(emm, Letters = LETTERS, reverse = TRUE))) %>% 
  dplyr::select(-data, -aov_results, -emm) %>% 
  unnest(cols = c(cld)) %>%
  dplyr::mutate(cld = trimws(.group)) %>% 
  dplyr::select(-.group)
  
 ggplot(df_aov, aes(x= reorder(species, -emmean), y = emmean, fill = treatment))+
  geom_bar(position = 'dodge', stat = 'identity')+
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), position = position_dodge(.9), width = 0.2) + 
  geom_text(aes(label = cld, y = upper.CL), vjust = -0.5, position = position_dodge(0.9),size = 3)

将紧凑的字母显示结果添加到数据框

Adding compact letter display results to a data frame

r

anova

dplyr

tukey

tidyverse