将紧凑的字母显示结果添加到数据框
Adding compact letter display results to a data frame
我有一些数据显示了对一系列树种的处理效果,我正在对每个树种执行单向 anova
的 RESULT ~ TREATMENT
。使用 dplyr
,我创建了一个按物种分组的处理方式的新数据框,以及标准误差和偏差。我想将压缩字母显示结果添加到同一数据框中。
这里有一些可以玩的虚拟数据
library(dplyr)
library(tidyverse)
library(ggplot2)
# Generate species
species <- rep (c("Oak", "Elm", "Ash"), each = 10)
# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)
# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))
# Combine into a sinlge dataframe
data <- data.frame (SPECIES = rep(species, 2), TREATMENT = c(dose_1, dose_2), RESULT = result_1)
# Consolidate into a new data frame for ggplot and add errors
dat <- data %>%
group_by(SPECIES, TREATMENT) %>%
summarise(
n=n(),
mean=mean(RESULT),
sd=sd(RESULT)
) %>%
mutate( se=sd/sqrt(n)) %>%
mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
# Plot the results
ggplot(dat, aes(x= reorder(SPECIES, -mean), y = mean, fill = TREATMENT))+
geom_bar(position = 'dodge', stat = 'identity')+
geom_errorbar(aes(ymin = mean-se, ymax = mean+se), position = position_dodge(.9), width = 0.2)
现在我想从Tukey
测试每个物种的处理效果中获得紧凑的字母显示结果,并将其添加到dat
# First perform anova tests for treatment effects for each species
df_aov <- data %>%
dplyr::group_by(SPECIES) %>%
tidyr::nest() %>%
dplyr::mutate(.data = .,
aov_results = data %>% purrr::map(.x = ., .f = ~ summary(aov(RESULT ~ TREATMENT, data = .))))
# Inspect the results
df_aov$aov_results[[1]]
df_aov$aov_results[[2]]
df_aov$aov_results[[3]]
此时我只能对每个物种的anova
结果进行Tukey
测试,像这样
# Tukey's test
tukey <- TukeyHSD(df_aov$aov_results[[1]])
# compact letter display
cld <- multcompLetters4(df_aov$aov_results[[1]], tukey)
我希望能够对所有物种执行 Tukey
批量测试,并将紧凑的字母显示结果添加到 dat
data frame
我正在使用 ggplot
。 dat
最后应该看起来像这样
dat
# A tibble: 6 x 8
# Groups: SPECIES [3]
SPECIES TREATMENT n mean sd se ic cld
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
1 Ash Ctrl 10 7.17 0.556 0.176 0.398 a
2 Ash L 10 2.78 0.454 0.143 0.324 b
3 Elm Ctrl 10 15.0 0.653 0.206 0.467 a
4 Elm L 10 2.52 0.468 0.148 0.335 b
5 Oak Ctrl 10 10.5 1.13 0.357 0.808 a
6 Oak L 10 3.66 0.895 0.283 0.640 b
然后我想像这样在ggplot
中添加一行代码,这样我就可以在误差线上方的每个物种中添加紧凑的字母显示结果。
[first part of the plot code] +
geom_text(aes(label = cld, y = mean + se), vjust = -0.5, position = position_dodge(0.9),size = 3)
从您示例的数据帧 data 开始,您可以执行以下操作:
- 创建具有描述性统计数据的数据框(如您的示例所示)
library(multcompView)
library(dplyr)
df_summary <-
data %>%
group_by(SPECIES, TREATMENT) %>%
summarise(n = n(),
sd = sd(RESULT, na.rm = TRUE),
mean=mean(RESULT)
) %>%
mutate(se = sd/sqrt(n),
ic = se * qt((1-0.05)/2 + .5, n-1)
)
- 使用 Tukey 统计数据创建数据框(注意
rowwise
的使用):
df_tukey <-
data %>%
group_by(SPECIES) %>%
nest %>%
rowwise %>% ## !important
mutate(aov_result = list(aov(RESULT ~ TREATMENT, data = data)),
tukey = list(TukeyHSD(aov_result)),
cld = list(multcompLetters4(aov_result, tukey)$TREATMENT$Letters )
) %>%
unnest(cld) %>%
select(SPECIES, LETTER = cld) %>%
mutate(TREATMENT = names(LETTER))
- 加入两者:
df_summary %>%
left_join(df_tukey)
使用 emmeans
and multcomp
您可以很容易地获得 CLD 并通过嵌套和取消嵌套数据框的组合实现您正在寻找的内容:
library(dplyr)
library(ggplot2)
library(emmeans)
library(multcomp)
library(tidyr)
# Generate species
species <- rep (c("Oak", "Elm", "Ash"), each = 10)
# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)
# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))
# Combine into a sinlge dataframe
data <- data.frame (species = rep(species, 2), treatment = c(dose_1, dose_2), result = result_1)
df_aov <- data %>%
dplyr::group_by(species) %>%
tidyr::nest() %>%
rowwise() %>%
dplyr::mutate(aov_results = list(aov(result ~ treatment, data = data)),
emm = list(emmeans::emmeans(aov_results, "treatment", type= "response")),
cld = list(multcomp::cld(emm, Letters = LETTERS, reverse = TRUE))) %>%
dplyr::select(-data, -aov_results, -emm) %>%
unnest(cols = c(cld)) %>%
dplyr::mutate(cld = trimws(.group)) %>%
dplyr::select(-.group)
ggplot(df_aov, aes(x= reorder(species, -emmean), y = emmean, fill = treatment))+
geom_bar(position = 'dodge', stat = 'identity')+
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), position = position_dodge(.9), width = 0.2) +
geom_text(aes(label = cld, y = upper.CL), vjust = -0.5, position = position_dodge(0.9),size = 3)
我有一些数据显示了对一系列树种的处理效果,我正在对每个树种执行单向 anova
的 RESULT ~ TREATMENT
。使用 dplyr
,我创建了一个按物种分组的处理方式的新数据框,以及标准误差和偏差。我想将压缩字母显示结果添加到同一数据框中。
这里有一些可以玩的虚拟数据
library(dplyr)
library(tidyverse)
library(ggplot2)
# Generate species
species <- rep (c("Oak", "Elm", "Ash"), each = 10)
# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)
# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))
# Combine into a sinlge dataframe
data <- data.frame (SPECIES = rep(species, 2), TREATMENT = c(dose_1, dose_2), RESULT = result_1)
# Consolidate into a new data frame for ggplot and add errors
dat <- data %>%
group_by(SPECIES, TREATMENT) %>%
summarise(
n=n(),
mean=mean(RESULT),
sd=sd(RESULT)
) %>%
mutate( se=sd/sqrt(n)) %>%
mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
# Plot the results
ggplot(dat, aes(x= reorder(SPECIES, -mean), y = mean, fill = TREATMENT))+
geom_bar(position = 'dodge', stat = 'identity')+
geom_errorbar(aes(ymin = mean-se, ymax = mean+se), position = position_dodge(.9), width = 0.2)
现在我想从Tukey
测试每个物种的处理效果中获得紧凑的字母显示结果,并将其添加到dat
# First perform anova tests for treatment effects for each species
df_aov <- data %>%
dplyr::group_by(SPECIES) %>%
tidyr::nest() %>%
dplyr::mutate(.data = .,
aov_results = data %>% purrr::map(.x = ., .f = ~ summary(aov(RESULT ~ TREATMENT, data = .))))
# Inspect the results
df_aov$aov_results[[1]]
df_aov$aov_results[[2]]
df_aov$aov_results[[3]]
此时我只能对每个物种的anova
结果进行Tukey
测试,像这样
# Tukey's test
tukey <- TukeyHSD(df_aov$aov_results[[1]])
# compact letter display
cld <- multcompLetters4(df_aov$aov_results[[1]], tukey)
我希望能够对所有物种执行 Tukey
批量测试,并将紧凑的字母显示结果添加到 dat
data frame
我正在使用 ggplot
。 dat
最后应该看起来像这样
dat
# A tibble: 6 x 8
# Groups: SPECIES [3]
SPECIES TREATMENT n mean sd se ic cld
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <chr>
1 Ash Ctrl 10 7.17 0.556 0.176 0.398 a
2 Ash L 10 2.78 0.454 0.143 0.324 b
3 Elm Ctrl 10 15.0 0.653 0.206 0.467 a
4 Elm L 10 2.52 0.468 0.148 0.335 b
5 Oak Ctrl 10 10.5 1.13 0.357 0.808 a
6 Oak L 10 3.66 0.895 0.283 0.640 b
然后我想像这样在ggplot
中添加一行代码,这样我就可以在误差线上方的每个物种中添加紧凑的字母显示结果。
[first part of the plot code] +
geom_text(aes(label = cld, y = mean + se), vjust = -0.5, position = position_dodge(0.9),size = 3)
从您示例的数据帧 data 开始,您可以执行以下操作:
- 创建具有描述性统计数据的数据框(如您的示例所示)
library(multcompView)
library(dplyr)
df_summary <-
data %>%
group_by(SPECIES, TREATMENT) %>%
summarise(n = n(),
sd = sd(RESULT, na.rm = TRUE),
mean=mean(RESULT)
) %>%
mutate(se = sd/sqrt(n),
ic = se * qt((1-0.05)/2 + .5, n-1)
)
- 使用 Tukey 统计数据创建数据框(注意
rowwise
的使用):
df_tukey <-
data %>%
group_by(SPECIES) %>%
nest %>%
rowwise %>% ## !important
mutate(aov_result = list(aov(RESULT ~ TREATMENT, data = data)),
tukey = list(TukeyHSD(aov_result)),
cld = list(multcompLetters4(aov_result, tukey)$TREATMENT$Letters )
) %>%
unnest(cld) %>%
select(SPECIES, LETTER = cld) %>%
mutate(TREATMENT = names(LETTER))
- 加入两者:
df_summary %>%
left_join(df_tukey)
使用 emmeans
and multcomp
您可以很容易地获得 CLD 并通过嵌套和取消嵌套数据框的组合实现您正在寻找的内容:
library(dplyr)
library(ggplot2)
library(emmeans)
library(multcomp)
library(tidyr)
# Generate species
species <- rep (c("Oak", "Elm", "Ash"), each = 10)
# Generate treatments
dose_1 <- rep (c("Ctrl"), 30)
dose_2 <- rep (c ("L"), 30)
# Generate results
result_1 <- c((runif(10, 9, 12)), runif(10, 14, 16), runif(10, 6, 8), (runif(10, 2, 5)), runif(10, 1, 4), runif(10, 2, 4))
# Combine into a sinlge dataframe
data <- data.frame (species = rep(species, 2), treatment = c(dose_1, dose_2), result = result_1)
df_aov <- data %>%
dplyr::group_by(species) %>%
tidyr::nest() %>%
rowwise() %>%
dplyr::mutate(aov_results = list(aov(result ~ treatment, data = data)),
emm = list(emmeans::emmeans(aov_results, "treatment", type= "response")),
cld = list(multcomp::cld(emm, Letters = LETTERS, reverse = TRUE))) %>%
dplyr::select(-data, -aov_results, -emm) %>%
unnest(cols = c(cld)) %>%
dplyr::mutate(cld = trimws(.group)) %>%
dplyr::select(-.group)
ggplot(df_aov, aes(x= reorder(species, -emmean), y = emmean, fill = treatment))+
geom_bar(position = 'dodge', stat = 'identity')+
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), position = position_dodge(.9), width = 0.2) +
geom_text(aes(label = cld, y = upper.CL), vjust = -0.5, position = position_dodge(0.9),size = 3)