ggplot 条形图中的误差线重复出现
Errorbar duplicated for ggplot barplot
我是 ggplot 的新手,在条形图中绘制误差线时遇到问题。
一个最小的工作示例如下所示:
# Simulated example data: 6 tree genera with 5 rows each (30 rows in total).
# Every numeric column is a random draw without a fixed seed, so the values
# differ between runs; they only serve to reproduce the plotting problem.
abun_all <- data.frame(
  Tree.genus = rep(
    c("Acer", "Betula", "Larix", "Picea", "Pinus", "Quercus"),
    each = 5
  ),
  P.sampled = sample(seq(from = 0.001, to = 0.06, by = 0.0005), 30),
  Insects.sampled = sample(seq(from = 1.667, to = 533, by = 1.335), 30),
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  Category = as.factor(sample(1:3, 30, replace = TRUE)),
  P.sampled_mean = sample(seq(from = 0.006, to = 0.178, by = 0.0005), 30),
  P.sampled_sd = sample(seq(from = 0.004, to = 0.2137, by = 0.0005), 30)
)
# Dodged bar chart of P.sampled per genus, filled by Category.
# Error bars span P.sampled -/+ (P.sampled_mean + P.sampled_sd) and are drawn
# for every row of abun_all. With 5 rows per genus and only 3 categories, at
# least one genus x category pair has several rows, so several bars and error
# bars land on the same dodge position.
ggplot(
  abun_all,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - (P.sampled_mean + P.sampled_sd),
      ymax = P.sampled + (P.sampled_mean + P.sampled_sd)
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
注意:这些值只是随机的,并不代表实际数据,但应该足以证明问题!
问题似乎是为每个类别的每个 Tree.genus 的整体数量绘制了误差线。我怎样才能让它工作?
编辑:我用每个组合的 P.sampled 最大值手动创建了另一个数据框,现在绘图看起来像我想要的那样(除了两个缺失的误差线)。
# Hand-built table with 18 rows: three category rows for each genus.
# NOTE(review): `genera` is not defined in this snippet; it presumably holds
# the six genus names used in abun_all -- confirm before running.
abun_plot <- data.frame(
  Tree.genus = rep(genera, each = 3),
  P.sampled = c(0.400000000, 0.100000000, 0.500000000, 0.200000000, 0.100000000, 0.042857143, 0.016666667, 0.0285714286, 0.0222222222, 0.020000000, 0, 0.010000000, 0.060000000, 0.025000000, 0.040000000, 0.250000000, 0.150000000, 0.600000000),
  # All 18 category values are spelled out; the original supplied only 9 and
  # relied on data.frame() silently recycling them.
  Category = as.factor(rep(c(1, 2, 3), times = 6)),
  # Missing values are plain NA literals. Quoting them as "NA" turned the
  # whole vector into character and needed as.numeric() to convert it back.
  P.sampled_SD = c(0.08493057, 0.02804758, 0.19476489, 0.04533747, 0.02447665, 0.01308939, 0.004200168, NA, 0.015356359, 0.005724859, NA, NA, 0.01633612, 0.01013794, 0.02045931, 0.07584737, 0.05760980, 0.21374053),
  P.sampled_Mean = c(0.07837134, 0.05133333, 0.14089286, 0.04537983, 0.02686200, 0.01680721, 0.005833333, 0.028571429, 0.011363636, 0.01101331, NA, 0.01000000, 0.02162986, 0.01333333, 0.01668582, 0.08705221, 0.04733333, 0.17870370)
)
# Dodged bar chart of the hand-built abun_plot table: P.sampled per genus,
# filled by Category, with error bars of P.sampled -/+ P.sampled_SD.
ggplot(
  abun_plot,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - P.sampled_SD,
      ymax = P.sampled + P.sampled_SD
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
由于手动执行此操作需要花费大量时间,而且其他几个绘图也存在同样的问题,因此我更愿意使用原始 df (abun_all)。我可以只在 ggplot()
函数中对我的 df 进行子集化以获得所需的输出吗?
由于您只想显示每个属和类别组合的最大值,您可以使用几个 dplyr 函数(dplyr 与 ggplot2 一样属于 tidyverse)按属和类别分组,然后在每组中取最大值所在的行。这样,您就不必像第二个代码块那样手动构建 abun_plot。
library(dplyr)
library(ggplot2)

# Reduce abun_all to exactly one row per genus x category combination: the row
# with the largest P.sampled_mean.
# row_number() replaces top_n(1, P.sampled_mean) because top_n() keeps every
# tied row, which would again put several bars and error bars on one dodge
# position. Without ties both forms select the same rows, in the same order,
# and the result stays grouped by Tree.genus and Category.
abun_plot <- abun_all %>%
  group_by(Tree.genus, Category) %>%
  filter(row_number(desc(P.sampled_mean)) == 1L)

# Example output of one run (reprex-style "#>" lines).
head(abun_plot)
#> # A tibble: 6 x 6
#> # Groups: Tree.genus, Category [6]
#> Tree.genus P.sampled Insects.sampled Category P.sampled_mean P.sampled_sd
#> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
#> 1 Acer 0.041 295. 3 0.0125 0.044
#> 2 Acer 0.044 81.8 1 0.166 0.037
#> 3 Acer 0.0085 379. 2 0.155 0.134
#> 4 Betula 0.0505 183. 2 0.170 0.0805
#> 5 Betula 0.0325 61.7 3 0.0405 0.0995
#> 6 Betula 0.0465 326. 1 0.0985 0.188
之后,绘图如您最初预期的那样工作:
# Dodged bar chart of abun_plot: P.sampled per genus, filled by Category, with
# error bars of P.sampled -/+ P.sampled_sd. Expects abun_plot to hold one row
# per genus x category combination so that each bar gets a single error bar.
ggplot(
  abun_plot,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_col(position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - P.sampled_sd,
      ymax = P.sampled + P.sampled_sd
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
还值得注意的是,在较新版本的 ggplot2 中,您可以使用 geom_col() 代替 geom_bar(stat = "identity")。
由 reprex package (v0.2.1)
创建于 2018-10-03
我是 ggplot 的新手,在条形图中绘制误差线时遇到问题。 一个最小的工作示例如下所示:
# Simulated example data: 6 tree genera with 5 rows each (30 rows in total).
# Every numeric column is a random draw without a fixed seed, so the values
# differ between runs; they only serve to reproduce the plotting problem.
abun_all <- data.frame(
  Tree.genus = rep(
    c("Acer", "Betula", "Larix", "Picea", "Pinus", "Quercus"),
    each = 5
  ),
  P.sampled = sample(seq(from = 0.001, to = 0.06, by = 0.0005), 30),
  Insects.sampled = sample(seq(from = 1.667, to = 533, by = 1.335), 30),
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  Category = as.factor(sample(1:3, 30, replace = TRUE)),
  P.sampled_mean = sample(seq(from = 0.006, to = 0.178, by = 0.0005), 30),
  P.sampled_sd = sample(seq(from = 0.004, to = 0.2137, by = 0.0005), 30)
)
# Dodged bar chart of P.sampled per genus, filled by Category.
# Error bars span P.sampled -/+ (P.sampled_mean + P.sampled_sd) and are drawn
# for every row of abun_all. With 5 rows per genus and only 3 categories, at
# least one genus x category pair has several rows, so several bars and error
# bars land on the same dodge position.
ggplot(
  abun_all,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - (P.sampled_mean + P.sampled_sd),
      ymax = P.sampled + (P.sampled_mean + P.sampled_sd)
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
注意:这些值只是随机的,并不代表实际数据,但应该足以证明问题!
问题似乎是为每个类别的每个 Tree.genus 的整体数量绘制了误差线。我怎样才能让它工作?
编辑:我用每个组合的 P.sampled 最大值手动创建了另一个数据框,现在绘图看起来像我想要的那样(除了两个缺失的误差线)。
# Hand-built table with 18 rows: three category rows for each genus.
# NOTE(review): `genera` is not defined in this snippet; it presumably holds
# the six genus names used in abun_all -- confirm before running.
abun_plot <- data.frame(
  Tree.genus = rep(genera, each = 3),
  P.sampled = c(0.400000000, 0.100000000, 0.500000000, 0.200000000, 0.100000000, 0.042857143, 0.016666667, 0.0285714286, 0.0222222222, 0.020000000, 0, 0.010000000, 0.060000000, 0.025000000, 0.040000000, 0.250000000, 0.150000000, 0.600000000),
  # All 18 category values are spelled out; the original supplied only 9 and
  # relied on data.frame() silently recycling them.
  Category = as.factor(rep(c(1, 2, 3), times = 6)),
  # Missing values are plain NA literals. Quoting them as "NA" turned the
  # whole vector into character and needed as.numeric() to convert it back.
  P.sampled_SD = c(0.08493057, 0.02804758, 0.19476489, 0.04533747, 0.02447665, 0.01308939, 0.004200168, NA, 0.015356359, 0.005724859, NA, NA, 0.01633612, 0.01013794, 0.02045931, 0.07584737, 0.05760980, 0.21374053),
  P.sampled_Mean = c(0.07837134, 0.05133333, 0.14089286, 0.04537983, 0.02686200, 0.01680721, 0.005833333, 0.028571429, 0.011363636, 0.01101331, NA, 0.01000000, 0.02162986, 0.01333333, 0.01668582, 0.08705221, 0.04733333, 0.17870370)
)
# Dodged bar chart of the hand-built abun_plot table: P.sampled per genus,
# filled by Category, with error bars of P.sampled -/+ P.sampled_SD.
ggplot(
  abun_plot,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_bar(stat = "identity", position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - P.sampled_SD,
      ymax = P.sampled + P.sampled_SD
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
由于手动执行此操作需要花费大量时间,而且其他几个绘图也存在同样的问题,因此我更愿意使用原始 df (abun_all)。我可以只在 ggplot()
函数中对我的 df 进行子集化以获得所需的输出吗?
由于您只想显示每个属和类别组合的最大值,您可以使用几个 dplyr 函数(dplyr 与 ggplot2 一样属于 tidyverse)按属和类别分组,然后在每组中取最大值所在的行。这样,您就不必像第二个代码块那样手动构建 abun_plot。
library(dplyr)
library(ggplot2)

# Reduce abun_all to exactly one row per genus x category combination: the row
# with the largest P.sampled_mean.
# row_number() replaces top_n(1, P.sampled_mean) because top_n() keeps every
# tied row, which would again put several bars and error bars on one dodge
# position. Without ties both forms select the same rows, in the same order,
# and the result stays grouped by Tree.genus and Category.
abun_plot <- abun_all %>%
  group_by(Tree.genus, Category) %>%
  filter(row_number(desc(P.sampled_mean)) == 1L)

# Example output of one run (reprex-style "#>" lines).
head(abun_plot)
#> # A tibble: 6 x 6
#> # Groups: Tree.genus, Category [6]
#> Tree.genus P.sampled Insects.sampled Category P.sampled_mean P.sampled_sd
#> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
#> 1 Acer 0.041 295. 3 0.0125 0.044
#> 2 Acer 0.044 81.8 1 0.166 0.037
#> 3 Acer 0.0085 379. 2 0.155 0.134
#> 4 Betula 0.0505 183. 2 0.170 0.0805
#> 5 Betula 0.0325 61.7 3 0.0405 0.0995
#> 6 Betula 0.0465 326. 1 0.0985 0.188
之后,绘图如您最初预期的那样工作:
# Dodged bar chart of abun_plot: P.sampled per genus, filled by Category, with
# error bars of P.sampled -/+ P.sampled_sd. Expects abun_plot to hold one row
# per genus x category combination so that each bar gets a single error bar.
ggplot(
  abun_plot,
  aes(x = as.factor(Tree.genus), y = P.sampled, fill = Category)
) +
  geom_col(position = position_dodge(width = 1)) +
  geom_errorbar(
    aes(
      ymin = P.sampled - P.sampled_sd,
      ymax = P.sampled + P.sampled_sd
    ),
    position = position_dodge(width = 1),
    width = 0.1
  ) +
  scale_fill_discrete(
    name = "Category",
    breaks = c(1, 2, 3),
    labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")
  ) +
  labs(x = "Genus", y = "No. of Focus sp. per total insect abundance")
还值得注意的是,在较新版本的 ggplot2 中,您可以使用 geom_col() 代替 geom_bar(stat = "identity")。
由 reprex package (v0.2.1)
创建于 2018-10-03