facet_wrap 中各组的平均值和观测值

Question

我阅读了很多关于如何在 facet_wrap 的每个分面中添加标签的帖子，但仍希望能在这里得到一些帮助：我需要在每个分面中添加一个标签，显示数据集中各组的均值 (λ) 和观测值数量 (k)。我已经参考了这些帖子中的大部分内容，但仍遇到以下问题：

将正确的各组均值和观测值数量添加到每个分面
将均值四舍五入到两位小数
将 'mean' 标签更改为正确的希腊字母 lambda（Unicode U+03BB）

# Keep only the item/type identifiers and the two quantity columns.
df_dztpois <- subset(
  df_full,
  select = c("PIA_ITEM", "PIA_TYPE", "DELY_QTY_WINS", "DELY_QTY_DZTPOIS")
)

# One row per PIA_ITEM: observation count, mean of DELY_QTY_WINS, and the
# text label built from both.
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_ITEM) %>%
  summarize(
    count = length(DELY_QTY_WINS),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  mutate(lab = paste("count = ", count, "\nmean = ", mean))

# Line plot of DELY_QTY_DZTPOIS against DELY_QTY_WINS, faceted by PIA_TYPE,
# with the summary labels drawn at the top-right corner (x = Inf, y = Inf).
p <- ggplot(data = df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~ PIA_TYPE, nrow = 3, scales = "free") +
  labs(
    x = "Observation",
    y = "Probability",
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  geom_text(
    data = df_dztpois_summary,
    aes(label = lab),
    x = Inf,
    y = Inf,
    hjust = 1,
    vjust = 1.2
  )
p

数据集的简要样本：

> dput(df_dztpois[1:10,])
structure(list(PIA_ITEM = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211", 
"RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215", 
"RX20 2.0t Type 6225", "RX20 2.0t Type 6230"), class = "factor"), 
    PIA_TYPE = c("6211", "6211", "6211", "6211", "6211", "6211", 
    "6211", "6211", "6211", "6211"), DELY_QTY_WINS = c(75, 62, 
    57, 57, 67, 57, 53, 70, 70, 60), DELY_QTY_DZTPOIS = c(1.09669678480388e-08, 
    3.8744657910606e-05, 0.000448728658516301, 0.000448728658516301, 
    2.24150656988175e-06, 0.000448728658516301, 0.0023318386482722, 
    3.38775858809732e-07, 3.38775858809732e-07, 0.000108487832825478
    )), row.names = c(NA, 10L), class = "data.frame")

> str(df_dztpois)
'data.frame':   959 obs. of  4 variables:
 $ PIA_ITEM        : Factor w/ 7 levels "RX20 1.5t Type 6209-10",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ PIA_TYPE        : chr  "6211" "6211" "6211" "6211" ...
 $ DELY_QTY_WINS   : num  75 62 57 57 67 57 53 70 70 60 ...
 $ DELY_QTY_DZTPOIS: num  1.10e-08 3.87e-05 4.49e-04 4.49e-04 2.24e-06 ...

当前绘图输出：

Answer 1

您的代码存在一个问题：

您按 PIA_TYPE 分面，但摘要数据框 (summary df) 却是按 PIA_ITEM 分组的。

我不确定这是否就是根本原因，因为在您的示例数据中，PIA_TYPE 和 PIA_ITEM 的分组是相同的，所以我把 PIA_TYPE 改成了两个不同的取值。

创建摘要数据框 (summary df) 时，可以使用 `n()` 计算观测值数量，使用 `round()` 保留正确的小数位数，并将 lambda 写作 `\U03BB`。

# Ten-row sample of the question's data. PIA_TYPE carries two distinct values
# ("6211" and "6212") so that faceting by it yields two panels.
df_dztpois <- structure(
  list(
    PIA_ITEM = structure(
      rep(2L, 10L),
      .Label = c(
        "RX20 1.5t Type 6209-10",
        "RX20 1.6t Type 6211",
        "RX20 1.6t Type 6221",
        "RX20 1.8t Type 6213",
        "RX20 2.0t Type 6215",
        "RX20 2.0t Type 6225",
        "RX20 2.0t Type 6230"
      ),
      class = "factor"
    ),
    PIA_TYPE = rep(c("6211", "6212"), each = 5L),
    DELY_QTY_WINS = c(75, 62, 57, 57, 67, 57, 53, 70, 70, 60),
    DELY_QTY_DZTPOIS = c(
      1.09669678480388e-08,
      3.8744657910606e-05,
      0.000448728658516301,
      0.000448728658516301,
      2.24150656988175e-06,
      0.000448728658516301,
      0.0023318386482722,
      3.38775858809732e-07,
      3.38775858809732e-07,
      0.000108487832825478
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

library(ggplot2)
library(dplyr)

# Per-facet summary: one row per PIA_TYPE (the faceting variable) holding the
# observation count, the mean of DELY_QTY_WINS, and the label text to draw.
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_TYPE) %>%
  summarize(count = n(),
            mean = mean(DELY_QTY_WINS)) %>%
  # paste0() rather than paste(): the literals already end in a space, and
  # paste()'s default sep = " " would double it and leave a trailing space
  # before the newline, which skews a right-aligned (hjust = 1) label.
  mutate(lab = paste0("count = ", count, "\n\U03BB = ", round(mean, digits = 2)))

# Line plot of DELY_QTY_DZTPOIS against DELY_QTY_WINS with one panel per
# PIA_TYPE; each row of df_dztpois_summary supplies a label drawn at the
# top-right corner (x = Inf, y = Inf) of the plot area.
p <- ggplot(data = df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~ PIA_TYPE, nrow = 3, scales = "free") +
  xlab("Observation") +
  ylab("Probability") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  geom_text(
    data = df_dztpois_summary,
    aes(label = lab),
    x = Inf,
    y = Inf,
    hjust = 1,
    vjust = 1.2
  )
p

facet_wrap 中各组的平均值和观测值

Mean and observations value by group in facet_wrap

r

ggplot2

facet-wrap

数据集的简要样本：

当前绘图输出：