facet_wrap 中各组的平均值和观测值
Mean and observations value by group in facet_wrap
我阅读了很多关于如何在每个 facet_wrap
中添加标签的帖子,但仍希望能在这里得到一些帮助,因为我需要添加一个标签,其中包含各组的均值 (λ) 和数据集中各组的观测数 (k)。我采用了这些帖子中的大部分做法,但遇到了以下问题:
- 将正确的各组均值和各组观测数添加到各个分面
- 将平均值四舍五入到两位数
- 将 'mean' 标签更改为正确的希腊字母 lambda - unicode (u+03BB)
# Keep only the identifier columns, the observed order quantity and the
# probability column that is plotted on the y axis.
df_dztpois <- subset(
  df_full,
  select = c("PIA_ITEM", "PIA_TYPE", "DELY_QTY_WINS", "DELY_QTY_DZTPOIS")
)

# Per-group label text: number of observations and the (unrounded) mean.
# NOTE: the grouping variable here is PIA_ITEM, while the plot below is
# faceted by PIA_TYPE, so this summary carries no PIA_TYPE column.
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_ITEM) %>%
  summarise(
    count = length(DELY_QTY_WINS),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  mutate(lab = paste("count = ", count, "\nmean = ", mean))

# Build the faceted line plot in a single chain and attach the label layer.
p <- ggplot(df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~PIA_TYPE, nrow = 3, scales = "free") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    x = "Observation",
    y = "Probability",
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  # x = Inf / y = Inf pin the text to the top-right corner of a panel.
  geom_text(
    data = df_dztpois_summary,
    mapping = aes(label = lab),
    x = Inf, y = Inf, hjust = 1, vjust = 1.2
  )

# Print the plot.
p
数据集的简要样本:
> dput(df_dztpois[1:10,])
structure(list(PIA_ITEM = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211",
"RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215",
"RX20 2.0t Type 6225", "RX20 2.0t Type 6230"), class = "factor"),
PIA_TYPE = c("6211", "6211", "6211", "6211", "6211", "6211",
"6211", "6211", "6211", "6211"), DELY_QTY_WINS = c(75, 62,
57, 57, 67, 57, 53, 70, 70, 60), DELY_QTY_DZTPOIS = c(1.09669678480388e-08,
3.8744657910606e-05, 0.000448728658516301, 0.000448728658516301,
2.24150656988175e-06, 0.000448728658516301, 0.0023318386482722,
3.38775858809732e-07, 3.38775858809732e-07, 0.000108487832825478
)), row.names = c(NA, 10L), class = "data.frame")
> str(df_dztpois)
'data.frame': 959 obs. of 4 variables:
$ PIA_ITEM : Factor w/ 7 levels "RX20 1.5t Type 6209-10",..: 2 2 2 2 2 2 2 2 2 2 ...
$ PIA_TYPE : chr "6211" "6211" "6211" "6211" ...
$ DELY_QTY_WINS : num 75 62 57 57 67 57 53 70 70 60 ...
$ DELY_QTY_DZTPOIS: num 1.10e-08 3.87e-05 4.49e-04 4.49e-04 2.24e-06 ...
当前绘图输出:
您的代码存在一个问题:
- 您按
PIA_TYPE
分面,但摘要 df 按 PIA_ITEM
分组
我不确定这是否是根本原因,因为在您的示例数据中,PIA_TYPE
和 PIA_ITEM
相同,所以我将 PIA_TYPE
更改为包含 2 个不同的水平(6211 和 6212)。
创建summary df时,可以使用n()
计算观察次数,使用round
获取正确位数,将lambda表示为\U03BB
。
# Packages used below: ggplot2 for the plot, dplyr for the per-facet summary.
library(ggplot2)
library(dplyr)

# Ten-row sample of the data. PIA_TYPE holds two distinct values ("6211" and
# "6212") so that the plot has more than one facet.
df_dztpois <- structure(
  list(
    PIA_ITEM = structure(
      c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
      .Label = c(
        "RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211",
        "RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215",
        "RX20 2.0t Type 6225", "RX20 2.0t Type 6230"
      ),
      class = "factor"
    ),
    PIA_TYPE = c(
      "6211", "6211", "6211", "6211", "6211",
      "6212", "6212", "6212", "6212", "6212"
    ),
    DELY_QTY_WINS = c(75, 62, 57, 57, 67, 57, 53, 70, 70, 60),
    DELY_QTY_DZTPOIS = c(
      1.09669678480388e-08, 3.8744657910606e-05, 0.000448728658516301,
      0.000448728658516301, 2.24150656988175e-06, 0.000448728658516301,
      0.0023318386482722, 3.38775858809732e-07, 3.38775858809732e-07,
      0.000108487832825478
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# One row per PIA_TYPE, i.e. per facet: observation count and mean of
# DELY_QTY_WINS, combined into a two-line label. The summary is grouped by
# the same variable the plot is faceted by, so it contains the PIA_TYPE
# column needed to match each label to its panel.
# paste0() is used because the literal pieces already contain their own
# spaces; paste() would insert an extra separator space around every value
# (e.g. "count =  5 \n...").
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_TYPE) %>%
  summarize(
    count = n(),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  mutate(
    lab = paste0(
      "count = ", count,
      "\n\U03BB = ", round(mean, digits = 2)
    )
  )

# Line plot of the probability column against the observed quantity, one
# panel per PIA_TYPE with independent axes.
p <- ggplot(data = df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~PIA_TYPE, nrow = 3, scales = "free") +
  xlab("Observation") +
  ylab("Probability") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  # x = Inf / y = Inf pin the label to the top-right corner of each panel;
  # hjust / vjust pull the text back inside the panel.
  geom_text(
    data = df_dztpois_summary,
    aes(label = lab),
    x = Inf, y = Inf, hjust = 1, vjust = 1.2
  )

# Print the plot.
p
我阅读了很多关于如何在每个 facet_wrap
中添加标签的帖子,但仍希望能在这里得到一些帮助,因为我需要添加一个标签,其中包含各组的均值 (λ) 和数据集中各组的观测数 (k)。我采用了这些帖子中的大部分做法,但遇到了以下问题:
- 将正确的各组均值和各组观测数添加到各个分面
- 将平均值四舍五入到两位数
- 将 'mean' 标签更改为正确的希腊字母 lambda - unicode (u+03BB)
# Keep only the identifier columns, the observed order quantity and the
# probability column that is plotted on the y axis.
df_dztpois <- subset(
  df_full,
  select = c("PIA_ITEM", "PIA_TYPE", "DELY_QTY_WINS", "DELY_QTY_DZTPOIS")
)

# Per-group label text: number of observations and the (unrounded) mean.
# NOTE: the grouping variable here is PIA_ITEM, while the plot below is
# faceted by PIA_TYPE, so this summary carries no PIA_TYPE column.
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_ITEM) %>%
  summarise(
    count = length(DELY_QTY_WINS),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  mutate(lab = paste("count = ", count, "\nmean = ", mean))

# Build the faceted line plot in a single chain and attach the label layer.
p <- ggplot(df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~PIA_TYPE, nrow = 3, scales = "free") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    x = "Observation",
    y = "Probability",
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  # x = Inf / y = Inf pin the text to the top-right corner of a panel.
  geom_text(
    data = df_dztpois_summary,
    mapping = aes(label = lab),
    x = Inf, y = Inf, hjust = 1, vjust = 1.2
  )

# Print the plot.
p
数据集的简要样本:
> dput(df_dztpois[1:10,])
structure(list(PIA_ITEM = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211",
"RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215",
"RX20 2.0t Type 6225", "RX20 2.0t Type 6230"), class = "factor"),
PIA_TYPE = c("6211", "6211", "6211", "6211", "6211", "6211",
"6211", "6211", "6211", "6211"), DELY_QTY_WINS = c(75, 62,
57, 57, 67, 57, 53, 70, 70, 60), DELY_QTY_DZTPOIS = c(1.09669678480388e-08,
3.8744657910606e-05, 0.000448728658516301, 0.000448728658516301,
2.24150656988175e-06, 0.000448728658516301, 0.0023318386482722,
3.38775858809732e-07, 3.38775858809732e-07, 0.000108487832825478
)), row.names = c(NA, 10L), class = "data.frame")
> str(df_dztpois)
'data.frame': 959 obs. of 4 variables:
$ PIA_ITEM : Factor w/ 7 levels "RX20 1.5t Type 6209-10",..: 2 2 2 2 2 2 2 2 2 2 ...
$ PIA_TYPE : chr "6211" "6211" "6211" "6211" ...
$ DELY_QTY_WINS : num 75 62 57 57 67 57 53 70 70 60 ...
$ DELY_QTY_DZTPOIS: num 1.10e-08 3.87e-05 4.49e-04 4.49e-04 2.24e-06 ...
当前绘图输出:
您的代码存在一个问题:
- 您按
PIA_TYPE
分面,但摘要 df 按PIA_ITEM
分组
我不确定这是否是根本原因,因为在您的示例数据中,PIA_TYPE
和 PIA_ITEM
相同,所以我将 PIA_TYPE
更改为包含 2 个不同的水平(6211 和 6212)。
创建summary df时,可以使用n()
计算观察次数,使用round
获取正确位数,将lambda表示为\U03BB
。
# Packages used below: ggplot2 for the plot, dplyr for the per-facet summary.
library(ggplot2)
library(dplyr)

# Ten-row sample of the data. PIA_TYPE holds two distinct values ("6211" and
# "6212") so that the plot has more than one facet.
df_dztpois <- structure(
  list(
    PIA_ITEM = structure(
      c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
      .Label = c(
        "RX20 1.5t Type 6209-10", "RX20 1.6t Type 6211",
        "RX20 1.6t Type 6221", "RX20 1.8t Type 6213", "RX20 2.0t Type 6215",
        "RX20 2.0t Type 6225", "RX20 2.0t Type 6230"
      ),
      class = "factor"
    ),
    PIA_TYPE = c(
      "6211", "6211", "6211", "6211", "6211",
      "6212", "6212", "6212", "6212", "6212"
    ),
    DELY_QTY_WINS = c(75, 62, 57, 57, 67, 57, 53, 70, 70, 60),
    DELY_QTY_DZTPOIS = c(
      1.09669678480388e-08, 3.8744657910606e-05, 0.000448728658516301,
      0.000448728658516301, 2.24150656988175e-06, 0.000448728658516301,
      0.0023318386482722, 3.38775858809732e-07, 3.38775858809732e-07,
      0.000108487832825478
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)

# One row per PIA_TYPE, i.e. per facet: observation count and mean of
# DELY_QTY_WINS, combined into a two-line label. The summary is grouped by
# the same variable the plot is faceted by, so it contains the PIA_TYPE
# column needed to match each label to its panel.
# paste0() is used because the literal pieces already contain their own
# spaces; paste() would insert an extra separator space around every value
# (e.g. "count =  5 \n...").
df_dztpois_summary <- df_dztpois %>%
  group_by(PIA_TYPE) %>%
  summarize(
    count = n(),
    mean = mean(DELY_QTY_WINS)
  ) %>%
  mutate(
    lab = paste0(
      "count = ", count,
      "\n\U03BB = ", round(mean, digits = 2)
    )
  )

# Line plot of the probability column against the observed quantity, one
# panel per PIA_TYPE with independent axes.
p <- ggplot(data = df_dztpois, aes(x = DELY_QTY_WINS)) +
  geom_line(aes(y = DELY_QTY_DZTPOIS)) +
  facet_wrap(~PIA_TYPE, nrow = 3, scales = "free") +
  xlab("Observation") +
  ylab("Probability") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    strip.text.x = element_text(size = 12)
  ) +
  labs(
    title = "Zero-truncated Poisson distribution of order quantity",
    subtitle = "Grouped by product (PIA_TYPE)",
    caption = "Data source: df_dztpois$DELY_QTY_WINS and df_full$DELY_QTY_DZTPOIS"
  ) +
  # x = Inf / y = Inf pin the label to the top-right corner of each panel;
  # hjust / vjust pull the text back inside the panel.
  geom_text(
    data = df_dztpois_summary,
    aes(label = lab),
    x = Inf, y = Inf, hjust = 1, vjust = 1.2
  )

# Print the plot.
p