ggplot:以因子变量为横轴绘制密度图

ggplot density with factor variable

我有一个长格式的数据集。该数据包含来自波兰和德国这两个国家的政策评估。共有五列,分别是:cntry(国家)、wgt_2(权重)、type(被评估的政策)、value(受访者给政策的评分)、labels(以字符串表示的评分含义)。

我想绘制评分的加权密度曲线,用两条线分别表示两个国家,并按 type 分面。我遇到了两个问题:

  1. 我不知道如何将权重整合到密度图中。权重包含在数据集中 (wgt_2)

  2. 我想在横轴上使用标签而不是数值,以便读者立即知道评估所用的量表。问题在于,使用标签后画出的曲线中,ggplot 还会试图对因子水平之间的“中间”值进行平滑,因此曲线变得摇摆不定。我尝试过使用 scale_x_discrete,也尝试了此处建议的方法,但都没有帮助。我附上一张图片来说明我的意思:

这是我使用的命令:

    # Density of the `labels` factor: one line per country (cntry), one panel
    # per policy type. No weight aesthetic is mapped, so wgt_2 is not used here.
    ggplot(plot_dat, aes(x=labels, color=cntry, group=cntry)) +
  geom_density() +
  facet_wrap(~type)

这是一个 100 行的数据集示例,用于复制问题:

# Sample data in dput()-style form: a 100-row tibble (class tbl_df). Columns:
#   cntry  - factor with levels "Germany", "Poland"
#   wgt_2  - numeric weights carrying Stata `label` / `format.stata` attributes
#   type   - character: "Economic meassures" or "Health meassures"
#            (spelling exactly as stored in the data)
#   value  - haven_labelled double coded 1-5, with value labels attached
#   labels - factor whose five levels are the value labels, in scale order
# NOTE(review): the expression is not assigned to a name; the plotting code
# presumably expects it to be bound to `plot_dat` -- confirm.
structure(list(cntry = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 
2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("Germany", 
"Poland"), class = "factor"), wgt_2 = structure(c(1.27960370623135, 
1.12172797554474, 1.12172797554474, 0.894262014366493, 1.00972997045152, 
1.13313617678755, 1.32877801805357, 0.759155232925338, 1.13313617678755, 
0.884543585038424, 1.13313617678755, 0.884543585038424, 1.26672089753564, 
1.08715705397184, 1.20856396838766, 1.09821366192373, 0.944801135944303, 
0.84461528487141, 1.08715705397184, 1.13313617678755, 1.00733073227995, 
0.853205193791076, 0.853205193791076, 1.09821366192373, 0.66171219592128, 
1.01923047425237, 1.19639637436972, 0.767496027664015, 1.00733073227995, 
0.835436393423981, 0.791262177881762, 0.535937860607983, 0.903356840604329, 
1.01494775076143, 0.95965888977453, 1.05528409877768, 1.27960370623135, 
1.13313617678755, 0.766875995766742, 0.987425989567564, 1.13313617678755, 
1.19639637436972, 0.948787865326323, 1.12172797554474, 1.34229196026369, 
1.00295405332661, 0.959796632690522, 1.00733073227995, 0.84461528487141, 
1.05528409877768, 0.84461528487141, 1.08715705397184, 1.20856396838766, 
1.09821366192373, 1.12172797554474, 0.893539572876972, 1.01923047425237, 
0.759155232925338, 0.84461528487141, 0.971134847547882, 1.26672089753564, 
1.13313617678755, 0.947612622945283, 0.766875995766742, 0.843932951154142, 
0.84461528487141, 1.00309801053618, 1.01494775076143, 0.655050202375811, 
0.655050202375811, 1.01923047425237, 1.01923047425237, 1.19639637436972, 
1.26672089753564, 1.12172797554474, 0.84461528487141, 0.938072237840432, 
1.34229196026369, 1.13313617678755, 0.955626481232642, 1.09821366192373, 
1.08715705397184, 0.84461528487141, 1.00309801053618, 0.95965888977453, 
0.84461528487141, 1.20856396838766, 1.08715705397184, 0.558604275386284, 
0.853205193791076, 0.775301618081247, 0.938072237840432, 1.00548716730424, 
0.894262014366493, 0.937314403677854, 1.09821366192373, 1.00309801053618, 
1.19639637436972, 1.00548716730424, 1.32877801805357), label = "weight with 2 lvl education", format.stata = "%9.0g"), 
    type = c("Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Economic meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Health meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Economic meassures", "Health meassures", 
    "Health meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Economic meassures", "Health meassures", 
    "Health meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Economic meassures", "Economic meassures", 
    "Economic meassures", "Economic meassures", "Health meassures", 
    "Economic meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Economic meassures", "Economic meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Economic meassures", "Economic meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Economic meassures", "Economic meassures", "Health meassures", 
    "Economic meassures", "Health meassures", "Health meassures", 
    "Health meassures", "Health meassures", "Economic meassures", 
    "Health meassures"), value = structure(c(2, 2, 2, 4, 1, 2, 
    3, 4, 1, 3, 2, 3, 4, 5, 1, 3, 3, 3, 3, 3, 4, 1, 3, 1, 3, 
    3, 2, 3, 3, 1, 3, 3, 4, 3, 2, 2, 3, 3, 3, 1, 3, 2, 2, 3, 
    1, 3, 2, 3, 2, 1, 1, 3, 4, 3, 1, 3, 2, 2, 2, 3, 3, 1, 2, 
    5, 1, 3, 1, 3, 5, 2, 1, 4, 1, 2, 2, 3, 2, 3, 3, 1, 3, 2, 
    3, 1, 2, 3, 2, 2, 3, 3, 2, 5, 2, 2, 2, 3, 2, 3, 1, 3), labels = c(`not at all sufficient` = 1, 
    `rather not sufficient` = 2, appropriate = 3, `rather too restrictive` = 4, 
    `extremely restrictive` = 5), label = "measures to overcome health risks due to corona", class = c("haven_labelled", 
    "vctrs_vctr", "double")), labels = structure(c(2L, 2L, 2L, 
    4L, 1L, 2L, 3L, 4L, 1L, 3L, 2L, 3L, 4L, 5L, 1L, 3L, 3L, 3L, 
    3L, 3L, 4L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 4L, 
    3L, 2L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 2L, 3L, 1L, 3L, 2L, 3L, 
    2L, 1L, 1L, 3L, 4L, 3L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 2L, 
    5L, 1L, 3L, 1L, 3L, 5L, 2L, 1L, 4L, 1L, 2L, 2L, 3L, 2L, 3L, 
    3L, 1L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 5L, 2L, 
    2L, 2L, 3L, 2L, 3L, 1L, 3L), .Label = c("not at all sufficient", 
    "rather not sufficient", "appropriate", "rather too restrictive", 
    "extremely restrictive"), class = "factor")), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

将权重合并到密度图中的一种方法是使用 uncount,按每个观察值的权重成比例地复制该行。您可以通过 bw 或 adjust 参数调整平滑带宽,从而控制曲线的摆动程度。这里我将 adjust 设置为 1.5,使用更宽的带宽,让曲线更平滑。

library(tidyverse)

# Weighted density of the answer labels: one line per country, one panel per
# policy type. Weights are applied by row replication: each row is repeated
# round(wgt_2 * 100) times, so the number of copies is proportional to wgt_2.
plot_dat %>%
  mutate(
    # fct_relabel() wraps the level text but keeps the factor's level order.
    # str_wrap() on its own returns a character vector, which ggplot orders
    # alphabetically and would scramble the 1-5 scale along the x axis.
    labels_wrap = fct_relabel(labels, str_wrap, width = 12),
    # uncount() needs whole-number weights; as.numeric() also drops the
    # Stata label/format attributes carried by wgt_2.
    n_copies = round(as.numeric(wgt_2) * 100)
  ) %>%
  uncount(n_copies) %>%
  ggplot(aes(x = labels_wrap, color = cntry, group = cntry)) +
  # adjust > 1 widens the smoothing bandwidth, giving a smoother curve.
  geom_density(adjust = 1.5) +
  facet_wrap(~type) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

我也不太确定,不过直接使用 weight 美学映射得到的结果截然不同:

# Weighted density via the `weight` aesthetic: each observation contributes
# to the density estimate in proportion to wgt_2, so no row replication is
# needed.
# The data argument is plot_dat, the data set used by the other snippets;
# a bare `df` is not defined here and would resolve to the function stats::df.
ggplot(
  plot_dat,
  aes(x = labels, weight = wgt_2, color = cntry, group = cntry)
) +
  geom_density() +
  facet_wrap(~type)