ggstance(),一起绘制箱线图和密度图 - R

ggstance(), plotting together boxplot and density plot - R

我正在尝试为数据框中的每个数值变量绘制箱线图和密度图。我正在参考这个解决方案(原文此处为链接)并加以调整来解决我的问题。不同的是,我想为数据框中的每一个数值变量都绘制图形(而不像上述解决方案那样只绘制一个变量):

# Keep only the numeric columns of `df` and stack them into long key/value
# form (rows with missing values are dropped), then draw one facet per
# variable: a horizontal boxplot placed at y = -0.5 together with that
# variable's kernel density curve.
ggplot(
  data = gather(select_if(df, is.numeric), na.rm = TRUE),
  mapping = aes(x = value, y = -0.5)
) +
  geom_boxploth() +
  # The density layer ignores the plot-level mapping so that the constant
  # y = -0.5 used to position the boxplot is not applied to it.
  geom_density(mapping = aes(x = value), inherit.aes = FALSE) +
  # Both axes are left free per facet.
  facet_wrap(~ key, scales = "free")

主要问题在于每个变量的尺度。我已经尝试在 facet_wrap() 的 scales 参数中分别使用 free_x、free_y 和 free,但它们都给我带来了一些美观上的问题:

由于各数值变量的取值范围差异很大,x 轴必须自由缩放,箱线图才容易看懂。但是,如果固定 y 轴(使用 scales="free_x"),峰值只有约 0.1 的密度曲线在 0 到 1 的固定 y 轴上几乎会被压成一条直线(例如 air_temperature 变量)。

我实现的最接近的解决方案是 "free_x",但我无法使密度图更具可读性。

有什么建议吗?

数据(100行):

df <- structure(list(site_id = c("2", "13", "2", "2", "14", "13", "9", 
"2", "4", "13", "3", "13", "6", "13", "13", "14", "11", "13", 
"14", "8", "2", "2", "0", "15", "14", "14", "5", "15", "3", "4", 
"13", "13", "7", "9", "8", "2", "9", "13", "13", "13", "3", "0", 
"6", "2", "4", "2", "3", "3", "13", "8", "2", "13", "4", "8", 
"7", "5", "9", "2", "13", "5", "2", "9", "3", "9", "3", "13", 
"13", "1", "9", "14", "8", "13", "13", "14", "13", "7", "13", 
"5", "3", "14", "9", "15", "9", "9", "11", "3", "9", "13", "2", 
"9", "9", "3", "9", "9", "14", "9", "9", "3", "2", "13"), building_id = c("187", 
"1137", "214", "254", "1234", "1218", "878", "236", "631", "1169", 
"472", "1148", "747", "1179", "1152", "1233", "1031", "1154", 
"1242", "826", "271", "217", "62", "1341", "1245", "1320", "660", 
"1352", "526", "632", "1204", "1083", "797", "891", "858", "252", 
"879", "1145", "1154", "1158", "372", "76", "775", "271", "607", 
"247", "337", "353", "1089", "826", "168", "1114", "566", "836", 
"796", "662", "988", "191", "1200", "697", "207", "980", "343", 
"966", "353", "1162", "1217", "115", "925", "1319", "848", "1139", 
"1075", "1253", "1217", "798", "1220", "741", "491", "1226", 
"945", "1429", "952", "916", "1032", "380", "919", "1112", "256", 
"884", "890", "295", "965", "987", "1224", "946", "928", "350", 
"280", "1143"), primary_use = c("Education", "Office", "Education", 
"Entertainment/public assembly", "Education", "Entertainment/public assembly", 
"Lodging/residential", "Entertainment/public assembly", "Education", 
"Education", "Education", "Office", "Education", "Entertainment/public assembly", 
"Office", "Education", "Education", "Lodging/residential", "Office", 
"Public services", "Education", "Education", "Lodging/residential", 
"Education", "Healthcare", "Entertainment/public assembly", "Entertainment/public assembly", 
"Office", "Public services", "Education", "Office", "Education", 
"Education", "Education", "Other", "Education", "Education", 
"Manufacturing/industrial", "Lodging/residential", "Public services", 
"Education", "Education", "Office", "Education", "Education", 
"Education", "Entertainment/public assembly", "Education", "Office", 
"Public services", "Education", "Other", "Education", "Public services", 
"Education", "Entertainment/public assembly", "Office", "Education", 
"Entertainment/public assembly", "Education", "Public services", 
"Entertainment/public assembly", "Public services", "Education", 
"Education", "Office", "Education", "Education", "Education", 
"Entertainment/public assembly", "Office", "Office", "Office", 
"Education", "Education", "Education", "Public services", "Education", 
"Education", "Education", "Office", "Education", "Education", 
"Education", "Education", "Education", "Education", "Office", 
"Education", "Education", "Education", "Education", "Lodging/residential", 
"Education", "Office", "Education", "Education", "Education", 
"Entertainment/public assembly", "Office"), square_feet = c(44203L, 
27452L, 91273L, 254766L, 46037L, 268480L, 41241L, 144086L, 23533L, 
155439L, 69400L, 861524L, 30531L, 143228L, 118858L, 43143L, 93206L, 
262156L, 23871L, 19446L, 71421L, 282946L, 42731L, 18342L, 35354L, 
34565L, 5447L, 42755L, 7636L, 41297L, 38382L, 100813L, 764237L, 
50846L, 1566L, 4314L, 110235L, 35201L, 262156L, 294812L, 49100L, 
128887L, 104355L, 71421L, 34825L, 64560L, 22334L, 99700L, 62837L, 
19446L, 183460L, 64810L, 2010L, 8168L, 226042L, 12777L, 24463L, 
78268L, 545351L, 18697L, 99700L, 34819L, 22000L, 393530L, 99700L, 
72958L, 73044L, 129716L, 54405L, 287419L, 8552L, 336650L, 72709L, 
134748L, 73044L, 409028L, 381221L, 14025L, 66600L, 27995L, 332447L, 
40461L, 218024L, 51778L, 127632L, 95100L, 31233L, 190471L, 16744L, 
38598L, 202937L, 83400L, 45086L, 166395L, 23481L, 177216L, 179115L, 
83800L, 105505L, 96612L), year_built = c(1950L, NA, 1951L, 1989L, 
NA, NA, NA, 1989L, 1906L, NA, 1911L, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 2003L, 2001L, 1960L, NA, NA, 1976L, NA, NA, 1959L, 
NA, NA, 1979L, NA, NA, 1907L, NA, NA, NA, NA, 1911L, 2009L, NA, 
NA, 1999L, 1965L, 1977L, 1955L, NA, NA, 2005L, NA, 1957L, NA, 
1965L, 1976L, NA, 1964L, NA, 1919L, NA, NA, NA, NA, 1955L, NA, 
NA, 1968L, NA, NA, NA, NA, NA, NA, NA, 1970L, NA, 1966L, 1970L, 
NA, NA, 2002L, NA, NA, NA, 1927L, NA, NA, NA, NA, NA, 1942L, 
NA, NA, NA, NA, NA, 1944L, 2008L, NA), floor_count = c(NA, NA, 
NA, NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 1L, NA, NA, NA, NA, NA, NA, 1L, NA, NA, 6L, NA, NA, 13L, 
NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, NA, NA, NA, 
1L, NA, NA, 1L, 1L, 2L, 2L, NA, NA, NA, 2L, NA, NA, NA, NA, NA, 
NA, NA, 6L, NA, NA, 1L, NA, NA, NA, NA, 21L, NA, 1L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), meter = c("hotwater", "electricity", "chilledwater", 
"electricity", "chilledwater", "chilledwater", "chilledwater", 
"hotwater", "electricity", "electricity", "electricity", "electricity", 
"chilledwater", "electricity", "steam", "chilledwater", "chilledwater", 
"steam", "hotwater", "electricity", "electricity", "hotwater", 
"electricity", "steam", "electricity", "electricity", "electricity", 
"chilledwater", "electricity", "electricity", "steam", "chilledwater", 
"steam", "electricity", "electricity", "electricity", "steam", 
"steam", "chilledwater", "electricity", "electricity", "chilledwater", 
"electricity", "electricity", "electricity", "hotwater", "electricity", 
"electricity", "electricity", "electricity", "electricity", "electricity", 
"electricity", "electricity", "steam", "electricity", "electricity", 
"electricity", "steam", "electricity", "electricity", "electricity", 
"electricity", "electricity", "electricity", "chilledwater", 
"electricity", "electricity", "electricity", "electricity", "electricity", 
"chilledwater", "steam", "hotwater", "chilledwater", "steam", 
"steam", "electricity", "electricity", "electricity", "steam", 
"electricity", "chilledwater", "steam", "hotwater", "electricity", 
"electricity", "steam", "chilledwater", "electricity", "chilledwater", 
"electricity", "electricity", "chilledwater", "hotwater", "steam", 
"steam", "electricity", "electricity", "electricity"), timestamp = structure(c(1456984800, 
1464206400, 1478361600, 1464498000, 1472634000, 1465473600, 1480856400, 
1473026400, 1464656400, 1464451200, 1454306400, 1464332400, 1452088800, 
1473721200, 1462356000, 1475690400, 1453737600, 1456063200, 1477494000, 
1470546000, 1466690400, 1479898800, 1467774000, 1470502800, 1464285600, 
1479355200, 1475762400, 1473930000, 1452841200, 1468245600, 1470355200, 
1465923600, 1461913200, 1456610400, 1476234000, 1452837600, 1469300400, 
1466226000, 1477594800, 1453582800, 1461042000, 1460062800, 1470020400, 
1467921600, 1476741600, 1470337200, 1482840000, 1472911200, 1467972000, 
1460707200, 1475946000, 1466064000, 1464739200, 1471417200, 1461250800, 
1462464000, 1475593200, 1458612000, 1461812400, 1464490800, 1464134400, 
1453935600, 1452762000, 1476414000, 1480496400, 1469430000, 1464714000, 
1463119200, 1472698800, 1477155600, 1456045200, 1480888800, 1475528400, 
1463544000, 1463274000, 1455429600, 1482138000, 1471478400, 1461909600, 
1466841600, 1457560800, 1463551200, 1481346000, 1453608000, 1478336400, 
1460030400, 1458090000, 1464274800, 1478458800, 1478559600, 1470582000, 
1453190400, 1467662400, 1478887200, 1461481200, 1460746800, 1459778400, 
1479168000, 1461967200, 1482433200), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), meter_reading = c(14.6536, 29, 6.1193, 179.17, 
487.708, 90.1306, 92.9855, 0, 7.0825, 0, 77.34, 1018.46, 0, 1204, 
6148.44, 0, 0, 5781.25, 162.07, 8, 140.2, 5.8614, 600.926, 366.595, 
78.8438, 71.7563, 13.765, 63.5949, 27.84, 66.5, 143.555, 550.446, 
843.81, 27, 8.8333, 33.34, 286.7, 271.484, 0, 163.625, 32.69, 
4305.46, 133.25, 120.94, 20.75, 43.9606, 88.04, 100.74, 321.53, 
56.25, 407.15, 3.6, 0.563, 4.0833, 0, 12.1, 36, 60.11, 1636.54, 
3.274, 108.52, 18, 21.92, 500, 136.76, 100.626, 123.881, 449, 
15, 445.716, 0, 1219.51, 0, 1127.07, 0, 3429.23, 20414.1, 2.1, 
54.12, 40.1146, 460.55, 24.525, 184.705, 76.25, 161.753, 137.54, 
18, 2843.75, 3.4817, 51, 2026.83, 64.53, 0, 763.578, 340.092, 
82.35, 250.1, 88.15, 16.02, 68), air_temperature = c(21.1, 23.3, 
20, 29.4, 22.2, 18.3, 10, 38.3, 20.6, 18.9, 8.3, 21.1, -7.2, 
23.9, 8.3, 19.4, -3, -2.8, 8.3, 23.9, 33.9, 11.7, 28.3, 26.7, 
31.1, 10.6, 14, 5, 5.6, 15.6, 26.7, 21.7, 1.2, 22.2, 22.8, 10, 
37.2, 23.9, 8.3, -5.6, 16.1, 27.2, 23.3, 38.3, 20.6, 33.9, 13.3, 
23.9, 19.4, 20, 30.6, 18.3, 23.9, 25, 14.8, 18, 25, 30.6, 7.2, 
12, 31.1, 13.3, -2.2, 23.9, 13.3, 21.7, 20.6, 11.4, 29.4, 10, 
13.3, 2.8, 23.9, 11.1, 9.4, -27.2, -15, 17, 8.9, 18.9, 15.6, 
3.3, 5, 8.3, 3.3, 12.8, 27.8, 20.6, 23.9, 21.7, 30, -8.9, 37.2, 
19.4, 8.3, 23.9, 16.1, 11.7, 26.7, 1.1), cloud_coverage = c(4L, 
NA, 2L, 0L, NA, 8L, NA, 0L, 2L, NA, 6L, NA, 0L, NA, 0L, 0L, NA, 
NA, 0L, NA, 4L, NA, NA, NA, 0L, NA, NA, 0L, NA, 0L, 2L, NA, NA, 
0L, NA, 2L, 0L, NA, NA, NA, NA, NA, 0L, 2L, 2L, 4L, 6L, NA, NA, 
NA, 4L, NA, 2L, 2L, NA, 0L, NA, NA, NA, 0L, 0L, 0L, 6L, NA, 8L, 
0L, NA, NA, 0L, NA, NA, NA, 2L, 0L, 4L, NA, 0L, NA, 8L, NA, NA, 
0L, NA, 0L, NA, 8L, 0L, 2L, 2L, NA, 4L, 0L, 0L, 2L, 0L, 0L, 0L, 
8L, 4L, 2L), dew_temperature = c(-1.7, 18.3, 12.8, 1.1, 21.1, 
16.7, 8.9, -2.8, 11.7, 15, 2.2, 11.7, -11.1, 16.7, 0, 8.9, -5.8, 
-5.6, -3.3, 22.2, 8.9, 7.2, 25.6, 16.7, 8.3, 6.1, 5, 4.4, -1.1, 
11.7, 15.6, 17.2, -6.9, 8.9, 19.4, 2.8, 21.1, 15.6, 5, -10, 11.7, 
12.8, 21.7, 12.8, 11.7, 21.1, 11.7, 16.7, 16.7, 18.9, 11.1, 15, 
11.7, 23.9, -1.9, 3, 21.1, -6.1, -0.6, 10, -1.1, -6.7, -12.2, 
20.6, 13.3, 15.6, 16.1, 9.6, 22.2, 5, 11.7, 0, 10.6, 10, -8.9, 
-32.3, -20, 16, 7.2, 16.7, 13.9, 2.2, -3.3, 0, 2.2, 10.6, 7.2, 
13.9, 11.1, 20.6, 23.9, -19.4, 22.2, 15, -4.4, 15.6, 9.4, 3.3, 
2.2, -7.2), precip_depth_1_hr = c(0L, -1L, 0L, 0L, 0L, -1L, 3L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, -1L, 
NA, 0L, 0L, NA, NA, 0L, 0L, 0L, -1L, NA, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, -1L, 0L, 0L, -1L, 0L, 0L, 0L, 
0L, NA, NA, 0L, 0L, 0L, NA, 0L, 0L, 0L, -1L, 0L, 0L, -1L, NA, 
0L, -1L, 0L, 0L, 0L, 0L, 0L, NA, 0L, NA, 0L, 0L, -1L, NA, 0L, 
0L, NA, 8L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), sea_level_pressure = c(1014.2, 1009.7, 1016.2, 1007.2, 1015.7, 
1010.6, 1015.1, 1001.9, 1013.4, 1007.6, 1010.4, 1010.3, 1036.1, 
NA, 1013.5, 1024.8, 1023.2, 1023.4, 1030.4, 1015.2, 1009.3, 1017.1, 
1020.3, 1009, 1017, 1011.8, NA, 1026.5, 1014.2, 1014.7, 1008.5, 
NA, 1021.1, 1020.9, 1019.5, 1015.9, 1013.3, 1017.6, 1022.3, 1016.5, 
1019.5, 1011.6, 1016.8, 1009.8, 1017.2, 1009.1, 1015, 1018.3, 
1007.9, 1012.7, 1011.5, 1012.2, 1009.6, 1021.2, 1016, NA, 1010, 
1008.9, 1012.4, NA, 1008.6, 1024.5, 1015.5, 1019.2, 1012.2, 1014.9, 
1011.2, 1007.9, 1013.2, 999.7, 1021.4, 1008.6, 1013.5, 1019.8, 
1017.9, 1032.6, 1030.4, NA, 1016.2, 1019.2, 1007.2, 1022, 1034.1, 
1022, 1018.1, 1003.2, 1009, 1009, 1015.6, 1017.4, 1014, 1024.5, 
1012.2, 1024.8, 1015.1, 1011.1, 1021.9, 1016.5, 1007.8, 1023.6
), wind_direction = c(100L, 110L, 110L, 260L, 240L, 160L, 340L, 
240L, 310L, 20L, 210L, 180L, 0L, 300L, 360L, 110L, 60L, 0L, 350L, 
160L, 80L, 160L, 0L, 290L, 250L, 290L, 100L, 70L, 210L, 0L, 310L, 
110L, 70L, 160L, 20L, 80L, NA, 170L, 150L, 170L, 180L, 260L, 
190L, 130L, 270L, 160L, 210L, 30L, 230L, 20L, 90L, 20L, 300L, 
350L, 90L, 110L, 180L, 280L, 80L, 60L, 290L, 330L, 250L, 0L, 
20L, 290L, 230L, 20L, 0L, 290L, 0L, 220L, 150L, 0L, 330L, 280L, 
200L, NA, 50L, 30L, NA, 70L, 0L, 220L, 210L, 170L, 350L, 230L, 
0L, NA, 230L, 300L, 200L, 310L, 350L, NA, 210L, 230L, 270L, 230L
), wind_speed = c(3.6, 5.1, 3.6, 7.7, 1.5, 5.7, 2.6, 3.1, 5.7, 
1.5, 3.1, 3.1, 0, 4.6, 2.1, 3.1, 1.5, 0, 5.7, 1.5, 3.6, 1.5, 
0, 6.7, 4.6, 2.6, 6.2, 1.5, 1.5, 0, 7.7, 3.1, 3.1, 5.1, 3.6, 
2.1, 2.1, 5.7, 5.1, 6.7, 3.1, 6.7, 2.1, 2.6, 5.7, 4.1, 7.2, 10.3, 
3.1, 2.1, 4.6, 4.1, 5.1, 1.5, 1.5, 4.6, 1.5, 4.6, 11.8, 3.1, 
6.7, 3.6, 1.5, 0, 2.1, 2.6, 4.6, 4.1, 0, 6.7, 0, 4.6, 6.7, 0, 
6.2, 3.6, 4.6, 2.6, 3.6, 1.5, 2.6, 2.6, 0, 1.5, 1.5, 5.1, 2.1, 
5.1, 0, 2.6, 2.6, 3.1, 4.6, 2.1, 4.1, 2.1, 1.5, 1.5, 5.7, 4.6
)), row.names = c(NA, 100L), class = "data.frame")

geom_density()(更准确地说是 stat_density())还提供了计算得到的美学变量。利用它们,您可以设置 geom_density(aes(x = value, y = stat(scaled))),从而让每个分面中的核密度估计都缩放到最大值为 1 的统一尺度:

# Same plot as in the question, except that the density layer maps y to the
# computed variable `scaled` from stat_density() (the density estimate
# rescaled to a maximum of 1), so every facet's curve shares one y range.
ggplot(
  data = gather(select_if(df, is.numeric), na.rm = TRUE),
  mapping = aes(x = value, y = -0.5)
) +
  # Horizontal boxplot from ggstance, drawn at y = -0.5.
  geom_boxploth() +
  geom_density(
    mapping = aes(x = value, y = stat(scaled)),
    inherit.aes = FALSE
  ) +
  facet_wrap(~ key, scales = "free")

这可能会解决您的轴问题。

值得注意的是,由于相关的 pull request(原文此处为链接)已经合并,我们将来可能不再需要 ggstance。