ggplot2:使用不同比例的刻面时,箱线图宽度不正确
ggplot2: incorrect boxplot width when facetting with facets of different scales
我需要多面箱线图。绘图的 x 轴是一个定量变量,我想在绘图上反映此信息。各个方面的横坐标比例差异很大。
我的问题是方框的宽度对于大比例尺的小平面来说非常小。
一个可能的解释是方框的宽度对于所有方面都是相同的,而理想情况下它应该由每个方面的 xlims 单独确定。
如果有两个输入,我将不胜感激:
- 您认为这是一个应该报告的错误吗?
- 你有解决办法吗?
提前致谢!
备注:将横坐标转换为分类变量可能是一种解决方案,但并不完美,因为它会导致一些信息丢失。
最小工作示例:
library(tidyverse)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>% # Create sample (y) and label (idx)
bind_rows() %>%
ggplot(aes(x = x, y = y, group = x)) +
geom_boxplot() +
facet_wrap(~idx, scales = 'free')
结果:
一个麻烦的解决方案是从头开始重新绘制箱线图,但这不是很令人满意:
draw_boxplot = function(locations, width, ymin, lower, middle, upper, ymax, idx){
local_df = tibble(locations = locations, width = width, ymin = ymin, lower = lower, middle = middle, upper = upper, ymax = ymax, idx = idx)
ggplot(data = local_df) +
geom_rect(aes(xmin = locations - width/2, xmax = locations + width/2, ymin = lower, ymax = upper), fill = 'white', colour = 'black') +
geom_segment(aes(x = locations - width/2, xend = locations + width/2, y = middle, yend = middle), size = 0.8) +
geom_segment(aes(x = locations, xend = locations, y = upper, yend = ymax)) +
geom_segment(aes(x = locations, xend = locations, y = lower, yend = ymin)) +
facet_wrap(~idx, scales = 'free_x')
}
make_boxplot = function(to_plot){
to_plot %>%
cmp_boxplot %>%
(function(x){
draw_boxplot(locations = x$x, width = x$width, ymin = x$y0, lower = x$y25, middle = x$y50, upper = x$y75, ymax = x$y100, idx = x$idx)
})
}
cmp_boxplot = function(to_plot){
to_plot %>%
group_by(idx) %>%
mutate(width = 0.6*(max(x) - min(x))/length(unique(x))) %>% #hand specified width
group_by(x) %>%
mutate(y0 = min(y),
y25 = quantile(y, 0.25),
y50 = median(y),
y75 = quantile(y, 0.75),
y100 = max(y)) %>%
select(-y) %>%
unique()
}
c(1:4,7) %>%
c(.,10*.) %>%
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>%
bind_rows() %>%
make_boxplot
结果:
由于 geom_boxplot
不允许将 width
作为审美变化,因此您必须自己编写。幸运的是它并不太复杂。
bp_custom <- function(vals, type) {
bp = boxplot.stats(vals)
if(type == "whiskers") {
y = bp$stats[1]
yend = bp$stats[5]
return(data.frame(y = y, yend = yend))
}
if(type == "box") {
ymin = bp$stats[2]
ymax = bp$stats[4]
return(data.frame(ymin = ymin, ymax = ymax))
}
if(type == "median") {
y = median(vals)
yend = median(vals)
return(data.frame(y = y, yend = yend))
}
if(type == "outliers") {
y = bp$out
return(data.frame(y = y))
} else {
return(warning("Type must be one of 'whiskers', 'box', 'median', or 'outliers'."))
}
}
此函数执行所有计算和 returns 适合在 stat_summary
中使用的数据帧。然后我们在几个不同的层中调用它来构建箱线图的各个部分。请注意,您需要计算每组刻面的箱线图的宽度,在下面使用管道中的 dplyr
完成。我计算了宽度,使得 x 的范围根据唯一 x 值的数量分成相等的段,然后每个框大约该段宽度的 1/2。您的数据可能需要不同的调整。
library(dplyr)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {
tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))
}) %>%
bind_rows() %>%
group_by(idx) %>% # NOTE THIS LINE
mutate(width = 0.25*diff(range(x))/length(unique(x))) %>% # NOTE THIS LINE
ggplot(aes(x = x, y = y, group = x)) +
stat_summary(fun.data = bp_custom, fun.args = "whiskers",
geom = "segment", aes(xend = x)) +
stat_summary(fun.data = bp_custom, fun.args = "box",
geom = "rect", aes(xmin = x - width, xmax = x + width),
fill = "white", color = "black") +
stat_summary(fun.data = bp_custom, fun.args = "median",
geom = "segment", aes(x = x - width, xend = x + width), size = 1.5) +
stat_summary(fun.data = bp_custom, fun.args = "outliers",
geom = "point") +
facet_wrap(~idx, scales = 'free')
至于将其报告为错误(实际上是一个所需的功能),我认为这是一个不常见的用例,他们不会优先考虑它。如果您将此代码包装成自定义 geom
(基于 here)并提交拉取请求,您可能会更幸运。
我需要多面箱线图。绘图的 x 轴是一个定量变量,我想在绘图上反映此信息。各个方面的横坐标比例差异很大。
我的问题是方框的宽度对于大比例尺的小平面来说非常小。
一个可能的解释是方框的宽度对于所有方面都是相同的,而理想情况下它应该由每个方面的 xlims 单独确定。
如果有两个输入,我将不胜感激:
- 您认为这是一个应该报告的错误吗?
- 你有解决办法吗?
提前致谢!
备注:将横坐标转换为分类变量可能是一种解决方案,但并不完美,因为它会导致一些信息丢失。
最小工作示例:
library(tidyverse)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>% # Create sample (y) and label (idx)
bind_rows() %>%
ggplot(aes(x = x, y = y, group = x)) +
geom_boxplot() +
facet_wrap(~idx, scales = 'free')
结果:
一个麻烦的解决方案是从头开始重新绘制箱线图,但这不是很令人满意:
draw_boxplot = function(locations, width, ymin, lower, middle, upper, ymax, idx){
local_df = tibble(locations = locations, width = width, ymin = ymin, lower = lower, middle = middle, upper = upper, ymax = ymax, idx = idx)
ggplot(data = local_df) +
geom_rect(aes(xmin = locations - width/2, xmax = locations + width/2, ymin = lower, ymax = upper), fill = 'white', colour = 'black') +
geom_segment(aes(x = locations - width/2, xend = locations + width/2, y = middle, yend = middle), size = 0.8) +
geom_segment(aes(x = locations, xend = locations, y = upper, yend = ymax)) +
geom_segment(aes(x = locations, xend = locations, y = lower, yend = ymin)) +
facet_wrap(~idx, scales = 'free_x')
}
make_boxplot = function(to_plot){
to_plot %>%
cmp_boxplot %>%
(function(x){
draw_boxplot(locations = x$x, width = x$width, ymin = x$y0, lower = x$y25, middle = x$y50, upper = x$y75, ymax = x$y100, idx = x$idx)
})
}
cmp_boxplot = function(to_plot){
to_plot %>%
group_by(idx) %>%
mutate(width = 0.6*(max(x) - min(x))/length(unique(x))) %>% #hand specified width
group_by(x) %>%
mutate(y0 = min(y),
y25 = quantile(y, 0.25),
y50 = median(y),
y75 = quantile(y, 0.75),
y100 = max(y)) %>%
select(-y) %>%
unique()
}
c(1:4,7) %>%
c(.,10*.) %>%
lapply(FUN = function(x) {tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))}) %>%
bind_rows() %>%
make_boxplot
结果:
由于 geom_boxplot
不允许将 width
作为审美变化,因此您必须自己编写。幸运的是它并不太复杂。
bp_custom <- function(vals, type) {
bp = boxplot.stats(vals)
if(type == "whiskers") {
y = bp$stats[1]
yend = bp$stats[5]
return(data.frame(y = y, yend = yend))
}
if(type == "box") {
ymin = bp$stats[2]
ymax = bp$stats[4]
return(data.frame(ymin = ymin, ymax = ymax))
}
if(type == "median") {
y = median(vals)
yend = median(vals)
return(data.frame(y = y, yend = yend))
}
if(type == "outliers") {
y = bp$out
return(data.frame(y = y))
} else {
return(warning("Type must be one of 'whiskers', 'box', 'median', or 'outliers'."))
}
}
此函数执行所有计算和 returns 适合在 stat_summary
中使用的数据帧。然后我们在几个不同的层中调用它来构建箱线图的各个部分。请注意,您需要计算每组刻面的箱线图的宽度,在下面使用管道中的 dplyr
完成。我计算了宽度,使得 x 的范围根据唯一 x 值的数量分成相等的段,然后每个框大约该段宽度的 1/2。您的数据可能需要不同的调整。
library(dplyr)
c(1:4,7) %>%
c(.,10*.) %>% # Create abscissa on two different scales
lapply(FUN = function(x) {
tibble(x = x, y = rnorm(50), idx = ifelse(test = x<8, yes = 'A', no = 'B'))
}) %>%
bind_rows() %>%
group_by(idx) %>% # NOTE THIS LINE
mutate(width = 0.25*diff(range(x))/length(unique(x))) %>% # NOTE THIS LINE
ggplot(aes(x = x, y = y, group = x)) +
stat_summary(fun.data = bp_custom, fun.args = "whiskers",
geom = "segment", aes(xend = x)) +
stat_summary(fun.data = bp_custom, fun.args = "box",
geom = "rect", aes(xmin = x - width, xmax = x + width),
fill = "white", color = "black") +
stat_summary(fun.data = bp_custom, fun.args = "median",
geom = "segment", aes(x = x - width, xend = x + width), size = 1.5) +
stat_summary(fun.data = bp_custom, fun.args = "outliers",
geom = "point") +
facet_wrap(~idx, scales = 'free')
至于将其报告为错误(实际上是一个所需的功能),我认为这是一个不常见的用例,他们不会优先考虑它。如果您将此代码包装成自定义 geom
(基于 here)并提交拉取请求,您可能会更幸运。