拉伸 x 轴并将不同的 binwidth 应用于 ggplot2 R 中直方图中的值范围
Stretching the x axis and applying a different binwidth to range of values in histogram in ggplot2 R
这是我想要构建的示例 ggplot。我的数据有一个问题:直方图中有很多值集中在一个很小的范围内。因此,我想让 x 轴的这一部分(这里是 80、81、82、83、84、85 之间的值)不成比例地拉伸,使刻度线在图上等间距分布,即刻度线之间的间距与数值的增量不成比例。相应地,我还想对直方图的这一部分使用不同的 bin 宽度(例如 binwidth = 1)。
library(ggplot2)
set.seed(42)

# Example data: 30 random values (normal, mean 80, sd 20), a respondent id
# running from 1 to 30, and a category label ("A" or "B") per respondent.
data <- data.frame(
  vals = rnorm(n = 30, mean = 80, sd = 20),
  respondent = seq(1, 30, 1),
  category = c(
    "A", "B", "B", "A", "A", "B", "B", "A", "A", "A",
    "A", "B", "B", "A", "A", "B", "B", "A", "A", "B",
    "B", "A", "A", "B", "B", "A", "A", "B", "B", "A"
  )
)

# Stacked histogram of vals, filled by category, with a uniform bin width of 5.
ggplot(data, aes(x = vals, fill = category)) +
  geom_histogram(binwidth = 5, position = "stack") +
  # Uneven x-axis values the author experimented with (kept disabled):
  # scale_x_continuous(c(40,50,60,70,80,81,82,83,84,85,95,105,115)) +
  labs(title = "plot", x = "Number of vals", y = "Number of respondents") +
  theme_minimal()
您可以自己计算尺寸(宽度/高度),作为一系列堆叠的矩形。
使用钻石数据集进行说明,假设这是我们的原始直方图,我们想要放大 [500, 1000] 价格范围:
# Baseline: histogram of diamond prices, filled by color, with uniform
# 500-wide bins.
ggplot(data = diamonds, mapping = aes(x = price, fill = color)) +
  geom_histogram(binwidth = 500) +
  theme_bw()
定义您想要的坐标轴分割点(breaks):
# Bin edges: one 500-wide bin from 0, 100-wide bins across [500, 1000],
# then 500-wide bins up to 19000.
x.axis.breaks <- c(
  0,                                       # binwidth = 500
  seq(from = 500, to = 900, by = 100),     # binwidth = 100
  seq(from = 1000, to = 19000, by = 500)   # binwidth = 500
)
> x.axis.breaks
[1] 0 500 600 700 800 900 1000 1500 2000 2500 3000 3500 4000 4500
[15] 5000 5500 6000 6500 7000 7500 8000 8500 9000 9500 10000 10500 11000 11500
[29] 12000 12500 13000 13500 14000 14500 15000 15500 16000 16500 17000 17500 18000 18500
[43] 19000
计算每个区间的 xmin / xmax / ymin / ymax:
library(dplyr)
# Count diamonds per (price bin, color), then derive the rectangle corners
# needed to draw every stacked bar segment by hand.
diamonds2 <- diamonds %>%
  mutate(price.cut = cut(price, breaks = x.axis.breaks)) %>%
  count(price.cut, color) %>%
  # horizontal extent: the two breaks that bound each bin
  mutate(
    xmin = x.axis.breaks[as.integer(price.cut)],
    xmax = x.axis.breaks[-1][as.integer(price.cut)]
  ) %>%
  # arrange() ignores grouping by default, so sorting before group_by()
  # yields the same row order as sorting after it
  arrange(desc(color)) %>%
  group_by(price.cut) %>%
  # vertical extent: each segment starts where the running total below it ends
  mutate(
    ymax = cumsum(n),
    ymin = as.numeric(ymax - n)
  ) %>%
  ungroup()
> diamonds2
# A tibble: 294 x 7
price.cut color n xmin xmax ymax ymin
<fct> <ord> <int> <dbl> <dbl> <int> <dbl>
1 0 J 158 0 500 158 0
2 500 J 80 500 600 80 0
3 600 J 84 600 700 84 0
4 700 J 51 700 800 51 0
5 800 J 43 800 900 43 0
6 900 J 47 900 1000 47 0
7 1000 J 145 1000 1500 145 0
8 1500 J 198 1500 2000 198 0
9 2000 J 163 2000 2500 163 0
10 2500 J 72 2500 3000 72 0
# ... with 284 more rows
绘图:
# Draw the histogram as rectangles whose corners were computed in diamonds2.
p <- ggplot(
  data = diamonds2,
  mapping = aes(
    xmin = xmin, xmax = xmax,
    ymin = ymin, ymax = ymax,
    fill = color
  )
) +
  geom_rect() +
  theme_bw()
p
我不倾向于“拉伸”连续坐标轴的某一部分,因为这会扭曲对图形的解读。但是您可以使用 ggforce 包中的 facet_zoom 进行局部放大:
library(ggforce)
# Add a zoom panel focused on the bars with xmin >= 500 and xmax <= 1000.
p +
  facet_zoom(x = xmin >= 500 & xmax <= 1000)
如果您不希望相邻的条形出现在缩放分面中,请将 x 轴的范围扩展参数(expand)设置为 0:
# Same zoom, with the x-axis expansion set to zero.
p +
  scale_x_continuous(expand = c(0, 0)) +
  facet_zoom(x = xmin >= 500 & xmax <= 1000)
编辑
若要在坐标轴末端使用不同的 binwidth 并配上自定义标签,可以做如下修改:
# Variant: even binwidth (500) up to 15000, then a single catch-all bin for
# everything above, drawn at the same width and labelled "15000+".
x.axis.breaks <- c(0,                      # binwidth = 500
                   seq(500, 900, 100),     # binwidth = 100
                   seq(1000, 15000, 500),  # binwidth = 500
                   19000)                  # everything else

# Re-bin with the new breaks. Rectangles computed from the previous breaks do
# not pick up this change: their largest bin would still be 18500-19000, so
# shrinking it to xmin + 500 would change nothing.
diamonds2 <- diamonds %>%
  mutate(price.cut = cut(price, breaks = x.axis.breaks)) %>%
  count(price.cut, color) %>%
  mutate(xmin = x.axis.breaks[as.integer(price.cut)],
         xmax = x.axis.breaks[as.integer(price.cut) + 1]) %>%
  arrange(desc(color)) %>%
  group_by(price.cut) %>%
  # each segment spans from the running total below it up to its own total
  mutate(ymax = cumsum(n),
         ymin = ymax - n) %>%
  ungroup() %>%
  # reduce the largest xmax value in order to have the same bar width
  mutate(xmax = ifelse(xmax == max(xmax),
                       xmin + 500,
                       xmax))

# Rebuild the plot from the re-binned data (a ggplot object keeps the data it
# was created with), then define breaks & labels for the x-axis.
p <- ggplot(diamonds2,
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = color)) +
  geom_rect() +
  theme_bw() +
  scale_x_continuous(breaks = seq(0, 15000, 5000),
                     labels = c(seq(0, 10000, 5000),
                                "15000+"))
这是我想要构建的示例 ggplot。我的数据有一个问题:直方图中有很多值集中在一个很小的范围内。因此,我想让 x 轴的这一部分(这里是 80、81、82、83、84、85 之间的值)不成比例地拉伸,使刻度线在图上等间距分布,即刻度线之间的间距与数值的增量不成比例。相应地,我还想对直方图的这一部分使用不同的 bin 宽度(例如 binwidth = 1)。
library(ggplot2)
set.seed(42)

# Example data: 30 random values (normal, mean 80, sd 20), a respondent id
# running from 1 to 30, and a category label ("A" or "B") per respondent.
data <- data.frame(
  vals = rnorm(n = 30, mean = 80, sd = 20),
  respondent = seq(1, 30, 1),
  category = c(
    "A", "B", "B", "A", "A", "B", "B", "A", "A", "A",
    "A", "B", "B", "A", "A", "B", "B", "A", "A", "B",
    "B", "A", "A", "B", "B", "A", "A", "B", "B", "A"
  )
)

# Stacked histogram of vals, filled by category, with a uniform bin width of 5.
ggplot(data, aes(x = vals, fill = category)) +
  geom_histogram(binwidth = 5, position = "stack") +
  # Uneven x-axis values the author experimented with (kept disabled):
  # scale_x_continuous(c(40,50,60,70,80,81,82,83,84,85,95,105,115)) +
  labs(title = "plot", x = "Number of vals", y = "Number of respondents") +
  theme_minimal()
您可以自己计算尺寸(宽度/高度),作为一系列堆叠的矩形。
使用钻石数据集进行说明,假设这是我们的原始直方图,我们想要放大 [500, 1000] 价格范围:
# Baseline: histogram of diamond prices, filled by color, with uniform
# 500-wide bins.
ggplot(data = diamonds, mapping = aes(x = price, fill = color)) +
  geom_histogram(binwidth = 500) +
  theme_bw()
定义您想要的坐标轴分割点(breaks):
# Bin edges: one 500-wide bin from 0, 100-wide bins across [500, 1000],
# then 500-wide bins up to 19000.
x.axis.breaks <- c(
  0,                                       # binwidth = 500
  seq(from = 500, to = 900, by = 100),     # binwidth = 100
  seq(from = 1000, to = 19000, by = 500)   # binwidth = 500
)
> x.axis.breaks
[1] 0 500 600 700 800 900 1000 1500 2000 2500 3000 3500 4000 4500
[15] 5000 5500 6000 6500 7000 7500 8000 8500 9000 9500 10000 10500 11000 11500
[29] 12000 12500 13000 13500 14000 14500 15000 15500 16000 16500 17000 17500 18000 18500
[43] 19000
计算每个区间的 xmin / xmax / ymin / ymax:
library(dplyr)
# Count diamonds per (price bin, color), then derive the rectangle corners
# needed to draw every stacked bar segment by hand.
diamonds2 <- diamonds %>%
  mutate(price.cut = cut(price, breaks = x.axis.breaks)) %>%
  count(price.cut, color) %>%
  # horizontal extent: the two breaks that bound each bin
  mutate(
    xmin = x.axis.breaks[as.integer(price.cut)],
    xmax = x.axis.breaks[-1][as.integer(price.cut)]
  ) %>%
  # arrange() ignores grouping by default, so sorting before group_by()
  # yields the same row order as sorting after it
  arrange(desc(color)) %>%
  group_by(price.cut) %>%
  # vertical extent: each segment starts where the running total below it ends
  mutate(
    ymax = cumsum(n),
    ymin = as.numeric(ymax - n)
  ) %>%
  ungroup()
> diamonds2
# A tibble: 294 x 7
price.cut color n xmin xmax ymax ymin
<fct> <ord> <int> <dbl> <dbl> <int> <dbl>
1 0 J 158 0 500 158 0
2 500 J 80 500 600 80 0
3 600 J 84 600 700 84 0
4 700 J 51 700 800 51 0
5 800 J 43 800 900 43 0
6 900 J 47 900 1000 47 0
7 1000 J 145 1000 1500 145 0
8 1500 J 198 1500 2000 198 0
9 2000 J 163 2000 2500 163 0
10 2500 J 72 2500 3000 72 0
# ... with 284 more rows
绘图:
# Draw the histogram as rectangles whose corners were computed in diamonds2.
p <- ggplot(
  data = diamonds2,
  mapping = aes(
    xmin = xmin, xmax = xmax,
    ymin = ymin, ymax = ymax,
    fill = color
  )
) +
  geom_rect() +
  theme_bw()
p
我不倾向于“拉伸”连续坐标轴的某一部分,因为这会扭曲对图形的解读。但是您可以使用 ggforce 包中的 facet_zoom 进行局部放大:
library(ggforce)
# Add a zoom panel focused on the bars with xmin >= 500 and xmax <= 1000.
p +
  facet_zoom(x = xmin >= 500 & xmax <= 1000)
如果您不希望相邻的条形出现在缩放分面中,请将 x 轴的范围扩展参数(expand)设置为 0:
# Same zoom, with the x-axis expansion set to zero.
p +
  scale_x_continuous(expand = c(0, 0)) +
  facet_zoom(x = xmin >= 500 & xmax <= 1000)
编辑
若要在坐标轴末端使用不同的 binwidth 并配上自定义标签,可以做如下修改:
# Variant: even binwidth (500) up to 15000, then a single catch-all bin for
# everything above, drawn at the same width and labelled "15000+".
x.axis.breaks <- c(0,                      # binwidth = 500
                   seq(500, 900, 100),     # binwidth = 100
                   seq(1000, 15000, 500),  # binwidth = 500
                   19000)                  # everything else

# Re-bin with the new breaks. Rectangles computed from the previous breaks do
# not pick up this change: their largest bin would still be 18500-19000, so
# shrinking it to xmin + 500 would change nothing.
diamonds2 <- diamonds %>%
  mutate(price.cut = cut(price, breaks = x.axis.breaks)) %>%
  count(price.cut, color) %>%
  mutate(xmin = x.axis.breaks[as.integer(price.cut)],
         xmax = x.axis.breaks[as.integer(price.cut) + 1]) %>%
  arrange(desc(color)) %>%
  group_by(price.cut) %>%
  # each segment spans from the running total below it up to its own total
  mutate(ymax = cumsum(n),
         ymin = ymax - n) %>%
  ungroup() %>%
  # reduce the largest xmax value in order to have the same bar width
  mutate(xmax = ifelse(xmax == max(xmax),
                       xmin + 500,
                       xmax))

# Rebuild the plot from the re-binned data (a ggplot object keeps the data it
# was created with), then define breaks & labels for the x-axis.
p <- ggplot(diamonds2,
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = color)) +
  geom_rect() +
  theme_bw() +
  scale_x_continuous(breaks = seq(0, 15000, 5000),
                     labels = c(seq(0, 10000, 5000),
                                "15000+"))