ggplot2 彩色直方图
Colored histogram with ggplot2
我有一个数据框,其中包含一组值及其对应的权重。我想绘制一个直方图,使每个条形的高度对应于落入该 bin 的值的个数,而条形的颜色对应于这些值的权重之和。我该怎么做?
示例:
# Example data: 20 observations, each a value `x` paired with a weight `w`.
D <- data.frame(
  x = c(
    -0.39, 0.12, 0.94, 1.67, 1.76, 2.44, 3.72, 4.28, 4.92, 5.53,
    0.06, 0.48, 1.01, 1.68, 1.80, 3.25, 4.12, 4.60, 5.28, 6.22
  ),
  w = c(
    0.1810479, 0.2209460, 0.2974134, 0.3768152, 0.3871925,
    0.4682943, 0.6220371, 0.6838944, 0.7473117, 0.7993555,
    0.2159428, 0.2526883, 0.3046069, 0.3779629, 0.3918383,
    0.5667588, 0.6667623, 0.7166747, 0.7790540, 0.8480375
  )
)
# Baseline plot from the question: histogram of `x` on a density scale with
# 0.5-wide bins (`boundary = 0.5` pins a bin edge at 0.5).
# ggplot2 must be attached before ggplot() can be called.
library(ggplot2)

ggplot(D, aes(x)) +
  geom_histogram(
    # after_stat(density) replaces the deprecated `..density..` notation.
    aes(y = after_stat(density)),
    binwidth = 0.5,
    boundary = 0.5
  )
解决方案
基于 eipi10 的回答,但使用标准函数:
# Base-R binning: tabulate the count and the summed weight of each bin,
# then draw the pre-computed bars.
breaks <- seq(-0.5, 6.5, 0.5)
# cut() assigns every x to one of the intervals delimited by `breaks`
# (right-closed by default).
bins <- cut(D$x, breaks)
h <- data.frame(
  # Bin midpoints, used as the bar positions.
  x = head(breaks, -1) + diff(breaks) / 2,
  # lengths()/vapply() have a fixed return type (unlike sapply());
  # empty bins yield a count of 0 and a weight sum of 0.
  count = lengths(split(D$x, bins)),
  weight = vapply(split(D$w, bins), sum, numeric(1))
)
# Density = share of observations divided by bin width, so that the bar areas
# sum to 1. Dividing by the total count alone would give a proportion instead.
h$density <- h$count / (sum(h$count) * diff(breaks))
ggplot(h) +
  geom_bar(aes(x, density, fill = weight), stat = "identity")
# Built-in alternative: colour each bar by its per-bin count.
# after_stat(count) / after_stat(density) are the current spelling of the
# deprecated `..count..` / `..density..` notation.
ggplot(D, aes(x)) +
  geom_histogram(
    aes(y = after_stat(density), fill = after_stat(count)),
    binwidth = 0.5,
    boundary = 0.5
  )
- 关于 "the bar's color (actually: fill) corresponds to their total weight"(条形的颜色,实际上是 fill,对应于它们的总权重):你指的是权重之和(sum)。与 ..count.. 不同,ggplot2 没有内置的 ..sum..。也许您需要先将数据按 bin 预处理(汇总)。
另一种选择是预先汇总数据:
# Alternative: summarise the data per bin first, then plot the summary.
library(dplyr)

D_bins <- D %>%
  mutate(
    # Label each 0.5-wide bin with its midpoint ...
    bins = cut(x, seq(-0.5, 6.5, 0.5), labels = seq(-0.25, 6.5, 0.5)),
    # ... then convert the factor labels back to numbers. `bins` is a column,
    # not a function, so it must not be called as `bins()`.
    bins = as.numeric(as.character(bins))
  ) %>%
  group_by(bins) %>%
  summarise(
    count_x = n(),   # number of values in the bin
    sum_w = sum(w)   # total weight of the bin
  )

# Bar height = count per bin, fill colour = summed weight per bin.
ggplot(D_bins) +
  geom_bar(
    aes(bins, count_x, fill = sum_w),
    colour = "white",
    stat = "identity"
  )
您也可以不使用 fill 映射(fill aesthetic),而是用两组方向相反的条形分别表示计数和权重之和:
# Mirrored bars: counts are drawn on the positive side and summed weights on
# the negative side of the same axis; coord_flip() swaps the x and y axes.
ggplot(data = D_bins) +
  geom_bar(
    mapping = aes(x = bins, y = count_x),
    stat = "identity",
    fill = "blue",
    colour = "white"
  ) +
  geom_bar(
    mapping = aes(x = bins, y = -sum_w),
    stat = "identity",
    fill = "red",
    colour = "white"
  ) +
  scale_x_continuous(breaks = seq(-1, 10)) +
  scale_y_continuous(
    limits = c(-2, 4),
    breaks = seq(-2, 5, 1),
    # Tick labels show magnitudes, so the negative side reads 2, 1.
    labels = c(2, 1, 0:5)
  ) +
  labs(y = "Sum of w Count of x ") +
  coord_flip()
我有一个数据框,其中包含一组值及其对应的权重。我想绘制一个直方图,使每个条形的高度对应于落入该 bin 的值的个数,而条形的颜色对应于这些值的权重之和。我该怎么做?
示例:
# Example data: 20 observations, each a value `x` paired with a weight `w`.
D <- data.frame(
  x = c(
    -0.39, 0.12, 0.94, 1.67, 1.76, 2.44, 3.72, 4.28, 4.92, 5.53,
    0.06, 0.48, 1.01, 1.68, 1.80, 3.25, 4.12, 4.60, 5.28, 6.22
  ),
  w = c(
    0.1810479, 0.2209460, 0.2974134, 0.3768152, 0.3871925,
    0.4682943, 0.6220371, 0.6838944, 0.7473117, 0.7993555,
    0.2159428, 0.2526883, 0.3046069, 0.3779629, 0.3918383,
    0.5667588, 0.6667623, 0.7166747, 0.7790540, 0.8480375
  )
)
# Baseline plot from the question: histogram of `x` on a density scale with
# 0.5-wide bins (`boundary = 0.5` pins a bin edge at 0.5).
# ggplot2 must be attached before ggplot() can be called.
library(ggplot2)

ggplot(D, aes(x)) +
  geom_histogram(
    # after_stat(density) replaces the deprecated `..density..` notation.
    aes(y = after_stat(density)),
    binwidth = 0.5,
    boundary = 0.5
  )
解决方案
基于 eipi10 的回答,但使用标准函数:
# Base-R binning: tabulate the count and the summed weight of each bin,
# then draw the pre-computed bars.
breaks <- seq(-0.5, 6.5, 0.5)
# cut() assigns every x to one of the intervals delimited by `breaks`
# (right-closed by default).
bins <- cut(D$x, breaks)
h <- data.frame(
  # Bin midpoints, used as the bar positions.
  x = head(breaks, -1) + diff(breaks) / 2,
  # lengths()/vapply() have a fixed return type (unlike sapply());
  # empty bins yield a count of 0 and a weight sum of 0.
  count = lengths(split(D$x, bins)),
  weight = vapply(split(D$w, bins), sum, numeric(1))
)
# Density = share of observations divided by bin width, so that the bar areas
# sum to 1. Dividing by the total count alone would give a proportion instead.
h$density <- h$count / (sum(h$count) * diff(breaks))
ggplot(h) +
  geom_bar(aes(x, density, fill = weight), stat = "identity")
# Built-in alternative: colour each bar by its per-bin count.
# after_stat(count) / after_stat(density) are the current spelling of the
# deprecated `..count..` / `..density..` notation.
ggplot(D, aes(x)) +
  geom_histogram(
    aes(y = after_stat(density), fill = after_stat(count)),
    binwidth = 0.5,
    boundary = 0.5
  )
- 关于 "the bar's color (actually: fill) corresponds to their total weight"(条形的颜色,实际上是 fill,对应于它们的总权重):你指的是权重之和(sum)。与 ..count.. 不同,ggplot2 没有内置的 ..sum..。也许您需要先将数据按 bin 预处理(汇总)。
另一种选择是预先汇总数据:
# Alternative: summarise the data per bin first, then plot the summary.
library(dplyr)

D_bins <- D %>%
  mutate(
    # Label each 0.5-wide bin with its midpoint ...
    bins = cut(x, seq(-0.5, 6.5, 0.5), labels = seq(-0.25, 6.5, 0.5)),
    # ... then convert the factor labels back to numbers. `bins` is a column,
    # not a function, so it must not be called as `bins()`.
    bins = as.numeric(as.character(bins))
  ) %>%
  group_by(bins) %>%
  summarise(
    count_x = n(),   # number of values in the bin
    sum_w = sum(w)   # total weight of the bin
  )

# Bar height = count per bin, fill colour = summed weight per bin.
ggplot(D_bins) +
  geom_bar(
    aes(bins, count_x, fill = sum_w),
    colour = "white",
    stat = "identity"
  )
您也可以不使用 fill 映射(fill aesthetic),而是用两组方向相反的条形分别表示计数和权重之和:
# Mirrored bars: counts are drawn on the positive side and summed weights on
# the negative side of the same axis; coord_flip() swaps the x and y axes.
ggplot(data = D_bins) +
  geom_bar(
    mapping = aes(x = bins, y = count_x),
    stat = "identity",
    fill = "blue",
    colour = "white"
  ) +
  geom_bar(
    mapping = aes(x = bins, y = -sum_w),
    stat = "identity",
    fill = "red",
    colour = "white"
  ) +
  scale_x_continuous(breaks = seq(-1, 10)) +
  scale_y_continuous(
    limits = c(-2, 4),
    breaks = seq(-2, 5, 1),
    # Tick labels show magnitudes, so the negative side reads 2, 1.
    labels = c(2, 1, 0:5)
  ) +
  labs(y = "Sum of w Count of x ") +
  coord_flip()