如何为几个正态分布下方的区域着色?
How can I color the area below several normal distributions?
经过多次尝试,我终于得到了一个具有多个正态分布的独特图形。在这些分布中,1sd 也被绘制为垂直矩形。我使用的代码是这个:
x1<-50:200
a1<-dnorm(x1,134,20)
b1<-dnorm(x1,130,14)
c1<-dnorm(x1,132,12)
d1<-dnorm(x1,105,10)
scale<-range(pretty(range(a1,b1,c1,d1)))
remap<-function(x, to, from=range(x)) {
(x-from[1]) / (from[2]-from[1]) * (to[2]-to[1]) + to[1]
}
plot(NA, NA, xaxt="n", yaxt="n", type="n", xlim=scale, ylim=scale, xlab="Variable X", ylab="")
rect(remap(134-20, scale, range(x1)), scale[1],
remap(134+20, scale, range(x1)), scale[2], col="#ff606025")
rect(remap(130-14, scale, range(x1)), scale[1],
remap(130+14, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(132-12, scale, range(x1)), scale[1],
remap(132+12, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(105-10, scale, range(x1)), scale[1],
remap(105+10, scale, range(x1)), scale[2], col="#005ccd40")
#R1429
rect(remap(183, scale, range(x1)), scale[1],
remap(183, scale, range(x1)), scale[2], col="darkblue", lwd=3,lty=3)
lines(remap(x1,scale), a1, col="#ff6060", lwd=3)
lines(remap(x1,scale), b1, col="#005ccd", lwd=3, lty=3)
lines(remap(x1,scale), c1, col="#005ccd", lwd=3)
lines(remap(x1,scale), d1, col="#005ccd", lwd=3,lty=3)
axis(2);
axis(1, at=remap(pretty(x1), scale), pretty(x1))
我得到了运行代码后的下一张图:
但我的问题是:如何只为每个正态分布下方的区域着色,而不是对垂直矩形着色?
解释起来会容易得多。
提前致谢!
您可以使用 polygon
.
填充下曲线
## Some distributions
x1 <- 50:200
means <- c(134, 130, 132, 105)
sds <- c(20, 14, 12, 10)
dists <- lapply(seq_along(means), function(i) dnorm(x1, means[i], sds[i]))
## Some colors
cols <- colorRampPalette(c("red", "blue"))(length(dists))
## Blank plot
plot(c(x1[1], x1[length(x1)]), c(min(unlist(dists)), max(unlist(dists))),
type="n", xlab="X", ylab="Density")
## Add polygons
for (i in seq_along(dists))
polygon(c(x1, rev(x1)),
c(numeric(length(x1)), rev(dists[[i]])),
col=cols[i],
density=40)
编辑:对于 1sd 以内的多边形
xs <- sapply(seq_along(dists), function(i) # get supports on x1
do.call(`:`, as.list(which(x1 %in% (means[i] + c(-1,1)*sds[i])))))
plot(range(x1), range(unlist(dists)), type="n", xlab="X", ylab="Density")
for (i in seq_along(dists)) {
x <- x1[xs[[i]]]
polygon(c(x, rev(x)),
c(numeric(length(x)), rev(dists[[i]][xs[[i]]])),
col=cols[i],
density=40)
points(x1, dists[[i]], type="l", lty=2, col=cols[i])
}
这是一种使用 Hadley Wickham 的一些软件包的方法:
library("dplyr")
library("ggplot2")
library("tidyr")
data.frame(x = 50:200) %>%
mutate(a = dnorm(x,134,20),
b = dnorm(x,130,14),
c = dnorm(x,132,12),
d = dnorm(x,105,10)) %>%
gather(group, y, -x) %>%
ggplot(aes(x, y, fill = group)) %>%
+ geom_area(alpha = 0.3, position = "identity") %>%
+ geom_line() %>%
print
这里是一个只填满1个SD的版本:
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)
) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)
) %>% {
ggplot(., aes(x = x, y = y, colour = group)) +
geom_line() +
geom_area(aes(fill = group), filter(., withinSd),
position = "identity", alpha = 0.3) +
guides(colour = "none")
}
如果您希望所有图表的高度都相同,您可以添加一些额外的 dplyr
魔法:
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)
) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)
) %>%
group_by(group) %>%
mutate(y = y / max(y)) %>%
{
ggplot(., aes(x = x, y = y, colour = group)) +
geom_line() +
geom_area(aes(fill = group), filter(., withinSd),
position = "identity", alpha = 0.3) +
guides(colour = "none")
}
这是另一个使用 base R 的版本。这个版本使用 lines()
中的 type='h'
选项来绘制大量垂直线,多到最终会遮蔽该区域。请注意,这需要增加 x1
中的样本点数量(尝试将 x1
改回 50:200
以查看会发生什么)。
x1 <- seq(50,200,length=1000)
a1 <- dnorm(x1,134,20)
b1 <- dnorm(x1,130,14)
c1 <- dnorm(x1,132,12)
d1 <- dnorm(x1,105,10)
dists <- list(a1,b1,c1,d1)
# specify color names then convert them to RGB+alpha values
col <- c("red","green","blue","yellow")
col.rgba <- rgb(t(col2rgb(col))/255, alpha=0.2)
plot(NA, NA, xlim=range(x1), ylim=range(unlist(dists)), xlab="Variable X", ylab="")
# loop through each distribution
for (i in 1:length(dists)) {
lines(x1, dists[[i]], type='h', lwd=2, col=col.rgba[i]) # add shaded region
lines(x1, dists[[i]], type='l') # add solid line at top
}
这是输出:
这是使用 ggvis
的另一个版本:
library(dplyr)
library(ggvis)
## -- data generation copied from @NickK -- ##
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)) %>%
## ---------------------------------------- ##
mutate(dash = ifelse(grepl("a|d", group), 5, 0),
color = ifelse(grepl("a|c|d", group), "blue", "red")) %>%
ggvis() %>%
layer_paths(~x, ~y, stroke := ~color, strokeDash := ~dash) %>%
filter(withinSd) %>%
layer_ribbons(~x, ~y, y2 = ~y-y, fill := ~color, fillOpacity := 0.2) %>%
hide_legend("fill") %>%
add_axis("y", title_offset = 50)
经过多次尝试,我终于得到了一个具有多个正态分布的独特图形。在这些分布中,1sd 也被绘制为垂直矩形。我使用的代码是这个:
x1<-50:200
a1<-dnorm(x1,134,20)
b1<-dnorm(x1,130,14)
c1<-dnorm(x1,132,12)
d1<-dnorm(x1,105,10)
scale<-range(pretty(range(a1,b1,c1,d1)))
remap<-function(x, to, from=range(x)) {
(x-from[1]) / (from[2]-from[1]) * (to[2]-to[1]) + to[1]
}
plot(NA, NA, xaxt="n", yaxt="n", type="n", xlim=scale, ylim=scale, xlab="Variable X", ylab="")
rect(remap(134-20, scale, range(x1)), scale[1],
remap(134+20, scale, range(x1)), scale[2], col="#ff606025")
rect(remap(130-14, scale, range(x1)), scale[1],
remap(130+14, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(132-12, scale, range(x1)), scale[1],
remap(132+12, scale, range(x1)), scale[2], col="#005ccd40")
rect(remap(105-10, scale, range(x1)), scale[1],
remap(105+10, scale, range(x1)), scale[2], col="#005ccd40")
#R1429
rect(remap(183, scale, range(x1)), scale[1],
remap(183, scale, range(x1)), scale[2], col="darkblue", lwd=3,lty=3)
lines(remap(x1,scale), a1, col="#ff6060", lwd=3)
lines(remap(x1,scale), b1, col="#005ccd", lwd=3, lty=3)
lines(remap(x1,scale), c1, col="#005ccd", lwd=3)
lines(remap(x1,scale), d1, col="#005ccd", lwd=3,lty=3)
axis(2);
axis(1, at=remap(pretty(x1), scale), pretty(x1))
我得到了运行代码后的下一张图:
但我的问题是:如何只为每个正态分布下方的区域着色,而不是对垂直矩形着色?
解释起来会容易得多。
提前致谢!
您可以使用 polygon
.
## Some distributions
x1 <- 50:200
means <- c(134, 130, 132, 105)
sds <- c(20, 14, 12, 10)
dists <- lapply(seq_along(means), function(i) dnorm(x1, means[i], sds[i]))
## Some colors
cols <- colorRampPalette(c("red", "blue"))(length(dists))
## Blank plot
plot(c(x1[1], x1[length(x1)]), c(min(unlist(dists)), max(unlist(dists))),
type="n", xlab="X", ylab="Density")
## Add polygons
for (i in seq_along(dists))
polygon(c(x1, rev(x1)),
c(numeric(length(x1)), rev(dists[[i]])),
col=cols[i],
density=40)
编辑:对于 1sd 以内的多边形
xs <- sapply(seq_along(dists), function(i) # get supports on x1
do.call(`:`, as.list(which(x1 %in% (means[i] + c(-1,1)*sds[i])))))
plot(range(x1), range(unlist(dists)), type="n", xlab="X", ylab="Density")
for (i in seq_along(dists)) {
x <- x1[xs[[i]]]
polygon(c(x, rev(x)),
c(numeric(length(x)), rev(dists[[i]][xs[[i]]])),
col=cols[i],
density=40)
points(x1, dists[[i]], type="l", lty=2, col=cols[i])
}
这是一种使用 Hadley Wickham 的一些软件包的方法:
library("dplyr")
library("ggplot2")
library("tidyr")
data.frame(x = 50:200) %>%
mutate(a = dnorm(x,134,20),
b = dnorm(x,130,14),
c = dnorm(x,132,12),
d = dnorm(x,105,10)) %>%
gather(group, y, -x) %>%
ggplot(aes(x, y, fill = group)) %>%
+ geom_area(alpha = 0.3, position = "identity") %>%
+ geom_line() %>%
print
这里是一个只填满1个SD的版本:
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)
) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)
) %>% {
ggplot(., aes(x = x, y = y, colour = group)) +
geom_line() +
geom_area(aes(fill = group), filter(., withinSd),
position = "identity", alpha = 0.3) +
guides(colour = "none")
}
如果您希望所有图表的高度都相同,您可以添加一些额外的 dplyr
魔法:
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)
) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)
) %>%
group_by(group) %>%
mutate(y = y / max(y)) %>%
{
ggplot(., aes(x = x, y = y, colour = group)) +
geom_line() +
geom_area(aes(fill = group), filter(., withinSd),
position = "identity", alpha = 0.3) +
guides(colour = "none")
}
这是另一个使用 base R 的版本。这个版本使用 lines()
中的 type='h'
选项来绘制大量垂直线,多到最终会遮蔽该区域。请注意,这需要增加 x1
中的样本点数量(尝试将 x1
改回 50:200
以查看会发生什么)。
x1 <- seq(50,200,length=1000)
a1 <- dnorm(x1,134,20)
b1 <- dnorm(x1,130,14)
c1 <- dnorm(x1,132,12)
d1 <- dnorm(x1,105,10)
dists <- list(a1,b1,c1,d1)
# specify color names then convert them to RGB+alpha values
col <- c("red","green","blue","yellow")
col.rgba <- rgb(t(col2rgb(col))/255, alpha=0.2)
plot(NA, NA, xlim=range(x1), ylim=range(unlist(dists)), xlab="Variable X", ylab="")
# loop through each distribution
for (i in 1:length(dists)) {
lines(x1, dists[[i]], type='h', lwd=2, col=col.rgba[i]) # add shaded region
lines(x1, dists[[i]], type='l') # add solid line at top
}
这是输出:
这是使用 ggvis
的另一个版本:
library(dplyr)
library(ggvis)
## -- data generation copied from @NickK -- ##
data.frame(group = letters[1:4],
m = c(130, 134, 132, 105),
s = c(20, 14, 12, 10)) %>%
group_by(group) %>%
do(data_frame(group = .$group,
x = 50:200,
y = dnorm(x, .$m, .$s),
withinSd = abs(x - .$m) <= .$s)) %>%
## ---------------------------------------- ##
mutate(dash = ifelse(grepl("a|d", group), 5, 0),
color = ifelse(grepl("a|c|d", group), "blue", "red")) %>%
ggvis() %>%
layer_paths(~x, ~y, stroke := ~color, strokeDash := ~dash) %>%
filter(withinSd) %>%
layer_ribbons(~x, ~y, y2 = ~y-y, fill := ~color, fillOpacity := 0.2) %>%
hide_legend("fill") %>%
add_axis("y", title_offset = 50)