用顶部的点拆分小提琴图以指示信息

Split violin plot with points on top to indicate info

这是 here 的后续 post 和

我已经成功地为需要比较的数据(两个中值估计密度,两种情况)实现了 split violin ggplot2。现在,因为我想添加一些置信区间。我正在遵循上面链接中的代码 posted:

编辑:一个可重现的例子

tmp <- rnorm(1000,0,1)
tmp.2 <- rnorm(1000,0,1)
x.1 <- density(tmp)
y.1 <- density(tmp.2)  

在这里,我正在制作密度,提取 (x,y) 对。然后我得到分位数,

# Make densities
densities <- as.data.frame(c(x.1$x,y.1$x))
colnames(densities) <- "loc"
densities$dens <- c(x.1$y,y.1$y)
densities$drop_case <- c(rep("B",512),rep("S",512))
densities$dens <- ifelse(densities$drop_case=="B",densities$dens*-1,densities$dens)
densities$dens <- ifelse(densities$drop_case=="S",densities$dens*1,densities$dens)

conf <- as.data.frame(c(quantile(tmp,c(0.025,0.975))[1],quantile(tmp,c(0.025,0.975))[2],quantile(tmp.2,c(0.025,0.975))[1],quantile(tmp.2,c(0.025,0.975))[2]))
colnames(conf) <- "intervals"
conf$drop_case <- c(rep("B",2),rep("S",2))
conf$length <- rep(1000,4)

现在我正在尝试提取密度内的值,如链接 posts

中所述

查找密度数据点

val.tmp <- rep(0,4)
val.tmp.2 <- rep(0,4)
for (i in 1:4) {
x.here <- densities$loc
y.here <- densities$dens
your.number<- conf$intervals[i]
pos.tmp <- which(abs(x.here-your.number)==min(abs(x.here-your.number)))
val.tmp[i] <- x.here[pos.tmp]
val.tmp.2[i] <- y.here[pos.tmp]
}
conf$positions <- val.tmp
conf$length <- val.tmp.2

conf$length <- ifelse(conf$drop_case=="B",conf$length*-1,conf$length)
conf$length <- ifelse(conf$drop_case=="S",conf$length*1,conf$length)

ggplot(densities,aes(dens, loc, fill = factor(drop_case)))+
geom_polygon()+
scale_x_continuous(breaks = 0, name = info$Name)+
ylab('Estimator Density') +
theme(axis.title.x = element_blank())+
geom_point(data = conf, aes(x = positions, y = length, fill = factor(drop_case), group = factor(drop_case))
         ,shape = 21, colour = "black", show.legend = FALSE)

然后不幸的是我面临以下问题,这些点没有映射到密度上而是映射到平面上。

代码中有一堆小错误。首先,在 for 循环中,您不能将 x.herey.here 设置为所有密度和位置值,因为这包括这两个组。其次,由于 densities 中的符号已经更改,因此之后无需使用那些 ifelse 语句。第三,无论如何你只需要顶部 ifelse ,因为底部的根本什么都不做。最后,您在 geom_point 中的 xy 映射是错误的!

还有很多其他的东西可以更改以使代码更易于理解和更漂亮,但我的时间有限,所以我将保持原样。

完整调整后的代码如下:

tmp <- rnorm(1000,0,1)
tmp.2 <- rnorm(1000,0,1)
x.1 <- density(tmp)
y.1 <- density(tmp.2)  

# Make densities
densities <- as.data.frame(c(x.1$x,y.1$x))
colnames(densities) <- "loc"
densities$dens <- c(x.1$y,y.1$y)
densities$drop_case <- c(rep("B",512),rep("S",512))
densities$dens <- ifelse(densities$drop_case=="B",densities$dens*-1,densities$dens)

conf <- as.data.frame(c(quantile(tmp,c(0.025,0.975)), quantile(tmp.2,c(0.025,0.975))))
colnames(conf) <- "intervals"
conf$drop_case <- c(rep("B",2),rep("S",2))
conf$length <- rep(1000,4)

val.tmp <- rep(0,4)
val.tmp.2 <- rep(0,4)
for (i in 1:4) {
  x.here <- densities$loc[densities$drop_case == conf$drop_case[i]]
  y.here <- densities$dens[densities$drop_case == conf$drop_case[i]]
  your.number<- conf$intervals[i]
  pos.tmp <- which(abs(x.here-your.number)==min(abs(x.here-your.number)))
  val.tmp[i] <- x.here[pos.tmp]
  val.tmp.2[i] <- y.here[pos.tmp]
}
conf$positions <- val.tmp
conf$length <- val.tmp.2

ggplot(densities, aes(dens, loc, fill = drop_case)) +
  geom_polygon()+
  ylab('Estimator Density') +
  theme(axis.title.x = element_blank())+
  geom_point(data = conf, aes(x = length, y = positions, fill = drop_case),
             shape = 21, colour = "black", show.legend = FALSE)

这导致:

我个人更喜欢带线段的图:

ggplot(densities, aes(dens, loc, fill = factor(drop_case)))+
  geom_polygon()+
  ylab('Estimator Density') +
  theme(axis.title.x = element_blank())+
  geom_segment(data = conf, aes(x = length, xend = 0, y = positions, yend = positions))