计算一个点在组中的密度

calculate density of one point in groups

我正在绘制一些密度曲线,我想在每组的平均值处添加一个点。但是,我想沿着密度曲线的顶部绘制这些点,而不是在 0 处。有没有办法得出组内平均点的密度值?代码如下:

# make df
df<- data.frame(group=c("a","b",'c'),
           value=rnorm(
             3000,
             mean=c(1,2,3),
             sd=c(1,1.5,1)
           )) 
library(tidyverse)
library(ggridges)
library(ggdist)

方式 1:来自 ggridges ppackage 的密度脊线

df %>%

  # calculate mean density per group to use later
  group_by(group)%>%
  mutate(mean_value=mean(value)) %>%
    
  
  ggplot()+
  aes(x=value,y=group)+
  geom_density_ridges()+
  
  # could do with stat summary - blue points
  stat_summary(
    orientation = "y",
    fun = mean,
    geom = "point", 
    color="blue"
  )+
  
  # or could do with geom_point using precalculated value (red points)
  # nudged so we can see both. 
  geom_point(aes(x=mean_value,y=group),
             color="red",
             position = position_nudge(x=.1)
             )

方式 2:stat_halfeye 来自 ggdist 包

df %>%
  group_by(group)%>%
  mutate(mean_value=mean(value)) %>%
  
  # mutate(mean_density = density(mean_value,value))
  
  
  ggplot()+
  aes(x=value,y=group)+
  stat_halfeye()+
  
  # could do with stat summary
  stat_summary(
    orientation = "y",
    fun = mean,
    geom = "point", 
    color="blue",
    alpha=.8
  )+
  
  # or could do with geom_point using precalculated value
  # nudged so we can see both. 
  geom_point(aes(x=mean_value,y=group),
             color="red",
             position = position_nudge(x=.1)
  )

期望输出:这些蓝色或红色点位于密度曲线的顶部。所以我需要一个类似“组 + 密度值”的 y 美学。

宁愿使用方法 2 (ggdist) 而不是 geom_density 山脊

谢谢

我不确定是否有办法在 ggplot geom/stat 函数中计算密度曲线的平均值的高度,所以我创建了几个辅助函数来做到这一点。

dens_at_mean 计算密度曲线在数据平均值处的高度。 get_mean_coords 按组运行 dens_at_mean,然后缩放高度值以匹配由 stat_halfeye 和 returns 生成的 y 值,一个可以传递给 [=16= 的数据框].

# Reproducible data
set.seed(394)
df<- data.frame(group=c("a","b",'c'),
                value=rnorm(
                  3000,
                  mean=c(1,2,3),
                  sd=c(1,1.5,1)
                )) 

# Function to get height of density curve at mean value
dens_at_mean = function(x) { 
  d = density(x)
  mean.x = mean(x)
  data.frame(mean.x = mean.x,
             max.y = max(d$y),
             mean.y = approx(d$x, d$y, xout=mean.x)$y)
}

# Function to return data frame with properly scaled heights 
#  to plot mean points
get_mean_coords = function(data, value.var, group.var) {

  data %>% 
    group_by({{group.var}}) %>% 
    summarise(vals = list(dens_at_mean({{value.var}}))) %>% 
    ungroup %>% 
    unnest_wider(vals) %>% 
    # Scale y-value to work properly with stat_halfeye
    mutate(mean.y = (mean.y/max(max.y) * 0.9 + 1:n())) %>% 
    select(-max.y)
}

df %>%
  ggplot()+
    aes(x=value, y=group)+
    stat_halfeye() +
    geom_point(data=get_mean_coords(df, value, group), 
               aes(x=mean.x, y=mean.y),
               color="red", size=2) +
    theme_bw() +
    scale_y_discrete(expand=c(0.08,0.05))