计算一个点在组中的密度
calculate density of one point in groups
我正在绘制一些密度曲线,我想在每组的平均值处添加一个点。但是,我想沿着密度曲线的顶部绘制这些点,而不是在 0 处。有没有办法得出组内平均点的密度值?代码如下:
# make df
df<- data.frame(group=c("a","b",'c'),
value=rnorm(
3000,
mean=c(1,2,3),
sd=c(1,1.5,1)
))
library(tidyverse)
library(ggridges)
library(ggdist)
方式 1:来自 ggridges ppackage 的密度脊线
df %>%
# calculate mean density per group to use later
group_by(group)%>%
mutate(mean_value=mean(value)) %>%
ggplot()+
aes(x=value,y=group)+
geom_density_ridges()+
# could do with stat summary - blue points
stat_summary(
orientation = "y",
fun = mean,
geom = "point",
color="blue"
)+
# or could do with geom_point using precalculated value (red points)
# nudged so we can see both.
geom_point(aes(x=mean_value,y=group),
color="red",
position = position_nudge(x=.1)
)
方式 2:stat_halfeye 来自 ggdist 包
df %>%
group_by(group)%>%
mutate(mean_value=mean(value)) %>%
# mutate(mean_density = density(mean_value,value))
ggplot()+
aes(x=value,y=group)+
stat_halfeye()+
# could do with stat summary
stat_summary(
orientation = "y",
fun = mean,
geom = "point",
color="blue",
alpha=.8
)+
# or could do with geom_point using precalculated value
# nudged so we can see both.
geom_point(aes(x=mean_value,y=group),
color="red",
position = position_nudge(x=.1)
)
期望输出:这些蓝色或红色点位于密度曲线的顶部。所以我需要一个类似“组 + 密度值”的 y 美学。
宁愿使用方法 2 (ggdist) 而不是 geom_density 山脊
谢谢
我不确定是否有办法在 ggplot geom/stat 函数中计算密度曲线的平均值的高度,所以我创建了几个辅助函数来做到这一点。
dens_at_mean
计算密度曲线在数据平均值处的高度。 get_mean_coords
按组运行 dens_at_mean
,然后缩放高度值以匹配由 stat_halfeye
和 returns 生成的 y 值,一个可以传递给 [=16= 的数据框].
# Reproducible data
set.seed(394)
df<- data.frame(group=c("a","b",'c'),
value=rnorm(
3000,
mean=c(1,2,3),
sd=c(1,1.5,1)
))
# Function to get height of density curve at mean value
dens_at_mean = function(x) {
d = density(x)
mean.x = mean(x)
data.frame(mean.x = mean.x,
max.y = max(d$y),
mean.y = approx(d$x, d$y, xout=mean.x)$y)
}
# Function to return data frame with properly scaled heights
# to plot mean points
get_mean_coords = function(data, value.var, group.var) {
data %>%
group_by({{group.var}}) %>%
summarise(vals = list(dens_at_mean({{value.var}}))) %>%
ungroup %>%
unnest_wider(vals) %>%
# Scale y-value to work properly with stat_halfeye
mutate(mean.y = (mean.y/max(max.y) * 0.9 + 1:n())) %>%
select(-max.y)
}
df %>%
ggplot()+
aes(x=value, y=group)+
stat_halfeye() +
geom_point(data=get_mean_coords(df, value, group),
aes(x=mean.x, y=mean.y),
color="red", size=2) +
theme_bw() +
scale_y_discrete(expand=c(0.08,0.05))
我正在绘制一些密度曲线,我想在每组的平均值处添加一个点。但是,我想沿着密度曲线的顶部绘制这些点,而不是在 0 处。有没有办法得出组内平均点的密度值?代码如下:
# make df
df<- data.frame(group=c("a","b",'c'),
value=rnorm(
3000,
mean=c(1,2,3),
sd=c(1,1.5,1)
))
library(tidyverse)
library(ggridges)
library(ggdist)
方式 1:来自 ggridges ppackage 的密度脊线
df %>%
# calculate mean density per group to use later
group_by(group)%>%
mutate(mean_value=mean(value)) %>%
ggplot()+
aes(x=value,y=group)+
geom_density_ridges()+
# could do with stat summary - blue points
stat_summary(
orientation = "y",
fun = mean,
geom = "point",
color="blue"
)+
# or could do with geom_point using precalculated value (red points)
# nudged so we can see both.
geom_point(aes(x=mean_value,y=group),
color="red",
position = position_nudge(x=.1)
)
方式 2:stat_halfeye 来自 ggdist 包
df %>%
group_by(group)%>%
mutate(mean_value=mean(value)) %>%
# mutate(mean_density = density(mean_value,value))
ggplot()+
aes(x=value,y=group)+
stat_halfeye()+
# could do with stat summary
stat_summary(
orientation = "y",
fun = mean,
geom = "point",
color="blue",
alpha=.8
)+
# or could do with geom_point using precalculated value
# nudged so we can see both.
geom_point(aes(x=mean_value,y=group),
color="red",
position = position_nudge(x=.1)
)
期望输出:这些蓝色或红色点位于密度曲线的顶部。所以我需要一个类似“组 + 密度值”的 y 美学。
宁愿使用方法 2 (ggdist) 而不是 geom_density 山脊
谢谢
我不确定是否有办法在 ggplot geom/stat 函数中计算密度曲线的平均值的高度,所以我创建了几个辅助函数来做到这一点。
dens_at_mean
计算密度曲线在数据平均值处的高度。 get_mean_coords
按组运行 dens_at_mean
,然后缩放高度值以匹配由 stat_halfeye
和 returns 生成的 y 值,一个可以传递给 [=16= 的数据框].
# Reproducible data
set.seed(394)
df<- data.frame(group=c("a","b",'c'),
value=rnorm(
3000,
mean=c(1,2,3),
sd=c(1,1.5,1)
))
# Function to get height of density curve at mean value
dens_at_mean = function(x) {
d = density(x)
mean.x = mean(x)
data.frame(mean.x = mean.x,
max.y = max(d$y),
mean.y = approx(d$x, d$y, xout=mean.x)$y)
}
# Function to return data frame with properly scaled heights
# to plot mean points
get_mean_coords = function(data, value.var, group.var) {
data %>%
group_by({{group.var}}) %>%
summarise(vals = list(dens_at_mean({{value.var}}))) %>%
ungroup %>%
unnest_wider(vals) %>%
# Scale y-value to work properly with stat_halfeye
mutate(mean.y = (mean.y/max(max.y) * 0.9 + 1:n())) %>%
select(-max.y)
}
df %>%
ggplot()+
aes(x=value, y=group)+
stat_halfeye() +
geom_point(data=get_mean_coords(df, value, group),
aes(x=mean.x, y=mean.y),
color="red", size=2) +
theme_bw() +
scale_y_discrete(expand=c(0.08,0.05))