如何从 R 的 ggplot 图中提取密度值
How to extract the density value from ggplot in R
如何提取每个密度图的值矩阵?
比如我感兴趣的是,当weight = 71时,橘子、苹果、梨、香蕉的密度是多少?
下面是最小的例子:
# Minimal reproducible example: four fruit groups with 200 simulated weights
# each (normally distributed, rounded to whole numbers), drawn as one density
# curve per fruit in its own facet row.
library(ggplot2)

set.seed(1234)  # fixed seed so the simulated weights are reproducible

df <- data.frame(
  fruits = factor(rep(c("Orange", "Apple", "Pears", "Banana"), each = 200)),
  weight = round(c(
    rnorm(200, mean = 55, sd = 5),
    rnorm(200, mean = 65, sd = 5),
    rnorm(200, mean = 70, sd = 5),
    rnorm(200, mean = 75, sd = 5)
  ))
)

dim(df)
# [1] 800   2

ggplot(df, aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")
我认为 ggplot2 底层使用的密度估计与 `stats::density` 类似,因此我们可以直接用它来构建相同的信息:
# Recompute each fruit's kernel density estimate with stats::density() and
# report, for every fruit, the grid point closest to weight = 71.
library(dplyr)
library(tidyr)
library(purrr)

df %>%
  group_by(fruits) %>%
  nest() %>%
  ungroup() %>%
  # One "density" object per fruit, estimated from that fruit's weights.
  mutate(density = map(data, \(d) density.default(d[["weight"]]))) %>%
  # Lift the grid coordinates (x) and density values (y) out of each object.
  hoist(density, "x", "y") %>%
  select(-density, -data) %>%
  unnest(c(x, y)) %>%
  group_by(fruits) %>%
  # with_ties = FALSE keeps exactly one row per fruit even when two grid
  # points are equally close to 71 (otherwise a fruit can appear twice).
  slice_min(abs(x - 71), n = 1, with_ties = FALSE) %>%
  ungroup()
# A tibble: 4 x 3
#   fruits     x       y
#   <fct>  <dbl>   <dbl>
# 1 Apple   71.0 0.0409
# 2 Banana  71.0 0.0574
# 3 Orange  71.0 0.00131
# 4 Pears   71.0 0.0747
如果你觉得这还不够,下面是按你的要求直接从绘图对象中提取的结果:
# Read the densities that ggplot2 itself computed for the plot and report,
# for every facet panel, the grid point closest to weight = 71.
library(dplyr)

# The plot must be stored in a variable before it can be passed on.
gg_density_plot <- ggplot(df, aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")

gg_density_plot %>%
  ggplot_build() %>%
  # Data frame behind the first (and only) layer of the built plot.
  `[[`("data") %>%
  `[[`(1) %>%
  as_tibble() %>%
  # glimpse() %>%
  # count(PANEL) # panel is fruit
  group_by(PANEL) %>%
  # with_ties = FALSE guarantees a single row per panel.
  slice_min(abs(x - 71), n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(PANEL, x, y, density)
# A tibble: 4 x 4
#   PANEL     x       y density
#   <fct> <dbl>   <dbl>   <dbl>
# 1 1      71.0 0.0410  0.0410
# 2 2      71.0 0.0568  0.0568
# 3 3      71.0 0.00135 0.00135
# 4 4      71.0 0.0747  0.0747
将绘图保存在变量中,使用 `ggplot_build` 构建其数据结构,并按面板拆分数据;然后用 `approx` 进行插值以获得新值。
# Keep the plot object so the data computed for it can be inspected.
g <- ggplot(data = df, mapping = aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")

# Building the plot exposes the data frame behind each layer.
p <- ggplot_build(g)

# Columns of interest in the first layer's data
p[["data"]][[1]][["x"]]
p[["data"]][[1]][["density"]]
p[["data"]][[1]][["PANEL"]]
按面板拆分列表成员 `p$data[[1]]`,但仅保留 `x` 和 `density` 两列;然后遍历拆分后的数据,对每个水果分组进行插值。
# Weight at which every panel's density curve is evaluated.
new_weight <- 71

# One data frame holding only x and density per facet panel.
sp <- split(
  x = p$data[[1]][c("x", "density")],
  f = p$data[[1]]$PANEL
)

# Linearly interpolate each panel's curve at new_weight.
sapply(X = sp, FUN = function(panel) {
  approx(x = panel$x, y = panel$density, xout = new_weight)
})
# 1 2 3 4
#x 71 71 71 71
#y 0.04066888 0.05716947 0.001319164 0.07467761
或者,不预先拆分数据,直接使用 `by`。
# Same interpolation without a separate split(): by() partitions the two
# columns by panel and applies the function to each piece.
b <- by(
  data = p$data[[1]][c("x", "density")],
  INDICES = p$data[[1]]$PANEL,
  FUN = function(panel) {
    approx(x = panel$x, y = panel$density, xout = new_weight)
  }
)

# Stack the per-panel results into a single data frame, one row per panel.
do.call(rbind, lapply(b, as.data.frame))
# x y
#1 71 0.040668880
#2 71 0.057169474
#3 71 0.001319164
#4 71 0.074677607
如何提取每个密度图的值矩阵?
比如我感兴趣的是,当weight = 71时,橘子、苹果、梨、香蕉的密度是多少?
下面是最小的例子:
# Minimal reproducible example: four fruit groups with 200 simulated weights
# each (normally distributed, rounded to whole numbers), drawn as one density
# curve per fruit in its own facet row.
library(ggplot2)

set.seed(1234)  # fixed seed so the simulated weights are reproducible

df <- data.frame(
  fruits = factor(rep(c("Orange", "Apple", "Pears", "Banana"), each = 200)),
  weight = round(c(
    rnorm(200, mean = 55, sd = 5),
    rnorm(200, mean = 65, sd = 5),
    rnorm(200, mean = 70, sd = 5),
    rnorm(200, mean = 75, sd = 5)
  ))
)

dim(df)
# [1] 800   2

ggplot(df, aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")
我认为 ggplot2 底层使用的密度估计与 `stats::density` 类似,因此我们可以直接用它来构建相同的信息:
# Recompute each fruit's kernel density estimate with stats::density() and
# report, for every fruit, the grid point closest to weight = 71.
library(dplyr)
library(tidyr)
library(purrr)

df %>%
  group_by(fruits) %>%
  nest() %>%
  ungroup() %>%
  # One "density" object per fruit, estimated from that fruit's weights.
  mutate(density = map(data, \(d) density.default(d[["weight"]]))) %>%
  # Lift the grid coordinates (x) and density values (y) out of each object.
  hoist(density, "x", "y") %>%
  select(-density, -data) %>%
  unnest(c(x, y)) %>%
  group_by(fruits) %>%
  # with_ties = FALSE keeps exactly one row per fruit even when two grid
  # points are equally close to 71 (otherwise a fruit can appear twice).
  slice_min(abs(x - 71), n = 1, with_ties = FALSE) %>%
  ungroup()
# A tibble: 4 x 3
#   fruits     x       y
#   <fct>  <dbl>   <dbl>
# 1 Apple   71.0 0.0409
# 2 Banana  71.0 0.0574
# 3 Orange  71.0 0.00131
# 4 Pears   71.0 0.0747
如果你觉得这还不够,下面是按你的要求直接从绘图对象中提取的结果:
# Read the densities that ggplot2 itself computed for the plot and report,
# for every facet panel, the grid point closest to weight = 71.
library(dplyr)

# The plot must be stored in a variable before it can be passed on.
gg_density_plot <- ggplot(df, aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")

gg_density_plot %>%
  ggplot_build() %>%
  # Data frame behind the first (and only) layer of the built plot.
  `[[`("data") %>%
  `[[`(1) %>%
  as_tibble() %>%
  # glimpse() %>%
  # count(PANEL) # panel is fruit
  group_by(PANEL) %>%
  # with_ties = FALSE guarantees a single row per panel.
  slice_min(abs(x - 71), n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(PANEL, x, y, density)
# A tibble: 4 x 4
#   PANEL     x       y density
#   <fct> <dbl>   <dbl>   <dbl>
# 1 1      71.0 0.0410  0.0410
# 2 2      71.0 0.0568  0.0568
# 3 3      71.0 0.00135 0.00135
# 4 4      71.0 0.0747  0.0747
将绘图保存在变量中,使用 `ggplot_build` 构建其数据结构,并按面板拆分数据;然后用 `approx` 进行插值以获得新值。
# Keep the plot object so the data computed for it can be inspected.
g <- ggplot(data = df, mapping = aes(x = weight)) +
  geom_density() +
  facet_grid(fruits ~ ., scales = "free", space = "free")

# Building the plot exposes the data frame behind each layer.
p <- ggplot_build(g)

# Columns of interest in the first layer's data
p[["data"]][[1]][["x"]]
p[["data"]][[1]][["density"]]
p[["data"]][[1]][["PANEL"]]
按面板拆分列表成员 `p$data[[1]]`,但仅保留 `x` 和 `density` 两列;然后遍历拆分后的数据,对每个水果分组进行插值。
# Weight at which every panel's density curve is evaluated.
new_weight <- 71

# One data frame holding only x and density per facet panel.
sp <- split(
  x = p$data[[1]][c("x", "density")],
  f = p$data[[1]]$PANEL
)

# Linearly interpolate each panel's curve at new_weight.
sapply(X = sp, FUN = function(panel) {
  approx(x = panel$x, y = panel$density, xout = new_weight)
})
# 1 2 3 4
#x 71 71 71 71
#y 0.04066888 0.05716947 0.001319164 0.07467761
或者,不预先拆分数据,直接使用 `by`。
# Same interpolation without a separate split(): by() partitions the two
# columns by panel and applies the function to each piece.
b <- by(
  data = p$data[[1]][c("x", "density")],
  INDICES = p$data[[1]]$PANEL,
  FUN = function(panel) {
    approx(x = panel$x, y = panel$density, xout = new_weight)
  }
)

# Stack the per-panel results into a single data frame, one row per panel.
do.call(rbind, lapply(b, as.data.frame))
# x y
#1 71 0.040668880
#2 71 0.057169474
#3 71 0.001319164
#4 71 0.074677607