使用ggplot2之前如何计算密度和频率?
How to calculate density and frequency before using ggplot2?
在看到如何制作可点击的直方图之后,我想知道是否有办法使用 brushedPoints
来获取画笔(brush)选中区域的输出。据我了解,这需要 x 轴和 y 轴的数据。然而,ggplot2
既可以绘制频数直方图(y 轴为频数),也可以绘制带密度曲线的直方图(y 轴为密度),所以我不知道如何在绘图之前获取这些信息。
有谁知道如何计算密度和频数,以便在 ggplot2
中绘制直方图?(请注意,我不想依赖 ggplot2 的特定函数来得到图表;我希望先得到包含这些信息的 data.frame,再用它来绘图。)
我用密度曲线绘制直方图的代码:
library(ggplot2)
library(dplyr)
# Two samples of numeric measurements used throughout the examples.
val1 <- c(
  2.1490626, 3.7928443, 2.2035281, 1.5927854, 3.1399245, 2.3967338,
  3.7915825, 4.6691277, 3.0727319, 2.9230937, 2.6239759, 3.7664386,
  4.0160378, 1.2500835, 4.7648343, 0.0000000, 5.6740227, 2.7510256,
  3.0709322, 2.7998003, 4.0809085, 2.5178086, 5.9713330, 2.7779843,
  3.6724801, 4.2648527, 3.6841084, 2.5597235, 3.8477471, 2.6587736,
  2.2742209, 4.5862788, 6.1989269, 4.1167091, 3.1769325, 4.2404515,
  5.3627032, 4.1576810, 4.3387921, 1.4024381, 0.0000000, 4.3999099,
  3.4381837, 4.8269218, 2.6308474, 5.3481382, 4.9549753, 4.5389650,
  1.3002293, 2.8648220, 2.4015338, 2.0962332, 2.6774765, 3.0581759,
  2.5786137, 5.0539080, 3.8545796, 4.3429043, 4.2233248, 2.0434363,
  4.5980727
)
val2 <- c(
  3.7691229, 3.6478055, 0.5435826, 1.9665861, 3.0802654, 1.2248374,
  1.7311236, 2.2492826, 2.2365337, 1.5726119, 2.0147144, 2.3550348,
  1.9527204, 3.3689502, 1.7847986, 3.5901329, 1.6833872, 3.4240479,
  1.8372175, 0.0000000, 2.5701453, 3.6551315, 4.0327091, 3.8781182
)

# One single-column data frame per sample.
df1 <- data.frame(value = val1)
df2 <- data.frame(value = val2)

# Stack both samples; the list names ("df1", "df2") end up in the `id` column.
data <- bind_rows(list(df1 = df1, df2 = df2), .id = "id")
# Density-scaled histogram per group with a kernel density curve on top.
# One facet row per `id`; bars are filled by `id`.
data %>%
  ggplot(aes(x = value)) +
  # after_stat(density) replaces the deprecated `..density..` notation: bar
  # height is the density statistic computed for each bin, not the raw count.
  geom_histogram(
    aes(y = after_stat(density), fill = id),
    bins = 10, colour = "black", alpha = 0.4
  ) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE) +
  facet_grid(id ~ .) +
  # Axis breaks are derived from the full range of `value` across both groups.
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  guides(fill = guide_legend(title = "My legend...")) +
  # Hide the facet strip labels.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())
我用来绘制频率直方图的代码:
# Frequency (count) histogram per group, one facet row per `id`.
data %>%
  ggplot(aes(x = value)) +
  geom_histogram(fill = "red", bins = 10, colour = "black", alpha = 0.4) +
  facet_grid(id ~ .) +
  # Axis breaks are derived from the full range of `value` across both groups.
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  # No guides(fill = ...) here: fill is a constant, not a mapped aesthetic, so
  # there is no fill legend for a guide to act on.
  # Hide the facet strip labels.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())
一旦我有了密度和频率列,我将不得不从代码中删除这些参数,但我不知道是否可以使用包含该信息的“y”列。
非常感谢
此致
如果您想从已绘制的图中提取计数/密度信息,可以使用 layer_data() 函数。
library(ggplot2)
library(dplyr)
# Build the frequency histogram and keep the plot object so that the values
# ggplot2 computed for it can be inspected afterwards.
p <- data %>%
  ggplot(aes(x = value)) +
  geom_histogram(fill = "red", bins = 10, colour = "black", alpha = 0.4) +
  facet_grid(id ~ .) +
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  # No guides(fill = ...) here: fill is a constant, not a mapped aesthetic, so
  # there is no fill legend for a guide to act on.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())

# layer_data() returns the data frame ggplot2 computed for the plot's layer,
# including the per-bin `count` and `density` columns shown below.
head(layer_data(p))
#> y count x xmin xmax density ncount ndensity
#> 1 2 2 0.0000000 -0.3443848 0.3443848 0.04760210 0.1333333 0.1333333
#> 2 0 0 0.6887697 0.3443848 1.0331545 0.00000000 0.0000000 0.0000000
#> 3 4 4 1.3775393 1.0331545 1.7219241 0.09520421 0.2666667 0.2666667
#> 4 7 7 2.0663090 1.7219241 2.4106938 0.16660737 0.4666667 0.4666667
#> 5 15 15 2.7550786 2.4106938 3.0994635 0.35701579 1.0000000 1.0000000
#> 6 6 6 3.4438483 3.0994635 3.7882331 0.14280631 0.4000000 0.4000000
#> flipped_aes PANEL group ymin ymax colour fill size linetype alpha
#> 1 FALSE 1 -1 0 2 black red 0.5 1 0.4
#> 2 FALSE 1 -1 0 0 black red 0.5 1 0.4
#> 3 FALSE 1 -1 0 4 black red 0.5 1 0.4
#> 4 FALSE 1 -1 0 7 black red 0.5 1 0.4
#> 5 FALSE 1 -1 0 15 black red 0.5 1 0.4
#> 6 FALSE 1 -1 0 6 black red 0.5 1 0.4
但是,如果您打算自己创建分箱(bins),则需要手动对数据进行切分和计数。有很多方法可以做到这一点,我建议使用 ggplot2 自带的 cut_*() 切分函数。您可以根据需要添加标签;这里为了清楚起见,我添加了标签。
## Creating your own histogram ----

#' Count observations per bin and group
#'
#' Cuts `val` into `cuts` equal-range intervals with
#' [ggplot2::cut_interval()] and counts the rows falling into each interval,
#' separately for every value of `group`. The intervals are computed before
#' grouping, on the whole column, so all groups share the same bins.
#'
#' @param data A data frame.
#' @param group Unquoted name of the grouping column (tidy evaluation).
#' @param val Unquoted name of the numeric column to bin (tidy evaluation).
#' @param cuts Number of bins; a single number >= 1.
#' @param labels Bin labels passed on to `cut_interval()`. Defaults to
#'   `seq_len(cuts)`, i.e. the bin index.
#' @return An ungrouped data frame with the grouping column, `cuts` (the bin)
#'   and `n` (the number of rows in that group/bin combination).
count_bins <- function(data, group, val, cuts, labels = seq_len(cuts)) {
  stopifnot(is.numeric(cuts), length(cuts) == 1L, !is.na(cuts), cuts >= 1)
  # Evaluate the default up front so it does not depend on later lookups.
  force(labels)

  data %>%
    ## you can also use base::cut or another ggplot2 cut_ function
    # .env$ guarantees the function arguments are used even when `data`
    # already has columns named `cuts` or `labels`.
    mutate(
      cuts = ggplot2::cut_interval(
        {{ val }},
        n = .env$cuts,
        labels = .env$labels
      )
    ) %>%
    group_by({{ group }}) %>%
    count(cuts) %>%
    # Drop the grouping so callers do not inherit it by accident.
    ungroup()
}
# Draw the pre-computed bin counts as bars, one facet row per `id`.
ggplot(count_bins(data, id, value, 10), aes(x = cuts, y = n)) +
  geom_col(fill = "red", colour = "black", alpha = 0.4) +
  facet_grid(rows = vars(id))
在看到如何制作可点击的直方图之后,我想知道是否有办法使用 brushedPoints
来获取画笔(brush)选中区域的输出。据我了解,这需要 x 轴和 y 轴的数据。然而,ggplot2
既可以绘制频数直方图(y 轴为频数),也可以绘制带密度曲线的直方图(y 轴为密度),所以我不知道如何在绘图之前获取这些信息。
有谁知道如何计算密度和频数,以便在 ggplot2
中绘制直方图?(请注意,我不想依赖 ggplot2 的特定函数来得到图表;我希望先得到包含这些信息的 data.frame,再用它来绘图。)
我用密度曲线绘制直方图的代码:
library(ggplot2)
library(dplyr)
# Two samples of numeric measurements used throughout the examples.
val1 <- c(
  2.1490626, 3.7928443, 2.2035281, 1.5927854, 3.1399245, 2.3967338,
  3.7915825, 4.6691277, 3.0727319, 2.9230937, 2.6239759, 3.7664386,
  4.0160378, 1.2500835, 4.7648343, 0.0000000, 5.6740227, 2.7510256,
  3.0709322, 2.7998003, 4.0809085, 2.5178086, 5.9713330, 2.7779843,
  3.6724801, 4.2648527, 3.6841084, 2.5597235, 3.8477471, 2.6587736,
  2.2742209, 4.5862788, 6.1989269, 4.1167091, 3.1769325, 4.2404515,
  5.3627032, 4.1576810, 4.3387921, 1.4024381, 0.0000000, 4.3999099,
  3.4381837, 4.8269218, 2.6308474, 5.3481382, 4.9549753, 4.5389650,
  1.3002293, 2.8648220, 2.4015338, 2.0962332, 2.6774765, 3.0581759,
  2.5786137, 5.0539080, 3.8545796, 4.3429043, 4.2233248, 2.0434363,
  4.5980727
)
val2 <- c(
  3.7691229, 3.6478055, 0.5435826, 1.9665861, 3.0802654, 1.2248374,
  1.7311236, 2.2492826, 2.2365337, 1.5726119, 2.0147144, 2.3550348,
  1.9527204, 3.3689502, 1.7847986, 3.5901329, 1.6833872, 3.4240479,
  1.8372175, 0.0000000, 2.5701453, 3.6551315, 4.0327091, 3.8781182
)

# One single-column data frame per sample.
df1 <- data.frame(value = val1)
df2 <- data.frame(value = val2)

# Stack both samples; the list names ("df1", "df2") end up in the `id` column.
data <- bind_rows(list(df1 = df1, df2 = df2), .id = "id")
# Density-scaled histogram per group with a kernel density curve on top.
# One facet row per `id`; bars are filled by `id`.
data %>%
  ggplot(aes(x = value)) +
  # after_stat(density) replaces the deprecated `..density..` notation: bar
  # height is the density statistic computed for each bin, not the raw count.
  geom_histogram(
    aes(y = after_stat(density), fill = id),
    bins = 10, colour = "black", alpha = 0.4
  ) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE) +
  facet_grid(id ~ .) +
  # Axis breaks are derived from the full range of `value` across both groups.
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  guides(fill = guide_legend(title = "My legend...")) +
  # Hide the facet strip labels.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())
我用来绘制频率直方图的代码:
# Frequency (count) histogram per group, one facet row per `id`.
data %>%
  ggplot(aes(x = value)) +
  geom_histogram(fill = "red", bins = 10, colour = "black", alpha = 0.4) +
  facet_grid(id ~ .) +
  # Axis breaks are derived from the full range of `value` across both groups.
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  # No guides(fill = ...) here: fill is a constant, not a mapped aesthetic, so
  # there is no fill legend for a guide to act on.
  # Hide the facet strip labels.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())
一旦我有了密度和频率列,我将不得不从代码中删除这些参数,但我不知道是否可以使用包含该信息的“y”列。
非常感谢
此致
如果您想从已绘制的图中提取计数/密度信息,可以使用 layer_data() 函数。
library(ggplot2)
library(dplyr)
# Build the frequency histogram and keep the plot object so that the values
# ggplot2 computed for it can be inspected afterwards.
p <- data %>%
  ggplot(aes(x = value)) +
  geom_histogram(fill = "red", bins = 10, colour = "black", alpha = 0.4) +
  facet_grid(id ~ .) +
  scale_x_continuous(breaks = pretty(data$value, n = 10)) +
  ggtitle("My histogram....") +
  # No guides(fill = ...) here: fill is a constant, not a mapped aesthetic, so
  # there is no fill legend for a guide to act on.
  theme(strip.text.x = element_blank(), strip.text.y = element_blank())

# layer_data() returns the data frame ggplot2 computed for the plot's layer,
# including the per-bin `count` and `density` columns shown below.
head(layer_data(p))
#> y count x xmin xmax density ncount ndensity
#> 1 2 2 0.0000000 -0.3443848 0.3443848 0.04760210 0.1333333 0.1333333
#> 2 0 0 0.6887697 0.3443848 1.0331545 0.00000000 0.0000000 0.0000000
#> 3 4 4 1.3775393 1.0331545 1.7219241 0.09520421 0.2666667 0.2666667
#> 4 7 7 2.0663090 1.7219241 2.4106938 0.16660737 0.4666667 0.4666667
#> 5 15 15 2.7550786 2.4106938 3.0994635 0.35701579 1.0000000 1.0000000
#> 6 6 6 3.4438483 3.0994635 3.7882331 0.14280631 0.4000000 0.4000000
#> flipped_aes PANEL group ymin ymax colour fill size linetype alpha
#> 1 FALSE 1 -1 0 2 black red 0.5 1 0.4
#> 2 FALSE 1 -1 0 0 black red 0.5 1 0.4
#> 3 FALSE 1 -1 0 4 black red 0.5 1 0.4
#> 4 FALSE 1 -1 0 7 black red 0.5 1 0.4
#> 5 FALSE 1 -1 0 15 black red 0.5 1 0.4
#> 6 FALSE 1 -1 0 6 black red 0.5 1 0.4
但是,如果您打算自己创建分箱(bins),则需要手动对数据进行切分和计数。有很多方法可以做到这一点,我建议使用 ggplot2 自带的 cut_*() 切分函数。您可以根据需要添加标签;这里为了清楚起见,我添加了标签。
## Creating your own histogram ----

#' Count observations per bin and group
#'
#' Cuts `val` into `cuts` equal-range intervals with
#' [ggplot2::cut_interval()] and counts the rows falling into each interval,
#' separately for every value of `group`. The intervals are computed before
#' grouping, on the whole column, so all groups share the same bins.
#'
#' @param data A data frame.
#' @param group Unquoted name of the grouping column (tidy evaluation).
#' @param val Unquoted name of the numeric column to bin (tidy evaluation).
#' @param cuts Number of bins; a single number >= 1.
#' @param labels Bin labels passed on to `cut_interval()`. Defaults to
#'   `seq_len(cuts)`, i.e. the bin index.
#' @return An ungrouped data frame with the grouping column, `cuts` (the bin)
#'   and `n` (the number of rows in that group/bin combination).
count_bins <- function(data, group, val, cuts, labels = seq_len(cuts)) {
  stopifnot(is.numeric(cuts), length(cuts) == 1L, !is.na(cuts), cuts >= 1)
  # Evaluate the default up front so it does not depend on later lookups.
  force(labels)

  data %>%
    ## you can also use base::cut or another ggplot2 cut_ function
    # .env$ guarantees the function arguments are used even when `data`
    # already has columns named `cuts` or `labels`.
    mutate(
      cuts = ggplot2::cut_interval(
        {{ val }},
        n = .env$cuts,
        labels = .env$labels
      )
    ) %>%
    group_by({{ group }}) %>%
    count(cuts) %>%
    # Drop the grouping so callers do not inherit it by accident.
    ungroup()
}
# Draw the pre-computed bin counts as bars, one facet row per `id`.
ggplot(count_bins(data, id, value, 10), aes(x = cuts, y = n)) +
  geom_col(fill = "red", colour = "black", alpha = 0.4) +
  facet_grid(rows = vars(id))