如何在同一个 ggplot2 (R) 上拟合负二项式、正态和泊松密度函数,但缩放到计数数据?

How to fit a negative binomial, normal, and poisson density function on the same ggplot2 (R) but scaled to the count data?

我有一些计数数据。我想用计数数据绘制直方图并添加负二项式、正态和泊松密度函数,但将函数拟合到计数数据。

我尝试参照 this example 来做,但是:(a) 我在拟合负二项式和泊松函数时遇到了问题;(b) 无法将拟合曲线缩放到计数数据的水平;(c) 不知道如何把三条拟合曲线画在同一张图上,并为每条线添加图例;(d) 另外,我怎样才能得到每个拟合的基本统计量?例如,负二项式拟合会生成参数 k,我怎样才能让它显示在图上?

# Reproducible example data: 500 draws from a binomial distribution with
# size = 100 and prob = 0.1, stored in a one-column data frame.
set.seed(111)
counts <- rbinom(n = 500, size = 100, prob = 0.1)
df <- data.frame(counts = counts)

# Question attempt 1 (normal): density-scaled histogram of the counts plus a
# dnorm curve whose arguments are the estimates from fitdistr(..., "normal").
# NOTE(review): fitdistr() is presumably MASS::fitdistr; MASS and ggplot2 are
# only attached further down in this file, so they must be loaded first.
ggplot(df, aes(x = counts)) + 
  geom_histogram(aes(y=..density..),colour = "black", fill = "white") +
  stat_function(fun=dnorm,args=fitdistr(df$counts,"normal")$estimate)

# Question attempt 2 (Poisson), kept exactly as posted by the asker.
# NOTE(review): `fun=poisson` passes `poisson` where a density function is
# expected; the working answer further down uses `dpois` and evaluates it on
# integer x values only (xlim = c(0, 19), n = 20).
ggplot(df, aes(x = counts)) + 
  geom_histogram(aes(y=..density..),colour = "black", fill = "white") +
  stat_function(fun=poisson,args=fitdistr(df$counts,"poisson")$estimate)

# Question attempt 3 (negative binomial), kept exactly as posted by the asker.
# NOTE(review): "dnbinom" is not a distribution name accepted by
# MASS::fitdistr; the working answer further down passes "negative binomial"
# together with method = "SANN".
ggplot(df, aes(x = counts)) + 
  geom_histogram(aes(y=..density..),colour = "black", fill = "white") +
  stat_function(fun=dnbinom,args=fitdistr(df$counts,"dnbinom")$estimate)

这是泊松密度的解。

library(MASS)
library(ggplot2)
# Density-scaled histogram of the counts with the Poisson fit from
# fitdistr() drawn on top.
ggplot(data = df, mapping = aes(x = counts)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    bins = 20,
    fill = "white",
    colour = "black"
  ) +
  # n = 20 points across xlim = c(0, 19) means dpois() is evaluated at the
  # integers 0, 1, ..., 19 only.
  stat_function(
    fun = dpois,
    args = fitdistr(df$counts, "poisson")$estimate,
    n = 20,
    xlim = c(0, 19),
    size = 1
  ) +
  theme_bw()

这是负二项式的解。

# Density-scaled histogram of the counts with the negative binomial fit from
# fitdistr() drawn on top.
ggplot(data = df, mapping = aes(x = counts)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    bins = 20,
    fill = "white",
    colour = "black"
  ) +
  # n = 20 points across xlim = c(0, 19) means dnbinom() is evaluated at the
  # integers 0, 1, ..., 19 only. method = "SANN" replaces the default
  # optimiser used by fitdistr().
  stat_function(
    fun = dnbinom,
    args = fitdistr(
      df$counts,
      "negative binomial",
      method = "SANN"
    )$estimate,
    n = 20,
    xlim = c(0, 19),
    size = 1
  ) +
  theme_bw()

您的代码有几个问题。首先,"dnbinom" 不是 MASS::fitdistr 的有效分布名称(应使用 "negative binomial")。其次,MASS::fitdistr 使用默认优化方法无法完成拟合,因此我们可以改用 method = "SANN"。第三,除非另行指定,stat_function 会尝试在非整数值处计算 dnbinom,而这是行不通的。

让参数显示在图例中有点棘手,因为您必须在 ggplot 调用之外估计它们。我很懒,用了 purrr::map2,但你可以使用一些基本的 R 函数来做同样的事情。

library(purrr)
library(dplyr)
# Fit each candidate distribution and keep its parameter estimates
# (a named numeric vector per distribution).
norm.params <- fitdistr(df$counts, "normal")$estimate
poisson.params <- fitdistr(df$counts, "poisson")$estimate
negbinom.params <- fitdistr(
  df$counts,
  "negative binomial",
  method = "SANN"
)$estimate

# Build one legend label per distribution, of the form
# "<Distribution>:\n<param> = <value>, <param> = <value>", with values rounded
# to two decimals. Iterating over names(.) as the first map2_chr() input keeps
# the result an unnamed character vector.
dist.params <- list(
  Normal = norm.params,
  Poisson = poisson.params,
  `Negative Binomial` = negbinom.params
) %>%
  map2_chr(names(.), ., function(label, estimates) {
    terms <- map2_chr(
      names(estimates), estimates,
      function(param, value) paste0(param, " = ", round(value, 2))
    )
    paste(label, paste0(terms, collapse = ", "), sep = ":\n")
  })

最后,如果我们想按计数缩放,如 this answer 中所见,我们只需定义匿名函数。

# Overlay the three fitted distributions on a histogram drawn on the count
# scale. A density is turned into an expected bar height by multiplying it by
# (bin width * number of observations).
#
# The curves reuse norm.params / poisson.params / negbinom.params, i.e. the
# same estimates that were formatted into the legend labels in dist.params.
# Refitting here could give slightly different negative binomial estimates
# (method = "SANN" is a stochastic optimiser), so the plotted curve would no
# longer match the parameters printed in the legend.
mybinwidth <- 1
ggplot(df, aes(x = counts)) +
  # NOTE(review): `..count..` is kept for compatibility with older ggplot2;
  # newer ggplot2 releases document `after_stat(count)` as its replacement.
  geom_histogram(aes(y = ..count..), colour = "black", fill = "white",
                 binwidth = mybinwidth) +
  # The strings mapped to `color` are only legend keys; the drawn colours and
  # the legend text are assigned in scale_color_manual() below.
  stat_function(aes(color = "black"),
                fun = function(x, mean, sd) mybinwidth * nrow(df) * dnorm(x, mean, sd),
                args = norm.params) +
  # The discrete densities are evaluated on the integer grid 1, 2, ..., 20
  # only (n = 20 points across xlim = c(1, 20)).
  stat_function(aes(color = "blue"),
                fun = function(x, lambda) mybinwidth * nrow(df) * dpois(x, lambda),
                args = poisson.params,
                xlim = c(1, 20), n = 20) +
  stat_function(aes(color = "orange"),
                fun = function(x, size, mu) mybinwidth * nrow(df) * dnbinom(x, size = size, mu = mu),
                args = negbinom.params,
                xlim = c(1, 20), n = 20) +
  # Explicit breaks pin the key order so the positional labels in dist.params
  # (Normal, Poisson, Negative Binomial) always land on the matching curve.
  scale_color_manual("Distribution",
                     values = c(black = "black", blue = "blue", orange = "orange"),
                     breaks = c("black", "blue", "orange"),
                     labels = dist.params)