正态曲线图上的错误区域
Wrong area on normal curve plot
我正在尝试从头开始学习 R。我刚刚交了一份大学作业,内容是对二项分布做假设检验(单样本比例检验),并使用 R 进行求解和绘图,但运行时遇到了一些问题。
我的样本量是130,成功案例是68。
- H0: π = 50%
- H1: π > 50%
这是我使用的代码(大量复制粘贴和反复试错)
library(ggplot2)
library(ggthemes)
library(scales)
# data ----
# One-sample proportion test using the normal approximation to the binomial:
# H0: pi = 50% against the one-sided alternative H1: pi > 50%.
n <- 130       # sample size
p <- 1 / 2     # proportion under H0
cases <- 68    # observed number of successes
mean_binon <- n * p               # binomial mean under H0
stdev <- sqrt(n * p * (1 - p))    # binomial standard deviation under H0
ztest <- (cases - mean_binon) / stdev
# Upper-tail p-value, matching the one-sided H1. pnorm(-abs(ztest)) yields
# the same number only while ztest >= 0; for a negative ztest it would
# report a small p-value where the upper tail is actually above 0.5.
pvalor <- pnorm(ztest, lower.tail = FALSE)
# NOTE(review): qnorm(0.975) leaves 2.5% in the upper tail. If the intended
# one-sided significance level is 5%, the cutoff would be qnorm(0.95) --
# confirm the level before changing it.
zcrit <- qnorm(0.975)
#normal curve
# Two-row data frame whose only role is to carry the x-axis range.
xvalues <- data.frame(x = c(-4, 4))
#first plots and lines
# Map the column `x`; `aes(x = xvalues)` handed the whole data frame to the
# x aesthetic instead of a column of it.
p1 <- ggplot(xvalues, aes(x = x))
# Standard normal density on [-4, 4] with a vertical line at the observed
# statistic (blue) and at the critical value (red).
p2 <- p1 + stat_function(fun = dnorm) + xlim(c(-4, 4)) +
  geom_vline(xintercept = ztest, linetype = "solid", color = "blue",
             size = 1) +
  geom_vline(xintercept = zcrit, linetype = "solid", color = "red",
             size = 1)
#z area function
# Standard normal density with every point left of the global `ztest`
# blanked out (NA); used as the `fun` of an area layer in the final plot.
area_z <- function(x) {
  replace(dnorm(x), x < ztest, NA)
}
#critical z area function
# Standard normal density with every point left of the global `zcrit`
# blanked out (NA); used as the `fun` of an area layer in the final plot.
area_zc <- function(x) {
  replace(dnorm(x), x < zcrit, NA)
}
#area value
# Area under the standard normal curve between each cutoff and z = 4 (the
# right end of the plotted range), rounded to three decimals.
valor_area_z <- round(diff(pnorm(c(ztest, 4))), digits = 3)
valor_area_zc <- round(diff(pnorm(c(zcrit, 4))), digits = 3)
#final plot
# Layers, in drawing order: density curve, blue area right of ztest with its
# percentage label, red area right of zcrit with its percentage label.
# NOTE(review): the fills come from area_z / area_zc, which return NA left of
# the cutoff. stat_function() evaluates `fun` on a fixed grid of x values, so
# the first filled point is presumably the first grid point at or after the
# cutoff rather than the cutoff itself -- the likely source of the gap next
# to the vertical lines.
p3 <- p2 +
  stat_function(fun = dnorm) +
  stat_function(fun = area_z, geom = "area", fill = "blue", alpha = 0.3) +
  geom_text(
    x = 1.13, y = 0.1, size = 5, fontface = "bold",
    label = paste0(100 * valor_area_z, "%")
  ) +
  stat_function(fun = area_zc, geom = "area", fill = "red", alpha = 0.5) +
  geom_text(
    x = 2.27, y = 0.015, size = 3, fontface = "bold",
    label = paste0(100 * valor_area_zc, "%")
  ) +
  scale_x_continuous(breaks = -3:3) +
  labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
  theme_fivethirtyeight()
p3
这是绘制出的图
我的 geom_vline 与阴影区域之间有间隙。我不确定是我的统计步骤做错了,还是这是 R 本身的问题,也许两者都有?如果这个问题太基础,请见谅。这两方面我都不擅长,但我正在努力提高。
一个解决方案是在 stat_function 中使用 xlim 选项来限定函数的取值范围。
您也可以用 dnorm 替换 area_z 和 area_zc。
# Shade each region by restricting stat_function() to an x range through its
# `xlim` argument, so the fill is evaluated from the cutoff itself and plain
# dnorm can be used instead of the NA-masking helpers.
# p2 already draws the density curve, so it is not added a second time here.
# NOTE(review): valor_area_z is the area from ztest to 4, whereas the blue
# fill stops at zcrit; the red region is part of the labelled percentage.
p3 <- p2 +
  stat_function(fun = dnorm, geom = "area", fill = "blue", alpha = 0.3,
                xlim = c(ztest, zcrit)) +
  # annotate() draws each label once; geom_text() with constant x/y/label
  # repeats the text for every row of the plot data.
  annotate("text", x = 1.13, y = 0.1, size = 5, fontface = "bold",
           label = paste0(valor_area_z * 100, "%")) +
  stat_function(fun = dnorm, geom = "area", fill = "red", alpha = 0.5,
                xlim = c(zcrit, xvalues$x[2])) +
  annotate("text", x = 2.27, y = 0.015, size = 3, fontface = "bold",
           label = paste0(valor_area_zc * 100, "%")) +
  # p2 carries an x scale from xlim(); adding this scale replaces it, so the
  # limits are restated to keep the -4..4 window.
  scale_x_continuous(breaks = -3:3, limits = c(-4, 4)) +
  labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
  theme_fivethirtyeight()
p3
我正在尝试从头开始学习 R。我刚刚交了一份大学作业,内容是对二项分布做假设检验(单样本比例检验),并使用 R 进行求解和绘图,但运行时遇到了一些问题。
我的样本量是130,成功案例是68。
- H0: π = 50%
- H1: π > 50%
这是我使用的代码(大量复制粘贴和反复试错)
library(ggplot2)
library(ggthemes)
library(scales)
# data ----
# One-sample proportion test using the normal approximation to the binomial:
# H0: pi = 50% against the one-sided alternative H1: pi > 50%.
n <- 130       # sample size
p <- 1 / 2     # proportion under H0
cases <- 68    # observed number of successes
mean_binon <- n * p               # binomial mean under H0
stdev <- sqrt(n * p * (1 - p))    # binomial standard deviation under H0
ztest <- (cases - mean_binon) / stdev
# Upper-tail p-value, matching the one-sided H1. pnorm(-abs(ztest)) yields
# the same number only while ztest >= 0; for a negative ztest it would
# report a small p-value where the upper tail is actually above 0.5.
pvalor <- pnorm(ztest, lower.tail = FALSE)
# NOTE(review): qnorm(0.975) leaves 2.5% in the upper tail. If the intended
# one-sided significance level is 5%, the cutoff would be qnorm(0.95) --
# confirm the level before changing it.
zcrit <- qnorm(0.975)
#normal curve
# Two-row data frame whose only role is to carry the x-axis range.
xvalues <- data.frame(x = c(-4, 4))
#first plots and lines
# Map the column `x`; `aes(x = xvalues)` handed the whole data frame to the
# x aesthetic instead of a column of it.
p1 <- ggplot(xvalues, aes(x = x))
# Standard normal density on [-4, 4] with a vertical line at the observed
# statistic (blue) and at the critical value (red).
p2 <- p1 + stat_function(fun = dnorm) + xlim(c(-4, 4)) +
  geom_vline(xintercept = ztest, linetype = "solid", color = "blue",
             size = 1) +
  geom_vline(xintercept = zcrit, linetype = "solid", color = "red",
             size = 1)
#z area function
# Standard normal density with every point left of the global `ztest`
# blanked out (NA); used as the `fun` of an area layer in the final plot.
area_z <- function(x) {
  replace(dnorm(x), x < ztest, NA)
}
#critical z area function
# Standard normal density with every point left of the global `zcrit`
# blanked out (NA); used as the `fun` of an area layer in the final plot.
area_zc <- function(x) {
  replace(dnorm(x), x < zcrit, NA)
}
#area value
# Area under the standard normal curve between each cutoff and z = 4 (the
# right end of the plotted range), rounded to three decimals.
valor_area_z <- round(diff(pnorm(c(ztest, 4))), digits = 3)
valor_area_zc <- round(diff(pnorm(c(zcrit, 4))), digits = 3)
#final plot
# Layers, in drawing order: density curve, blue area right of ztest with its
# percentage label, red area right of zcrit with its percentage label.
# NOTE(review): the fills come from area_z / area_zc, which return NA left of
# the cutoff. stat_function() evaluates `fun` on a fixed grid of x values, so
# the first filled point is presumably the first grid point at or after the
# cutoff rather than the cutoff itself -- the likely source of the gap next
# to the vertical lines.
p3 <- p2 +
  stat_function(fun = dnorm) +
  stat_function(fun = area_z, geom = "area", fill = "blue", alpha = 0.3) +
  geom_text(
    x = 1.13, y = 0.1, size = 5, fontface = "bold",
    label = paste0(100 * valor_area_z, "%")
  ) +
  stat_function(fun = area_zc, geom = "area", fill = "red", alpha = 0.5) +
  geom_text(
    x = 2.27, y = 0.015, size = 3, fontface = "bold",
    label = paste0(100 * valor_area_zc, "%")
  ) +
  scale_x_continuous(breaks = -3:3) +
  labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
  theme_fivethirtyeight()
p3
这是绘制出的图
我的 geom_vline 与阴影区域之间有间隙。我不确定是我的统计步骤做错了,还是这是 R 本身的问题,也许两者都有?如果这个问题太基础,请见谅。这两方面我都不擅长,但我正在努力提高。
一个解决方案是在 stat_function 中使用 xlim 选项来限定函数的取值范围。
您也可以用 dnorm 替换 area_z 和 area_zc。
# Shade each region by restricting stat_function() to an x range through its
# `xlim` argument, so the fill is evaluated from the cutoff itself and plain
# dnorm can be used instead of the NA-masking helpers.
# p2 already draws the density curve, so it is not added a second time here.
# NOTE(review): valor_area_z is the area from ztest to 4, whereas the blue
# fill stops at zcrit; the red region is part of the labelled percentage.
p3 <- p2 +
  stat_function(fun = dnorm, geom = "area", fill = "blue", alpha = 0.3,
                xlim = c(ztest, zcrit)) +
  # annotate() draws each label once; geom_text() with constant x/y/label
  # repeats the text for every row of the plot data.
  annotate("text", x = 1.13, y = 0.1, size = 5, fontface = "bold",
           label = paste0(valor_area_z * 100, "%")) +
  stat_function(fun = dnorm, geom = "area", fill = "red", alpha = 0.5,
                xlim = c(zcrit, xvalues$x[2])) +
  annotate("text", x = 2.27, y = 0.015, size = 3, fontface = "bold",
           label = paste0(valor_area_zc * 100, "%")) +
  # p2 carries an x scale from xlim(); adding this scale replaces it, so the
  # limits are restated to keep the -4..4 window.
  scale_x_continuous(breaks = -3:3, limits = c(-4, 4)) +
  labs(x = "\n z", y = "f(z) \n", title = "Distribuição Normal \n") +
  theme_fivethirtyeight()
p3