如何在 R 中绘制功率图和未知参数的真实值?
How to plot a figure of power and true value of unknown parameter in R?
假设 i=1,\dots, 100
的 iid 随机样本 X_i\sim Bernoulli(\pi)
并且样本大小为 100。我们想要做一个假设检验
H_0: \pi\ge 0.6,\, H_a: \pi<0.6
我想绘制真实参数 pi
和功率之间的关系图。我得到了函数power
。但是我只能输入每个true pi=0.50, 0.51, 0.52, 0.53, ..., 0.59
。如何绘制相似的图形?
我的代码如下
n=100
pi0=0.60
##power function
power_fun=function(N,pi,alpha)
{
pvalue=c()
power=c()
rej=c()
for (i in 1:N) {
set.seed(i)
y=rbinom(n,size=1,prob=pi)
pi_hat=mean(y)
z=(pi_hat-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
rej[i]=ifelse(z<qnorm(0.05,mean=0,sd=1),1,0)
pvalue[i]=pnorm(q=z,mean=0,sd=1)
c_value=qnorm(alpha,mean=0,sd=1)
aug_term=(pi-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
power[i]=pnorm(c_value-aug_term,mean=0,sd=1)
}
mean_pvalue=mean(pvalue)
sd_pvalue=sd(pvalue)
mean_power=ifelse(pi<pi0,mean(power),NA)
sd_power=sd(power)
rej_rate=mean(pvalue<alpha)
type1_error=ifelse(pi>=pi0,rej_rate,NA)
type2_error=ifelse(pi<pi0,1-mean_power,NA)
type2_error_emp=ifelse(pi<pi0,1-rej_rate,NA)
mc_se=sqrt(1/N*rej_rate*(1-rej_rate))
df_out=data.frame("pvalue"=mean_pvalue,"pvalue2"=sd_pvalue,"power"=mean_power,"ESE_power"=sd_power,
"rejection rate"=rej_rate,
"Type I error"= type1_error,"type_II_error"=type2_error,"Type_II_error"=type2_error_emp,
"MC_SE"=mc_se)
rownames(df_out)=paste0("N=",N,", pi=",pi, ", alpha=", alpha)
df_out=round(df_out,3)
return(df_out)
}
每个pi
,我们可以获得力量。
power_fun(N=1000,pi=0.5,alpha=0.05)
#power=0.643
power_fun(N=1000,pi=0.51,alpha=0.05)
# power=0.566
power_fun(N=1000,pi=0.52,alpha=0.05)
power_fun(N=1000,pi=0.53,alpha=0.05)
power_fun(N=1000,pi=0.54,alpha=0.05)
但这太复杂了。有没有一种简单的方法可以获取这些 pi 值的幂并绘制它们的 pi
和 power
图表?
这更像是 code-review。
我 认为 您认为可以矢量化 N
、pi
和 alpha
。
我认为您当前的实施无法做到这一点。
遵循@Limey 的建议:
#' Power function
#'
#'
#' @param N Total number of replications
#' @param pi
#' @param alpha Significance level
#' @param pi0
#' @param n Size of the replication
#'
#' @return
#'
power_fun <- function(N, pi, alpha, pi0, n) {
pvalue <- vector(mode = "numeric", length = N)
power <- vector(mode = "numeric", length = N)
rej <- vector(mode = "numeric", length = N)
for (i in seq_len(N)) {
# why?
# set.seed(i)
y = rbinom(n, size = 1, prob = pi)
pi_hat = mean(y)
z = (pi_hat - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
rej[i] = ifelse(z < qnorm(0.05, mean = 0, sd = 1), 1, 0)
pvalue[i] = pnorm(q = z, mean = 0, sd = 1)
c_value = qnorm(alpha, mean = 0, sd = 1)
aug_term = (pi - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
power[i] = pnorm(c_value - aug_term, mean = 0, sd = 1)
}
mean_pvalue = mean(pvalue, na.rm = TRUE)
sd_pvalue = sd(pvalue, na.rm = TRUE)
# mean_power = ifelse(pi < pi0, mean(power), NA)
mean_power = mean(power, na.rm = TRUE)
sd_power = sd(power, na.rm = TRUE)
rej_rate = mean(pvalue < alpha, na.rm = TRUE)
type1_error = ifelse(pi >= pi0, rej_rate, NA)
type2_error = ifelse(pi < pi0, 1 - mean_power, NA)
type2_error_emp = ifelse(pi < pi0, 1 - rej_rate, NA)
mc_se = sqrt(1 / N * rej_rate * (1 - rej_rate))
df_out <- data.frame(
N = N,
pi = pi,
alpha = alpha,
pvalue = mean_pvalue,
pvalue2 = sd_pvalue,
power = mean_power,
ESE_power = sd_power,
rejection_rate = rej_rate,
Type_I_error = type1_error,
type_II_error = type2_error,
Type_II_error = type2_error_emp,
MC_SE = mc_se
)
# moved this to above
# rownames(df_out) = paste0("N=", N, ", pi=", pi, ", alpha=", alpha)
# why?
# df_out = round(df_out, 3)
return(df_out)
}
请阅读评论和更改。
library(tidyverse)
n <- 100
pi0 <- 0.6
# testing the function
power_fun(N=1000,pi=0.5,alpha=0.05, pi0 = pi0, n = n)
#' Plot for all pi going from 0.5 to 1.
seq.default(0, 0.65, length.out = 200) %>%
# seq.default(0.5, 1., length.out = 2) %>%
map_df(function(pi) power_fun(N=1000,pi=pi,alpha=0.05, pi0 = pi0, n = n)) %>%
as_tibble() ->
power_df
# power_df %>% View()
power_df %>% {
ggplot(., aes(pi, power)) +
geom_line() +
# geom_point() +
geom_vline(xintercept = pi0, linetype = "dashed") +
NULL
}
假设 i=1,\dots, 100
的 iid 随机样本 X_i\sim Bernoulli(\pi)
并且样本大小为 100。我们想要做一个假设检验
H_0: \pi\ge 0.6,\, H_a: \pi<0.6
我想绘制真实参数 pi
和功率之间的关系图。我得到了函数power
。但是我只能输入每个true pi=0.50, 0.51, 0.52, 0.53, ..., 0.59
。如何绘制相似的图形?
我的代码如下
n=100
pi0=0.60
##power function
power_fun=function(N,pi,alpha)
{
pvalue=c()
power=c()
rej=c()
for (i in 1:N) {
set.seed(i)
y=rbinom(n,size=1,prob=pi)
pi_hat=mean(y)
z=(pi_hat-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
rej[i]=ifelse(z<qnorm(0.05,mean=0,sd=1),1,0)
pvalue[i]=pnorm(q=z,mean=0,sd=1)
c_value=qnorm(alpha,mean=0,sd=1)
aug_term=(pi-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
power[i]=pnorm(c_value-aug_term,mean=0,sd=1)
}
mean_pvalue=mean(pvalue)
sd_pvalue=sd(pvalue)
mean_power=ifelse(pi<pi0,mean(power),NA)
sd_power=sd(power)
rej_rate=mean(pvalue<alpha)
type1_error=ifelse(pi>=pi0,rej_rate,NA)
type2_error=ifelse(pi<pi0,1-mean_power,NA)
type2_error_emp=ifelse(pi<pi0,1-rej_rate,NA)
mc_se=sqrt(1/N*rej_rate*(1-rej_rate))
df_out=data.frame("pvalue"=mean_pvalue,"pvalue2"=sd_pvalue,"power"=mean_power,"ESE_power"=sd_power,
"rejection rate"=rej_rate,
"Type I error"= type1_error,"type_II_error"=type2_error,"Type_II_error"=type2_error_emp,
"MC_SE"=mc_se)
rownames(df_out)=paste0("N=",N,", pi=",pi, ", alpha=", alpha)
df_out=round(df_out,3)
return(df_out)
}
每个pi
,我们可以获得力量。
power_fun(N=1000,pi=0.5,alpha=0.05)
#power=0.643
power_fun(N=1000,pi=0.51,alpha=0.05)
# power=0.566
power_fun(N=1000,pi=0.52,alpha=0.05)
power_fun(N=1000,pi=0.53,alpha=0.05)
power_fun(N=1000,pi=0.54,alpha=0.05)
但这太复杂了。有没有一种简单的方法可以获取这些 pi 值的幂并绘制它们的 pi
和 power
图表?
这更像是 code-review。
我 认为 您认为可以矢量化 N
、pi
和 alpha
。
我认为您当前的实施无法做到这一点。
遵循@Limey 的建议:
#' Power function
#'
#'
#' @param N Total number of replications
#' @param pi
#' @param alpha Significance level
#' @param pi0
#' @param n Size of the replication
#'
#' @return
#'
power_fun <- function(N, pi, alpha, pi0, n) {
pvalue <- vector(mode = "numeric", length = N)
power <- vector(mode = "numeric", length = N)
rej <- vector(mode = "numeric", length = N)
for (i in seq_len(N)) {
# why?
# set.seed(i)
y = rbinom(n, size = 1, prob = pi)
pi_hat = mean(y)
z = (pi_hat - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
rej[i] = ifelse(z < qnorm(0.05, mean = 0, sd = 1), 1, 0)
pvalue[i] = pnorm(q = z, mean = 0, sd = 1)
c_value = qnorm(alpha, mean = 0, sd = 1)
aug_term = (pi - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
power[i] = pnorm(c_value - aug_term, mean = 0, sd = 1)
}
mean_pvalue = mean(pvalue, na.rm = TRUE)
sd_pvalue = sd(pvalue, na.rm = TRUE)
# mean_power = ifelse(pi < pi0, mean(power), NA)
mean_power = mean(power, na.rm = TRUE)
sd_power = sd(power, na.rm = TRUE)
rej_rate = mean(pvalue < alpha, na.rm = TRUE)
type1_error = ifelse(pi >= pi0, rej_rate, NA)
type2_error = ifelse(pi < pi0, 1 - mean_power, NA)
type2_error_emp = ifelse(pi < pi0, 1 - rej_rate, NA)
mc_se = sqrt(1 / N * rej_rate * (1 - rej_rate))
df_out <- data.frame(
N = N,
pi = pi,
alpha = alpha,
pvalue = mean_pvalue,
pvalue2 = sd_pvalue,
power = mean_power,
ESE_power = sd_power,
rejection_rate = rej_rate,
Type_I_error = type1_error,
type_II_error = type2_error,
Type_II_error = type2_error_emp,
MC_SE = mc_se
)
# moved this to above
# rownames(df_out) = paste0("N=", N, ", pi=", pi, ", alpha=", alpha)
# why?
# df_out = round(df_out, 3)
return(df_out)
}
请阅读评论和更改。
library(tidyverse)
n <- 100
pi0 <- 0.6
# testing the function
power_fun(N=1000,pi=0.5,alpha=0.05, pi0 = pi0, n = n)
#' Plot for all pi going from 0.5 to 1.
seq.default(0, 0.65, length.out = 200) %>%
# seq.default(0.5, 1., length.out = 2) %>%
map_df(function(pi) power_fun(N=1000,pi=pi,alpha=0.05, pi0 = pi0, n = n)) %>%
as_tibble() ->
power_df
# power_df %>% View()
power_df %>% {
ggplot(., aes(pi, power)) +
geom_line() +
# geom_point() +
geom_vline(xintercept = pi0, linetype = "dashed") +
NULL
}