如何在 R 中绘制功率图和未知参数的真实值?

How to plot a figure of power and true value of unknown parameter in R?

假设 i=1,\dots, 100 的 iid 随机样本 X_i\sim Bernoulli(\pi) 并且样本大小为 100。我们想要做一个假设检验 H_0: \pi\ge 0.6,\, H_a: \pi<0.6 我想绘制真实参数 pi 和功率之间的关系图。我得到了函数power。但是我只能输入每个true pi=0.50, 0.51, 0.52, 0.53, ..., 0.59。如何绘制相似的图形?

我的代码如下

n=100
pi0=0.60

##power function
power_fun=function(N,pi,alpha)
{
  pvalue=c()
  power=c()
  rej=c()
  
  for (i in 1:N) {
    set.seed(i)
    y=rbinom(n,size=1,prob=pi)
    pi_hat=mean(y)
    z=(pi_hat-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
    
    rej[i]=ifelse(z<qnorm(0.05,mean=0,sd=1),1,0)
    
    pvalue[i]=pnorm(q=z,mean=0,sd=1)
    
    c_value=qnorm(alpha,mean=0,sd=1)
    aug_term=(pi-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
    power[i]=pnorm(c_value-aug_term,mean=0,sd=1)
    
  }

  mean_pvalue=mean(pvalue)

  sd_pvalue=sd(pvalue)
  

  mean_power=ifelse(pi<pi0,mean(power),NA)

  sd_power=sd(power)
  
  rej_rate=mean(pvalue<alpha)
  
  type1_error=ifelse(pi>=pi0,rej_rate,NA)
  
  type2_error=ifelse(pi<pi0,1-mean_power,NA)
  
  type2_error_emp=ifelse(pi<pi0,1-rej_rate,NA)
  
  mc_se=sqrt(1/N*rej_rate*(1-rej_rate))
  
  df_out=data.frame("pvalue"=mean_pvalue,"pvalue2"=sd_pvalue,"power"=mean_power,"ESE_power"=sd_power, 
                    "rejection rate"=rej_rate,
                    "Type I error"=  type1_error,"type_II_error"=type2_error,"Type_II_error"=type2_error_emp,
                    "MC_SE"=mc_se)
  rownames(df_out)=paste0("N=",N,", pi=",pi, ", alpha=", alpha)
  
  df_out=round(df_out,3)
  
  return(df_out)
}

每个pi,我们可以获得力量。


power_fun(N=1000,pi=0.5,alpha=0.05)
#power=0.643
power_fun(N=1000,pi=0.51,alpha=0.05)
# power=0.566
power_fun(N=1000,pi=0.52,alpha=0.05)
power_fun(N=1000,pi=0.53,alpha=0.05)
power_fun(N=1000,pi=0.54,alpha=0.05)

但这太复杂了。有没有一种简单的方法可以获取这些 pi 值的幂并绘制它们的 pipower 图表?

这更像是 code-review。

认为 您认为可以矢量化 Npialpha。 我认为您当前的实施无法做到这一点。

遵循@Limey 的建议:

#' Power function
#' 
#' 
#' @param N Total number of replications
#' @param pi 
#' @param alpha Significance level
#' @param pi0 
#' @param n Size of the replication
#'
#' @return
#' 
power_fun <- function(N, pi, alpha, pi0, n) {
  pvalue <- vector(mode = "numeric", length = N)
  power <- vector(mode = "numeric", length = N)
  rej <- vector(mode = "numeric", length = N)
  
  for (i in seq_len(N)) {
    # why?
    # set.seed(i)
    y = rbinom(n, size = 1, prob = pi)
    pi_hat = mean(y)
    z = (pi_hat - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
    
    rej[i] = ifelse(z < qnorm(0.05, mean = 0, sd = 1), 1, 0)
    
    pvalue[i] = pnorm(q = z, mean = 0, sd = 1)
    
    c_value = qnorm(alpha, mean = 0, sd = 1)
    aug_term = (pi - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
    power[i] = pnorm(c_value - aug_term, mean = 0, sd = 1)
    
  }
  
  mean_pvalue = mean(pvalue, na.rm = TRUE)
  sd_pvalue = sd(pvalue, na.rm = TRUE)
  # mean_power = ifelse(pi < pi0, mean(power), NA)
  mean_power = mean(power, na.rm = TRUE)
  sd_power = sd(power, na.rm = TRUE)
  rej_rate = mean(pvalue < alpha, na.rm = TRUE)
  
  type1_error = ifelse(pi >= pi0, rej_rate, NA)
  type2_error = ifelse(pi < pi0, 1 - mean_power, NA)
  type2_error_emp = ifelse(pi < pi0, 1 - rej_rate, NA)
  
  mc_se = sqrt(1 / N * rej_rate * (1 - rej_rate))
  
  df_out <- data.frame(
    N = N, 
    pi = pi, 
    alpha = alpha,
    pvalue = mean_pvalue,
    pvalue2 = sd_pvalue,
    power = mean_power,
    ESE_power = sd_power,
    rejection_rate = rej_rate,
    Type_I_error =  type1_error,
    type_II_error = type2_error,
    Type_II_error = type2_error_emp,
    MC_SE = mc_se
  )
  # moved this to above
  # rownames(df_out) = paste0("N=", N, ", pi=", pi, ", alpha=", alpha)
  
  # why?
  # df_out = round(df_out, 3)
  
  return(df_out)
}

请阅读评论和更改。

library(tidyverse)
n <- 100
pi0 <- 0.6

# testing the function
power_fun(N=1000,pi=0.5,alpha=0.05, pi0 = pi0, n = n)

#' Plot for all pi going from 0.5 to 1.
seq.default(0, 0.65, length.out = 200) %>%
# seq.default(0.5, 1., length.out = 2) %>% 
  map_df(function(pi) power_fun(N=1000,pi=pi,alpha=0.05, pi0 = pi0, n = n)) %>% 
  as_tibble() -> 
  power_df
# power_df %>% View()
power_df %>% {
  ggplot(., aes(pi, power)) + 
    geom_line() + 
    # geom_point() +
    geom_vline(xintercept = pi0, linetype = "dashed") +
    NULL
}