为二元数据的 ggplot 条形图添加误差线和显著性标记

adding error bars and significance bars to binary data ggplot

我有以下数据集:

# Simulate two columns (named X1 and X2 by data.frame()) of 30 random 0/1
# draws, then stack them into long format: the source column name goes into
# `Type` and the 0/1 value into `Score`.
df <- data.frame(replicate(2, sample(0:1, 30, replace = TRUE)))
df <- reshape(
  data = df,
  varying = list(1:2),
  direction = "long",
  times = names(df), # evaluated on the wide `df`, before it is overwritten
  timevar = "Type",
  v.names = "Score"
)

我用以下代码创建条形图:

# Dodged bar chart of the long-format data: geom_bar() counts the rows for
# each Score value, with one bar per Type placed side by side.
ggplot(data = df, mapping = aes(x = Score, fill = Type)) +
  geom_bar(color = "black", position = "dodge")

这给了我们

我想添加类似下图所示的误差线:

对于连续(量表)数据,我通常直接使用 add = "mean_ci",但把它加到下面这个函数调用里没有任何效果:

# Attempt from the question: same dodged bar chart, with `add = "mean_ci"`
# passed to ggplot(). As the surrounding text reports, this has no visible
# effect on the plot.
# NOTE(review): `add` is presumably borrowed from a wrapper package such as
# ggpubr; ggplot() itself does not appear to act on it -- confirm.
ggplot(data = df, mapping = aes(x = Score, fill = Type), add = "mean_ci") +
  geom_bar(color = "black", position = "dodge")

如何为我的二元数据条形图添加美观的误差线?

此外,我还想添加显著性标记(横线加星号),使条形图最终看起来类似下图:

但是我不知道如何用上面这种形式的数据集来实现。

非常感谢任何帮助。

从原始的 df 出发,用 table 为每一列另建一个计数数据框,再据此计算计数及其标准差(sd)。由于是二项变量,这里使用方差 np(1-p),即 sd = sqrt(np(1-p)):

# Simulate two columns (named X1 and X2 by data.frame()) of 30 random 0/1
# draws. `df` is kept in wide format because the summaries index its columns.
df <- data.frame(replicate(2, sample(0:1, 30, replace = TRUE)))

# Long-format copy: source column name in `Type`, 0/1 value in `Score`.
df1 <- reshape(
  data = df,
  varying = list(1:2),
  direction = "long",
  times = names(df),
  timevar = "Type",
  v.names = "Score"
)

# Tabulate one binary vector into a two-row data frame (one row per value
# 0 and 1) holding the count (`Freq`), the group label (`Type`) and the
# binomial standard deviation of the count, sqrt(n * p * (1 - p)).
#
# scores: vector of 0/1 values.
# type:   label stored in the `Type` column.
tabulate_binary <- function(scores, type) {
  # Fixing the levels keeps both rows even when `scores` holds only 0s or
  # only 1s; a bare table() would silently drop the missing value.
  tab <- as.data.frame(table(factor(scores, levels = 0:1)))
  n <- sum(tab$Freq)
  p <- tab$Freq / n
  tab$Type <- type
  tab$sd <- sqrt(n * p * (1 - p))
  tab
}

t1 <- tabulate_binary(df$X1, "X1")
t2 <- tabulate_binary(df$X2, "X2")
df_tbl <- rbind(t1, t2)
names(df_tbl) <- c("Score", "count", "Type", "sd")

# Height that clears the top of each error bar by one unit; used later to
# place the significance brackets.
df_tbl$marg <- df_tbl$count + df_tbl$sd + 1
                            
# Heights for the significance brackets: just above the taller error bar
# within each Score group (0 and 1).
y0 <- max(df_tbl$marg[df_tbl$Score == 0])
y1 <- max(df_tbl$marg[df_tbl$Score == 1])

# Two-sample tests of equal proportions between X1 and X2, once for the
# share of 0s and once for the share of 1s.
n_obs <- nrow(df)
n_ones <- c(sum(df$X1), sum(df$X2))
prob0 <- prop.test(n_obs - n_ones, c(n_obs, n_obs))
prob1 <- prop.test(n_ones, c(n_obs, n_obs))
prob <- c(prob0$p.value, prob1$p.value)

# Map each p-value to a label. The outer breaks are fixed (-Inf, Inf) rather
# than derived from the data: using max(prob) as the top break makes cut()
# fail when it coincides with another break or is NaN.
prob_sig <- as.character(cut(
  prob,
  breaks = c(-Inf, 0.0005, 0.001, 0.01, Inf),
  labels = c("***", "**", "*", "NS")
))

library(ggplot2)

# Bar chart of the pre-computed counts with +/- 1 SD error bars, plus a
# significance bracket and label above each Score group.
ggplot(df_tbl, aes(x = Score, y = count, fill = Type)) +
  # Bars and error bars share the same dodge width so they stay aligned.
  geom_bar(
    stat = "identity", color = "black",
    position = position_dodge(0.9)
  ) +
  geom_errorbar(
    aes(ymin = count - sd, ymax = count + sd),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  # Leave headroom above the tallest error bar for the brackets and labels.
  ylim(c(0, max(df_tbl$count + df_tbl$sd) + 4)) +
  # annotate() draws each bracket/label exactly once; a geom_*() layer with
  # constant aes() would redraw it for every row of `df_tbl`.
  annotate(
    "segment",
    x = c(0.8, 1.8), xend = c(1.2, 2.2),
    y = c(y0, y1), yend = c(y0, y1)
  ) +
  annotate(
    "text",
    x = c(1, 2), y = c(y0, y1) + 0.5,
    label = prob_sig[1:2]
  )