使用 fct_relevel() 删除 NA 的绘图

plot using fct_relevel() dropping NA's

我有包含一些 NA 值的数据,我正在尝试绘制如下图:

library(ggplot2)
library(forcats)
library(dplyr)
library(ggpubr)

# Simulated data: 20 draws of Y plus a grouping column X that is
# "yes", "no", or missing.
df <- data.frame(
  Y = rnorm(20, -6, 1),
  X = sample(c("yes", "no", NA), 20, replace = TRUE)
)

# Relevel once ("yes" first) so every layer below is built from the same
# factor instead of mixing a factor with the raw character column.
df_releveled <- df %>%
  mutate(X = fct_relevel(X, "yes"))

# One row per group of X (the NA group included) holding the mean of Y.
group_means <- df_releveled %>%
  group_by(X) %>%
  summarise(mean = mean(Y), .groups = "drop")

dfplot <- ggplot(df_releveled, aes(x = X, y = Y, fill = X)) +
  # Boxplot is nudged to the right so it sits beside the jittered points.
  geom_boxplot(
    size = 1, width = 0.2, show.legend = FALSE, outlier.shape = NA,
    position = position_nudge(x = 0.3)
  ) +
  geom_jitter(show.legend = TRUE, shape = 21, width = 0.2, size = 2) +
  # ymin == ymax == y, so the crossbar collapses to a line at the group mean.
  geom_crossbar(
    data = group_means,
    aes(x = X, ymin = mean, ymax = mean, y = mean),
    width = 0.2, show.legend = FALSE
  ) +
  labs(x = "", y = "%")

dfplot

然而,当我尝试只绘制 “yes” 和 “no” 两个水平,并用 filter(X!="NA") 去掉 NA 时,却无法把它们调整为正确的顺序(即 “yes” 排在第一位)。如果我用 drop_na("X") 或 filter(!is.na(X)) 代替 filter(X!="NA"),也会发生同样的情况:
# Attempt at plotting only the "yes"/"no" groups: the main data are filtered
# and then releveled so that "yes" should come first.
# Note: X != "NA" compares against the literal string "NA". For rows where X
# is missing the comparison itself evaluates to NA, and filter() keeps only
# rows where the condition is TRUE, so those rows are dropped as well.
dfplot<- df %>% filter(X!="NA")  %>% mutate(X=fct_relevel(X, "yes"))%>%
  ggplot(.,
         aes(x=X, y=Y, fill=X))+
  geom_boxplot(size=1, width = 0.2, show.legend = F, outlier.shape = NA,
               position=position_nudge(x=0.3))+
  geom_jitter(show.legend = T, shape=21, width=0.2, size=2)+
  # This layer is summarised from the original `df`, which is neither filtered
  # nor releveled here, so its X is still a character column with an NA group.
  # NOTE(review): that extra NA group is presumably what brings NA back onto
  # the axis and resets the "yes"/"no" order -- confirm against the plot.
  geom_crossbar(data=df %>% group_by(X) %>% summarise(mean=mean(Y), .groups="keep"),
                aes(x=X,ymin=mean, ymax=mean, y=mean), width = 0.2, show.legend = F)+

  labs(x="",
       y="%")

dfplot

尝试在 scale_fill_discrete 中使用 na.translate 参数并将其设置为 TRUE,这样 NA 值就会被映射到填充颜色上(注意:TRUE 本身就是该参数的默认值;如果想把 NA 从该标度中去掉,则应设置 na.translate = FALSE)。下面是一个演示做法的最小示例。

# Simulated data: 20 draws of Y plus a grouping column X that is
# "yes", "no", or missing.
df <- data.frame(
  Y = rnorm(20, -6, 1),
  X = sample(c("yes", "no", NA), 20, replace = TRUE)
)

# Fix the level order ("yes", then "no") once and reuse it for every layer,
# so the crossbar layer no longer falls back to the raw character column.
df_releveled <- df %>%
  mutate(X = fct_relevel(X, "yes", "no"))

# One row per group of X (the NA group included) holding the mean of Y.
group_means <- df_releveled %>%
  group_by(X) %>%
  summarise(mean = mean(Y), .groups = "drop")

dfplot <- ggplot(df_releveled, aes(x = X, y = Y, fill = X)) +
  # Boxplot is nudged to the right so it sits beside the jittered points.
  geom_boxplot(
    size = 1, width = 0.2, show.legend = FALSE, outlier.shape = NA,
    position = position_nudge(x = 0.3)
  ) +
  geom_jitter(show.legend = TRUE, shape = 21, width = 0.2, size = 2) +
  # ymin == ymax == y, so the crossbar collapses to a line at the group mean.
  geom_crossbar(
    data = group_means,
    aes(x = X, ymin = mean, ymax = mean, y = mean),
    width = 0.2, show.legend = FALSE
  ) +
  labs(x = "", y = "%") +
  # Keep missing values on the fill scale (per the ggplot2 docs this is also
  # the default; na.translate = FALSE would remove them from the scale).
  scale_fill_discrete(na.translate = TRUE)

dfplot

我认为原因在于:您在 geom_crossbar 中传入的仍是原始数据 df,却没有在那里同样删除 NA 值。

尝试在代码块的开头添加“set.seed”,使其完全可重现。

下面的代码应该会生成一个 'yes' 和 'no' 按正确顺序排列(“yes” 在前)的图。

library(ggplot2)
library(forcats)
library(dplyr)
library(ggpubr)

# Fixed seed so the simulated data (and therefore the plot) are reproducible.
set.seed(123456)

# Simulated data: 20 draws of Y plus a grouping column X that is
# "yes", "no", or missing.
df <- data.frame(
  Y = rnorm(20, -6, 1),
  X = sample(c("yes", "no", NA), 20, replace = TRUE)
)

# Drop the rows with missing X and put "yes" first, once. Every layer below
# is derived from this table, so no layer can reintroduce the NA group or a
# different level order.
df_complete <- df %>%
  filter(!is.na(X)) %>%
  mutate(X = fct_relevel(X, "yes"))

# One row per remaining group of X holding the mean of Y.
group_means <- df_complete %>%
  group_by(X) %>%
  summarise(mean = mean(Y), .groups = "drop")

dfplot <- ggplot(df_complete, aes(x = X, y = Y, fill = X)) +
  # Boxplot is nudged to the right so it sits beside the jittered points.
  geom_boxplot(
    size = 1, width = 0.2, show.legend = FALSE, outlier.shape = NA,
    position = position_nudge(x = 0.3)
  ) +
  geom_jitter(show.legend = TRUE, shape = 21, width = 0.2, size = 2) +
  # ymin == ymax == y, so the crossbar collapses to a line at the group mean.
  geom_crossbar(
    data = group_means,
    aes(x = X, ymin = mean, ymax = mean, y = mean),
    width = 0.2, show.legend = FALSE
  ) +
  labs(x = "", y = "%")

dfplot