GGPLOT 方面的条形图和散点图

Bar plot and scatterplot in GGPLOT facets

我有一些数据显示对治疗的反应(分类变量)。每个复制也有一个已知的值分配给它基于治疗相对于个体的幅度(连续变量)。我想做的是使用 ggplot2 在单个图中显示对治疗效果(条形图)和连续变量(散点图/回归)的响应,其方式与 facet_wrapfacet_grid 命令都可以使用。基本上,我想使用 ggplot2.

重新创建下面的情节

这是我用来生成示例数据和创建绘图的代码

        ## GENERATE CONTINUOUS VARIABLES FOR EACH TREATMENT (A - D)

        A <- abs(norm(10, 1, 1))
        B <- abs(rnorm(10, 3, 1))
        C <- abs(rnorm(10, 5, 1))
        D <- abs(rnorm(10, 7, 1))

        ## GENERATE RESPONSE TO TREATMENTS

        res_A<-rnorm(10, 28, 3)
        res_B<-rnorm(10, 22, 3)
        res_C<-rnorm(10, 18, 3)
        res_D<-rnorm(10, 12, 3)

        ## ESTABLISH DATA FRAMES FOR TREATMENTS AND RESPONSE

        treatments<-data.frame(A, B, C, D)
        response<-data.frame(res_A, res_B, res_C, res_D)

        ## CONVERT EACH DATA FRAME TO LONG FORM

        library(reshape2)

        treatments <-treatments %>% gather(Treatment, cont_x, A:D)
        response <-response %>% gather(Treatment, Response, res_A:res_D)

        ## CREATE FINAL DATA FRAME WITH REQUIRED DATA

        data<-data.frame(treatments$Treatment, treatments$cont_x, response$Response)
        colnames(data) <- c("Treatment", "X", "Response")

        ## ESTABLISH MEANS AND STANDARD ERROR FOR TREATMENT EFFECTS

        means<-tapply(data$Response,list(data$Treatment),mean,na.rm=T)
        ER<-tapply(data$Response,list(data$Treatment),sd,na.rm=T)/sqrt(tapply(data$Response,list(data$Treatment),length))

        ## SET AESTHETICS AND LABEL VALUES

        cols<-c("darkcyan","olivedrab3", "palevioletred3","brown3")
        labs<-c("A", "B", "C", "D")

        ## GENERATE PLOT CANVASS

        par(mfrow=c(1,2))
        par(mar=c(3.5,3,2,1))

        ## GENEATE BAR PLOT

        graph<-tapply(data$Response,data$Treatment,mean,na.rm=T)
        plot<-barplot(graph,col=cols,las=1,xaxt='n',yaxt='n',
                      xlab=NA,ylab =NA,font.lab=2,
                      cex=0.6,cex.lab=0.6,font.lab=2,font.axis=2,
                      cex.axis=0.6,ylim=c(0,40), main="TREATMENT EFFECTS")
        box()
        arrows(x0=plot,y0=means-ER,x1=plot,
               y1=means+ER,code=3,angle=90,length=0.02,lwd=1)

        axis(side=1,line=0,at=plot,labels=labs,
             cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)

        axis(side=2,line=0,at=seq(0,40,10),las=1,cex.axis=0.8,
             labels=seq(0,40,10),cex=0.6,mgp=c(0,0.6,0))

        xlab<-c("Treatment")
        ylab<-c("Response")

        mtext(xlab, side=1, cex=1.2, line=2)
        mtext(ylab, side=2, cex=1.2, line=1.75)

        mark<-(means+ER)+2
        text(0.7,mark[1],"a",font=1,cex=1.2)
        text(1.9,mark[2],"b",font=1,cex=1.2)
        text(3.1,mark[3],"c",font=1,cex=1.2)
        text(4.3,mark[4],"d",font=1,cex=1.2)

        ## GENERATE SCATTERPLOT

        par(mar=c(3.5, 2, 2, 2))
        plot(data$X, data$Response,type='n',ylim = c(0, 40), xlim=c(0,9),pch=21, col='black', cex=1.5, xaxt='n', 
             yaxt='n', xlab=NA, ylab=NA, main = "CONTINUOUS RESPONSE")

        axis(side=1,line=0,tck=NA,at=seq(0,9,3),labels=T,
             cex.axis=0.8,mgp=c(0,0.5,0),tck=-0.02,font.axis=1)

        axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=0.01)
        axis(side=2,line=0,at=seq(0,40,10),labels=F, tck=-0.01)

        xlab<-c("Continuous variable")

        mtext(xlab, side=1, cex=1.2, line=2)

        ## PERFORM REGRESSION AND ADD IN REGRESSION LINE

        model<-lm (Response ~ X, data = data)
        abline(model, lwd=2)

        ## ADD IN CONFIDENCE INTERVAL

        newx <- seq(0,9,length.out=1000)
        preds <- predict(model, newdata = data.frame(X=newx), 
                         interval = 'confidence')
        lines(newx, preds[ ,3], lty = 'dashed', col = "grey36",lwd=1)
        lines(newx, preds[ ,2], lty = 'dashed', col = 'grey36',lwd=1)

        polygon(c(rev(newx), newx), c(rev(preds[ ,3]), preds[ ,2]), col = 'grey80', border = NA)

        ## ADD IN POINTS ONTOP OF CI POLYGON

        points(data$X, data$Response, bg= ifelse(data$Treatment == "A", "darkcyan",
            ifelse(data$Treatment == "B","olivedrab3", ifelse(data$Treatment == "C", "palevioletred3", "brown3"))),pch=21, col='black', cex=1.5)

    ## ADD THE REGRESSION EQ


    eq<-expression(italic("y = 28.54 - 2.16x"))
    rsq<-expression(italic("R"^{2}~"= 0.76 ***"))
    text(5 ,35, eq, cex=1.2)
    text(4.5, 33, rsq, cex=1.2

甚至可以使用 ggplot2 来做到这一点吗?

是的,这两个图都可以使用 ggplot 创建。这是条形图:

library(ggplot2)
library(dplyr)

data %>%
  group_by(Treatment) %>%
  summarise(Response=mean(Response)) %>%
  ggplot(aes(x=Treatment, y=Response)) +
  geom_col(aes(fill=cols)) +
  geom_errorbar(ymin=means+ER, ymax=means-ER, width = 0.1) +
    geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.1)) +
  labs(title="TREATMENT EFFECTS") +
  theme_bw() +
  theme(legend.position="none")

这是添加了方程和 R2 的散点图。 ggpubr 包在这里很有用,可以节省编码。

library(ggpubr)

data %>%
  ggplot(aes(x=X, y=Response)) +
  geom_point(aes(fill=Treatment), pch=21, col="black", size=4) +
  scale_fill_manual(values=c("darkcyan", "olivedrab3", "palevioletred3", "brown3")) +
  scale_x_continuous(breaks=seq(0,9,3)) +
  scale_y_continuous(lim=c(0, 40), labels=NULL) +
  geom_smooth(method="lm") +
  stat_regline_equation(label.y=33, label.x=4, 
                        aes(label = ..adj.rr.label..)) +
  stat_regline_equation(label.y=35, label.x=4, 
                        aes(label =  ..eq.label..)) +
  labs(title="CONTINUOUS RESPONSE", x="Continuous variable", y="") +
  theme_classic() +
  theme(legend.position="none", plot.title = element_text(hjust = 0.5))

答案是肯定的,但并不优雅(单独使用 ggplot2)。这是因为 the "grammar of graphics" philosophy implemented in ggplot2 is that individual variables (i.e. columns in a tidy data.frame) get mapped to individual plot aesthetics. Ergo, using one of the face_* functions is only going to break plots up into panels based on a categorical value (2 categorical values in the case of a face_grid). However, in this scenario, the x and y axes will still be mapped to the same variable. In your example, this would require mapping the x-axis to a variable containing categorical values on the left and a separate variable containing continuous ones on the right. More on this here.

背后的整个想法

如果您想使用 ggplot2 生成这些图,然后以编程方式组合它们而无需手动操作,我建议您研究 ggplot2 扩展,例如 cowplot, gridExtra, or egg. I'm sure these packages have some nice vignettes, but this is a tutorial that I have found to be a helpful in the past ggplot2 - Easy Way to Mix Multiple Graphs on The Same Page .

ggpubrggpmisc 的帮助下,您当然可以使用 ggplot2

library(ggplot2)
library(ggpubr)
library(ggpmisc)

a <- data %>% 
       group_by(Treatment) %>% 
       summarise(Response=mean(Response)) %>% 
       mutate(se = sd(Response)/sqrt(length(Response))) %>% ungroup %>%
     ggplot(aes(x=Treatment,y=Response,fill = Treatment)) + 
       geom_col(show.legend = FALSE) + 
       geom_text(aes(label=tolower(Treatment)), position=position_stack(vjust = 1.3)) +
       geom_errorbar(aes(ymin = Response - se, ymax = Response + se), width = 0.1) + scale_fill_manual(values = cols) +
       labs(title="TREATMENT EFFECTS")

b <- data %>% 
     ggplot(aes(x=X,y=Response)) + 
      geom_smooth(method="lm", show.legend = FALSE) + 
      geom_point(aes(fill = factor(Treatment)),shape=21,size=3, show.legend = FALSE) +
      scale_fill_manual(values = cols) +
      labs(title = "CONTINUOUS RESPONSE", xlab="Continuous Variable") +
      xlab("Continuous Variable") +
      stat_poly_eq(formula = y ~ x, label.x = 0.9, label.y = 0.95, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE)

ggarrange(a,b,nrow = 1)

数据:

data <- structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), X = c(0.0267649727104236, 
1.39488715616046, 0.21355823940511, 0.49907410504292, 0.375279051787701, 
0.681959942595334, 2.05952354292797, 2.26083984353423, 1.11354591314711, 
0.34506137947979, 2.07866454079728, 3.20194490569932, 3.26342299162599, 
2.06754732525705, 4.02484423090347, 3.21831253488128, 3.56925840330762, 
3.92631915144912, 2.55130407898901, 4.88369094725247, 4.85805706436391, 
6.06714331089959, 5.05696298595936, 4.28599088092722, 2.64907718621996, 
5.50017966947343, 5.27853136585637, 5.8694723514342, 4.57774253201089, 
4.28459862391154, 6.6919479712577, 6.89039252602714, 7.36883429701188, 
5.01895090471179, 7.66623439220746, 7.27620218490248, 6.44135570941742, 
7.66409390386461, 8.09858213415943, 4.89114777053612), Response = c(28.647362805403, 
30.5878855986189, 36.3739824861786, 33.5874379487616, 23.8060926287858, 
30.8520531077353, 26.5940268747477, 28.8356526462252, 30.4727218173035, 
26.8151163416507, 17.6391456006427, 19.0921380684935, 21.6950437768534, 
23.9017396212974, 27.1407090174467, 15.4322366130883, 26.9809942596379, 
22.7341801522041, 23.6518581209459, 21.8377270248132, 13.2905142368901, 
19.8951142352182, 17.1400860924093, 16.847732448511, 15.6213812276033, 
18.3368951001566, 18.7411799795391, 17.5514579276854, 14.2841781950673, 
21.6044042356051, 11.0037691942103, 13.0260853225773, 10.6862778263241, 
9.4482751070798, 11.9896873712498, 10.0798146375625, 12.6332310111476, 
14.4806588768585, 6.89810707498932, 7.55062781781536)), class = "data.frame", row.names = c(NA, 
-40L))

cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")