R:箱线图:在重复测量的情况下,为每个受试者在各阶段之间画连线

R: boxplot: draw lines between each subject in case of repeated measurements

我有一份数据,对同一批受试者在 4 个不同阶段进行了重复测量。我的目标是制作一张图,其中既有箱线图,也有每个受试者的数据点,并用线把同一受试者在各阶段的点连接起来。也就是说,最终结果是把下面的两张图合并成一张图:

library(ggplot2)
# Simulated repeated-measures data in wide format: one row per subject and
# one `val_<stage>` column per measurement stage. The runif() calls are kept
# in stage order so the random stream is consumed exactly as before.
df_original <- data.frame(
  study_id = paste0("id", 1:10),
  val_stage1 = runif(10, 5.0, 6.0),
  val_stage2 = runif(10, 5.0, 6.5),
  val_stage3 = runif(10, 4.7, 5.8),
  val_stage4 = runif(10, 5.5, 7.0)
)
df_original

# Reshape to long format: one row per (subject, stage) with columns
# `group` (stage name), `value` (measurement) and `ID` (subject id).
# Every `val_*` column is picked up automatically, so adding or removing a
# stage in `df_original` needs no change here.
stage_cols <- grep("^val_", names(df_original), value = TRUE)
stage_frames <- lapply(stage_cols, function(stage_col) {
  data.frame(
    group = sub("^val_", "", stage_col),
    value = df_original[[stage_col]],
    ID = df_original$study_id
  )
})

# The list is unnamed on purpose: rbind() then keeps plain 1..n row names.
plot_data <- do.call(rbind, stage_frames)
# First plot: one box per stage with the raw observations overlaid as points.
# Outlier markers are switched off on the boxes; geom_point() draws every
# observation anyway.
ggplot(data = plot_data, mapping = aes(x = group, y = value, fill = group)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point()

library(lattice)
# source https://publicifsv.sund.ku.dk/~jufo/courses/rm2017/plotRrepeated.pdf
# Second plot: one connected profile per subject across the stages.
# The argument is spelled `groups` in full; `group = ID` only worked through
# partial argument matching.
xyplot(value ~ group, groups = ID, data = plot_data, type = "b")

下面是使用 boxplot() 和 lines() 的解决方案。我不确定您设想的这种图是否易于阅读;也许应该让箱线图的颜色稍微淡一些,如下图所示。

# Unique values of the two grouping columns: stages (one box each) and
# subjects (one line each).
lvl <- lapply(plot_data[c("group", "ID")], unique)

# One rainbow palette per column. alpha controls the fade: 0.1 for the
# boxplot fills so they recede, 1 (fully opaque) for the subject lines.
clr <- Map(
  function(x, alpha) rainbow(length(x), alpha = alpha),
  lvl,
  alpha = c(0.1, 1)
)

boxplot(value ~ group, data = plot_data, border = "darkgrey", col = clr$group)

# Overlay one connected line per subject. lines() is called only for its
# side effect, so a plain for loop is used: sapply() would collect the NULL
# results and auto-print them at top level. seq_along() is also safe when
# there is a single subject.
for (i in seq_along(lvl$ID)) {
  lines(
    value ~ group,
    data = plot_data[plot_data$ID == lvl$ID[i], ],
    type = "b",
    col = clr$ID[i],
    lwd = 2
  )
}
legend("topleft", legend = lvl$ID, lwd = 2, col = clr$ID, ncol = 2, cex = 0.8,
       bty = "n")

结果如下:


数据:

# Example data in long format (dput-style output): 40 rows with columns
#   group - factor with levels stage1..stage4
#   value - numeric measurement
#   ID    - factor with levels id1, id10, id2, ..., id9 (note that "id10"
#           comes second in the level order)
plot_data <- structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L), .Label = c("stage1", "stage2", "stage3", "stage4"), class = "factor"), 
    value = c(5.78956733993255, 5.88809484057128, 5.10837934492156, 
    5.4757885155268, 5.97264352883212, 5.67352280486375, 5.20609766873531, 
    5.86022568447515, 5.81150085269473, 5.94000163977034, 6.0783141606953, 
    5.94233451236505, 6.49362113315146, 6.12048651557416, 6.1347389126895, 
    5.20223867462482, 5.87281575519592, 5.73356315004639, 5.75488595489878, 
    6.36840553430375, 4.99227170993108, 5.18667783471756, 4.99904926030431, 
    5.15853247770574, 5.11713153058663, 5.49876751452684, 5.20934719201177, 
    5.02541789095849, 5.01650351425633, 5.5694368747063, 6.42072400392499, 
    5.5407249458367, 5.87118571228348, 6.68436990131158, 6.81803358788602, 
    6.84979289071634, 6.78138321859296, 6.70059150888119, 5.99049715092406, 
    6.45158472727053), ID = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 
    8L, 9L, 10L, 2L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 
    1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 1L, 3L, 4L, 5L, 
    6L, 7L, 8L, 9L, 10L, 2L), .Label = c("id1", "id10", "id2", 
    "id3", "id4", "id5", "id6", "id7", "id8", "id9"), class = "factor")), row.names = c(NA, 
-40L), class = "data.frame")

tidyverse 方法如下所示:

library(tidyverse)

# Stage names, used to label the numeric x axis below.
mylabs <- levels(plot_data$group)

# Combined plot: boxes per stage plus one coloured point-and-line profile per
# subject. x is mapped to the integer codes of the `group` factor (so `group`
# must be a factor); the boxes are grouped explicitly by stage and the stage
# names are restored as axis labels.
plot_data %>%
  ggplot(aes(x = as.numeric(group), y = value)) +
  geom_boxplot(aes(group = group, fill = group), outlier.shape = NA) +
  geom_point(aes(color = ID)) +
  geom_line(aes(color = ID)) +
  theme(legend.position = "none") +
  labs(x = "Group") +
  # Breaks follow the number of stages instead of a hard-coded 1:4.
  scale_x_continuous(breaks = seq_along(mylabs), labels = mylabs)