在 DESeq2 中执行 plotPCA 之后,是否有更好的方法在 ggplot 中绘制此 PCAPlot?

Is there a nicer way to plot this PCAPlot in ggplot after doing plotPCA in DESeq2?

我的数据结构如下:

# dput() representation of the PCA data frame used by the plotting code below
# (which refers to it as `pcaData`; assign this value to that name to
# reproduce the plot). One row per sample, with columns:
#   PC1, PC2 - sample coordinates on the first two principal components
#   group    - factor whose levels combine CellType:Donor:Day
#   CellType - factor with levels Low / High / Medium
#   Donor    - factor with levels D10 / D14 / D18
#   Day      - factor with levels Basal / Stim
#   name     - sample name (identical to the row names)
# The `percentVar` attribute holds two fractions (one per component); the
# plotting code multiplies them by 100 to label the axes with % variance.
structure(list(PC1 = c(0.575275450335122, -0.271560036518229, 
0.953309669344739, -0.0973831071329307, 0.871105953866523, 0.0379904534598842, 
0.737223421023106, -1.07991788617048, 0.977937610804854, 0.206927407135419, 
0.846524295528142, -0.743507855124003, 0.229716783731562, -1.708551233374, 
0.623630074143358, -1.22006221488515, 0.624234285603959, -1.56289307177187
), PC2 = c(0.912546111986394, -0.247117897558157, 1.15416425034184, 
-0.0618766648767635, 1.30003524991646, -0.339609948322352, 0.12755285074635, 
0.130688683339775, -0.596424639876554, -1.31752263137418, -0.390273336420353, 
-0.562559844453795, -0.129516525520303, 0.407888572167762, -0.545085195561956, 
0.177266986014392, -0.343592143975577, 0.323436123427009), group = structure(c(1L, 
2L, 7L, 8L, 13L, 14L, 3L, 4L, 9L, 10L, 15L, 16L, 5L, 6L, 11L, 
12L, 17L, 18L), .Label = c("High:D10:Basal", "High:D10:Stim", 
"High:D14:Basal", "High:D14:Stim", "High:D18:Basal", "High:D18:Stim", 
"Low:D10:Basal", "Low:D10:Stim", "Low:D14:Basal", "Low:D14:Stim", 
"Low:D18:Basal", "Low:D18:Stim", "Medium:D10:Basal", "Medium:D10:Stim", 
"Medium:D14:Basal", "Medium:D14:Stim", "Medium:D18:Basal", "Medium:D18:Stim"
), class = "factor"), CellType = structure(c(2L, 2L, 1L, 1L, 
3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L), .Label = c("Low", 
"High", "Medium"), class = "factor"), Donor = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L), .Label = c("D10", "D14", "D18"), class = "factor"), Day = structure(c(1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L), .Label = c("Basal", "Stim"), class = "factor"), name = c("D10_High_d0", 
"D10_High_d7", "D10_Low_d0", "D10_Low_d7", "D10_Medium_d0", "D10_Medium_d7", 
"D14_High_d0", "D14_High_d7", "D14_Low_d0", "D14_Low_d7", "D14_Medium_d0", 
"D14_Medium_d7", "D18_High_d0", "D18_High_d7", "D18_Low_d0", 
"D18_Low_d7", "D18_Medium_d0", "D18_Medium_d7")), class = "data.frame", row.names = c("D10_High_d0", 
"D10_High_d7", "D10_Low_d0", "D10_Low_d7", "D10_Medium_d0", "D10_Medium_d7", 
"D14_High_d0", "D14_High_d7", "D14_Low_d0", "D14_Low_d7", "D14_Medium_d0", 
"D14_Medium_d7", "D18_High_d0", "D18_High_d7", "D18_Low_d0", 
"D18_Low_d7", "D18_Medium_d0", "D18_Medium_d7"), percentVar = c(0.386943314549313, 
0.207313334715293))

这些数据是在 DESeq2 包中执行 plotPCA 之后获取的。我想在这个 PCA 图中展示“三个层次”的属性,因此我认为用形状、填充色和文本标签来标记它们就足够了(使用以下代码)。但不知为何,填充(fill)的图例没有显示正确的颜色:PCA 图本身没问题,但图例中的颜色全是黑色。有没有更好的方法来绘制我的示例数据?

# PCA scatter plot of pcaData: point shape encodes CellType, point fill
# encodes Day, and every point gets a repelled text label coloured by Donor.

# Convert the variance fractions stored in the "percentVar" attribute into
# whole-number percentages for the axis labels.
percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData, aes(x = PC1, y = PC2)) +
  geom_point(size = 3, aes(shape = CellType, fill = Day)) +
  scale_fill_manual(
    values = c("#E69F00", "#56B4E9"),
    # The fill legend keys are otherwise drawn with a shape that has no fill,
    # so every key looks black. Force a fillable shape (22, which is not one
    # of the CellType shapes) so the legend shows the actual fill colours.
    guide = guide_legend(override.aes = list(shape = 22))
  ) +
  # Shapes 21, 23 and 25 are used because the points are filled via `fill`.
  scale_shape_manual(values = c(21, 23, 25)) +
  geom_text_repel(size = 3.5, aes(label = Donor, colour = Donor)) +
  scale_color_discrete() +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
  coord_fixed() +
  ggtitle("PCA with Scaled data")

问题在于:显示 fill 图例时,ggplot2 会自动为图例键使用一种不带填充的默认形状,因此填充颜色无法在图例中显示出来;这样的图例确实没什么用。

要解决此问题,您可以手动指定(覆盖)填充图例所使用的形状,如下所示:

  # Manual fill palette whose legend keys are drawn with shape 22, so the
  # fill colours are visible in the legend.
  scale_fill_manual(
    values = c("#E69F00", "#56B4E9"),
    guide = guide_legend(
      override.aes = list(shape = 22)
    )
  )

我建议选用一个您尚未使用的形状(例如上面的 22),这样填充图例和形状图例之间就不会产生混淆。