如何在 PCA 图周围绘制椭圆?
How to draw ellipses around PCA plot?
我有一个 PCA 图,我已经研究了一段时间(我不是很擅长 R,但这教会了我很多东西,只是想制作这个图)。我现在正处于情节看起来如何的阶段,现在我们只想在复制品周围画椭圆。 (见下图)
理想情况下,椭圆的边框颜色应与颜色完全相同,并填充相同颜色的大部分透明版本(参见下面的示例图)
很多人建议使用 ggbiplot,我试过了,但我发现手册的细节非常少,而且我不太了解如何控制绘图。为了生成绘图,我使用了以下代码:
cisr5 <- read.csv("cis-infR5/cis-infection R5.csv", header =T, stringsAsFactors = F)
cisr5_pca <- prcomp(as.matrix(cisr5[,2:28]), center = T, scale = T)
cisr5_exp <- summary(cisr5_pca)$importance[2,]*100
cisr5scores <- as.data.frame(cisr5_pca$x[,1:2])
cisr5exp12 <- cisr5_exp[1:2]
cisr5_groups <- cisr5$Treatment
cisr5_plot <- ggplot(cisr5scores, aes(x=PC1, y=PC2))+
geom_point(size= 4.5, aes(fill=cisr5$Treatment, shape = cisr5$Treatment, colour= cisr5$Treatment))+
scale_shape_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = 21,"HIV-1 R5" = 21,"LPS" = 21,"M. bovis" = 21,"HN878" = 21,"H37Rv" = 21,"EU127" = 21,"CDC1551" = 21))+
scale_fill_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "grey73", "LPS" = "steelblue4", "M. bovis" = "darkorange1", "HN878" = "brown1", "H37Rv" = "seagreen", "EU127" = "darkturquoise", "CDC1551" = "goldenrod1"))+
scale_colour_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "black", "LPS" = "black", "M. bovis" = "black", "HN878" = "black", "H37Rv" = "black", "EU127" = "black", "CDC1551" = "black"))+
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8)+
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8)+
xlab(paste("PC1 ", "(",cisr5exp12[1],"%", ")", sep=""))+
ylab(paste("PC1 ", "(",cisr5exp12[2],"%", ")", sep=""))+
theme_minimal()+
theme(axis.title.y = element_text(size = 18, family = "sans"),
legend.position = "bottom",
legend.text = element_text(size = 18, family = "sans"),
axis.text.x = element_text(colour ="black", size = 16, family = "sans"),
axis.text.y = element_text(colour ="black", size = 16, family = "sans"),
axis.title.x = element_text(colour = "black", size = 18, family = "sans"),
legend.title = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5))+
theme(legend.key = element_rect(fill = "white", colour = "white"))
这里是 dput(cisr5) 的输出:
structure(list(Treatment = c("Cells", "Cells", "Cells", "HIV-1 R5",
"HIV-1 R5", "HIV-1 R5", "LPS", "LPS", "LPS", "M. bovis", "M. bovis",
"M. bovis", "H37Rv", "H37Rv", "H37Rv", "HN878", "HN878", "HN878",
"CDC1551", "CDC1551", "CDC1551", "EU127", "EU127", "EU127"),
MIP.1a = c(7277.592, 8021.917, 10155.92, 9224.759, 11058.29,
11403.6, 3063.892, 3861.123, 3871.972, 568.693, 589.4971,
593.841, 22488.42, 25845.16, 20065.2, 1281.982, 1501.423,
1297.166, 891.334, 925.3091, 770.3838, 1197.705, 1114.606,
1227.442), I.TAC = c(281.7118, 270.4363, 304.2628, 264.7985,
287.3495, 276.074, 267.6174, 281.7118, 281.7118, 236.6098,
236.6098, 247.8853, 281.7118, 292.9873, 256.3419, 264.7985,
270.4363, 267.6174, 264.7985, 259.1608, 247.8853, 247.8853,
270.4363, 270.4363), IL.8 = c(2322.259, 2246.195, 2552.522,
1647.884, 1946.82, 1856.497, 1670.42, 1837.503, 1793.763,
733.6484, 866.3237, 871.3251, 2775.03, 3554.01, 2518.453,
2292.289, 2564.807, 2198.427, 2574.049, 2934.074, 2105.359,
1195.042, 1154.881, 1370.437), G.CSF = c(73.23274, 72.13298,
76.53325, 69.93408, 74.3327, 72.13298, 75.43287, 74.3327,
74.3327, 67.736, 69.93408, 72.13298, 74.3327, 73.23274, 71.03343,
72.13298, 67.736, 69.93408, 72.13298, 69.93408, 72.13298,
72.13298, 69.93408, 74.3327), IFN.g = c(4448.144, 4635.324,
4977.491, 4253.181, 5168.498, 4399.135, 3435.918, 3632.174,
3778.012, 1483.4, 1693.075, 1569.619, 4298.186, 5321, 3955.879,
2630.966, 2986.232, 2765.706, 2488.634, 2864.619, 2220.581,
2630.966, 2533.882, 2964.318), IL.1b = c(19.28912, 20.16716,
21.04531, 19.28912, 20.16716, 18.41118, 20.16716, 19.28912,
21.48443, 18.41118, 18.41118, 17.53336, 19.28912, 20.16716,
21.04531, 19.28912, 18.41118, 19.28912, 20.16716, 20.16716,
17.53336, 19.28912, 19.28912, 20.16716), IL.12.p70 = c(84.33227,
84.33227, 84.33227, 84.33227, 86.81428, 79.36854, 84.33227,
79.36854, 84.33227, 74.40519, 74.40519, 74.40519, 89.29638,
84.33227, 81.85036, 74.40519, 79.36854, 84.33227, 84.33227,
89.29638, 79.36854, 84.33227, 74.40519, 84.33227), IL.23 = c(956.1321,
1227.239, 1324.087, 1149.769, 1546.889, 1207.87, 1217.554,
1546.889, 1198.186, 694.8057, 791.5822, 762.5479, 1275.661,
1372.517, 1091.673, 1072.308, 1101.355, 1014.218, 1062.626,
1140.086, 936.7714, 956.1321, 1043.262, 1101.355), IL.6 = c(14.32172,
14.78475, 16.17424, 13.85876, 14.78475, 14.78475, 33.82531,
37.32027, 36.85406, 10.61982, 11.08233, 11.08233, 14.78475,
16.63753, 13.39586, 12.47025, 12.93302, 12.93302, 13.85876,
14.32172, 12.47025, 12.47025, 12.47025, 13.39586), TNF.a = c(289.1692,
312.3068, 352.5328, 338.9781, 404.2004, 370.7624, 328.5335,
377.7523, 380.4718, 181.863, 216.2346, 211.6452, 328.5335,
404.9793, 284.1628, 244.5805, 256.8619, 230.3979, 212.4099,
251.487, 184.1509, 288.3988, 278.3891, 352.1453), IP.10 = c(90.64973,
86.85127, 82.82118, 105.383, 118.7126, 108.1868, 124.6164,
150.8182, 148.0671, 89.8652, 110.4455, 106.6015, 103.7395,
123.8773, 101.3685, 127.3294, 139.9029, 123.631, 128.317,
149.6923, 115.0333, 116.504, 118.7126, 143.6997), MIP.2a = c(258.1445,
252.799, 274.2216, 257.9388, 255.8821, 261.2315, 257.5273,
261.0256, 259.5848, 247.2551, 248.4864, 249.3075, 262.8788,
257.733, 253.4154, 257.9388, 258.3502, 262.0551, 254.2375,
256.9103, 250.1288, 252.3881, 253.4154, 271.3315), MIG = c(1147.899,
1172.558, 1197.223, 1080.117, 1123.246, 1086.277, 1203.39,
1234.232, 1234.232, 975.4516, 975.4516, 987.7596, 1098.599,
1135.572, 1073.957, 1037.007, 1061.639, 1049.322, 1061.639,
1086.277, 1030.849, 1037.007, 1037.007, 1067.798), GM.CSF = c(5202.493,
5367.087, 6281.102, 5567.915, 6144.319, 6399.046, 5782.692,
6603.895, 6685.711, 2875.188, 3106.839, 3246.791, 5810.084,
6669.865, 4701.064, 4711.37, 5208.576, 4535.682, 4300.371,
5243.951, 3986.69, 4800.085, 4733.171, 5733.348), IL.1a = c(21.99391,
25.24585, 27.05401, 26.33061, 28.50134, 27.77759, 24.88434,
23.43878, 24.88434, 17.66354, 19.82794, 18.02416, 24.88434,
27.05401, 23.43878, 21.99391, 24.52289, 21.99391, 24.16147,
25.60739, 22.71626, 20.54976, 19.82794, 21.99391), IL.10 = c(46.10516,
50.43545, 58.48418, 28.9328, 34.24083, 35.47581, 18.69787,
22.64218, 22.14903, 21.65591, 26.5886, 27.57554, 41.65379,
46.22884, 39.42912, 28.56261, 30.04346, 23.13537, 23.38197,
27.94568, 22.3956, 32.0184, 34.36432, 40.41778), IL.18 = c(50.6393,
48.22433, 53.05463, 50.6393, 50.6393, 55.47033, 50.6393,
50.6393, 50.6393, 45.80971, 40.98156, 47.01698, 50.6393,
50.6393, 48.22433, 48.22433, 50.6393, 48.22433, 48.22433,
48.22433, 45.80971, 48.22433, 45.80971, 48.22433), IL.27 = c(805.5802,
846.0657, 825.8205, 785.3449, 825.8205, 805.5802, 734.7781,
825.8205, 765.1145, 643.8357, 684.2423, 694.3469, 805.5802,
785.3449, 744.889, 785.3449, 765.1145, 724.6685, 704.4529,
805.5802, 694.3469, 704.4529, 704.4529, 744.889), M.CSF = c(11452.29,
11704.94, 12765.98, 12184.26, 13394.81, 13015.51, 10865.78,
11909.02, 11690.88, 7205.788, 8203.048, 7639.321, 10028.2,
11592.56, 9588.618, 9980.007, 10657.32, 9526.991, 9568.07,
11018.99, 9022.033, 10186.76, 9828.723, 11824.51), RANTES = c(5461.057,
5429.06, 5125.649, 6432.943, 7192.639, 6825.792, 4361.753,
4795.94, 4261.045, 2425.623, 2721.773, 2527.076, 10053.62,
10504.37, 10984.78, 4090.124, 4540.439, 3850.142, 3299.907,
3409.28, 2741.305, 3539.123, 3027.575, 3609.566), IL.15 = c(63.42687,
60.34966, 58.81413, 70.12247, 78.93716, 72.19048, 52.69245,
58.81413, 51.67533, 30.02343, 34.52132, 30.27284, 109.5234,
111.6625, 108.4552, 48.12252, 51.67533, 44.5807, 38.53444,
40.043, 33.77044, 44.5807, 37.52983, 45.08601), IFN.b = c(24.36161,
24.36161, 20.87949, 22.62047, 26.1029, 26.1029, 20.87949,
20.87949, 20.87949, 17.39801, 19.13867, 17.39801, 33.06965,
29.58596, 29.58596, 20.87949, 22.62047, 18.26832, 20.87949,
17.39801, 17.39801, 19.13867, 19.13867, 17.39801), IL.12.23p40 = c(19127.5,
17407.09, 17387.6, 20070.89, 20790.61, 20691.9, 18080.17,
15128.14, 15930.68, 9402.136, 11857.72, 10224.28, 28743.06,
31302.18, 30019.45, 13089.88, 15350.27, 14867.62, 12831.48,
13866.69, 9326.674, 13252.71, 13348.55, 14424.41), IFN.a = c(7.032462,
7.032462, 7.233667, 7.636124, 7.032462, 7.434888, 7.837376,
8.441224, 7.636124, 6.630097, 7.032462, 6.428938, 8.038643,
7.636124, 7.032462, 6.428938, 6.227795, 6.831272, 8.239925,
6.831272, 7.233667, 7.636124, 7.636124, 6.026667), TGF.b1 = c(3598.345,
4109.123, 3691.072, 4290.412, 4274.196, 3569.607, 3729.499,
3723.092, 4015.527, 4212.627, 4134.977, 4134.977, 4717.371,
3575.991, 2869.674, 3800.033, 4115.585, 4144.675, 3410.285,
2932.565, 3314.964, 3800.033, 3550.458, 3267.381), TGF.b2 = c(35.86783,
39.13779, 35.32298, 47.31947, 43.50015, 36.95764, 32.59941,
34.23343, 36.95764, 75.75773, 55.51084, 48.95697, 57.15028,
39.13779, 33.14404, 35.32298, 39.13779, 38.59269, 38.04763,
30.42132, 34.23343, 36.95764, 36.95764, 32.59941), TGF.b3 = c(33.53998,
41.92618, 33.53998, 33.53998, 41.92618, 41.92618, 33.53998,
33.53998, 33.53998, 41.92618, 33.53998, 25.15427, 41.92618,
33.53998, 33.53998, 33.53998, 33.53998, 41.92618, 33.53998,
33.53998, 33.53998, 33.53998, 33.53998, 33.53998)), class = "data.frame", row.names = c(NA,
-24L))
@Axeman 的建议很好,所以你可以使用 geom_polygon():
cisr5scores$Treatment = cisr5r$Treatment
ggplot(cisr5scores,aes(x=PC1,y=PC2,col=Treatment)) +
geom_point() + geom_polygon(aes(fill=Treatment),alpha=0.2)
所以问题是你有一些形状被压平成一条线。一种方法是在制作多边形时抖动点:
set.seed(999)
ggplot(cisr5scores,aes(x=PC1,y=PC2,col=Treatment)) +
geom_point() +
geom_polygon(aes(x=PC1,y=PC2,fill=Treatment),alpha=0.2,
position = position_jitter(width=0.3,height=0.3))
github 上有一个软件包 ggConvexHull
可以用
安装
devtools::install_github("cmartin/ggConvexHull")
然后,在绘图中添加一个凸包就很简单了。
library(ggConvexHull)
cisr5_plot +
geom_convexhull(aes(fill=cisr5$Treatment,
shape = cisr5$Treatment,
colour= cisr5$Treatment),
alpha = 0.5)
另请参阅:
vignette("extending-ggplot2", package = "ggplot2")
编辑
与问题中发布的代码相比,一个完整的情节简化了许多,如下所示。它使用包 ggfortify
函数 autoplot
来绘制 PCA 组件和一个辅助函数,一个自定义的 ggplot 主题。
这是自定义主题。
theme_lordjord93_so <- function(){
theme_minimal() %+replace%
theme(axis.title.y = element_text(size = 18, family = "sans"),
legend.position = "bottom",
legend.text = element_text(size = 18, family = "sans"),
axis.text.x = element_text(colour ="black", size = 16, family = "sans"),
axis.text.y = element_text(colour ="black", size = 16, family = "sans"),
axis.title.x = element_text(colour = "black", size = 18, family = "sans"),
legend.title = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5),
legend.key = element_rect(fill = "white", colour = "white")
)
}
现在是带有凸包的 PCA 图。
library(ggplot2)
library(ggfortify)
library(ggConvexHull)
cisr5_pca <- prcomp(cisr5[2:28], center = TRUE, scale = TRUE)
cirs5_plot <- autoplot(cisr5_pca, data = cisr5,
fill = 'Treatment', colour = 'Treatment',
shape = 'Treatment', size = 4.5)
cirs5_plot +
geom_convexhull(aes(fill = Treatment, color = Treatment),
alpha = 0.5) +
scale_shape_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = 21,"HIV-1 R5" = 21,"LPS" = 21,"M. bovis" = 21,"HN878" = 21,"H37Rv" = 21,"EU127" = 21,"CDC1551" = 21))+
scale_fill_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "grey73", "LPS" = "steelblue4", "M. bovis" = "darkorange1", "HN878" = "brown1", "H37Rv" = "seagreen", "EU127" = "darkturquoise", "CDC1551" = "goldenrod1")) +
scale_colour_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "black", "LPS" = "black", "M. bovis" = "black", "HN878" = "black", "H37Rv" = "black", "EU127" = "black", "CDC1551" = "black")) +
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8) +
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8) +
theme_lordjord93_so()
看到有内点和外线与[=19=画的不一样时的凸包效果],这里是built-in数据集的图iris
.
iris_pca <- prcomp(iris[-5])
iris_plot <- autoplot(iris_pca, data = iris, colour = 'Species')
iris_plot +
geom_convexhull(aes(fill = Species, colour = Species), alpha = 0.5) +
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8) +
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8) +
scale_fill_manual(values = c(setosa = "steelblue4", versicolor = "darkorange1", virginica = "seagreen")) +
scale_colour_manual(values = c(setosa = "steelblue4", versicolor = "darkorange1", virginica = "seagreen")) +
theme_lordjord93_so()
我有一个 PCA 图,我已经研究了一段时间(我不是很擅长 R,但这教会了我很多东西,只是想制作这个图)。我现在正处于情节看起来如何的阶段,现在我们只想在复制品周围画椭圆。 (见下图)
理想情况下,椭圆的边框颜色应与颜色完全相同,并填充相同颜色的大部分透明版本(参见下面的示例图)
很多人建议使用 ggbiplot,我试过了,但我发现手册的细节非常少,而且我不太了解如何控制绘图。为了生成绘图,我使用了以下代码:
cisr5 <- read.csv("cis-infR5/cis-infection R5.csv", header =T, stringsAsFactors = F)
cisr5_pca <- prcomp(as.matrix(cisr5[,2:28]), center = T, scale = T)
cisr5_exp <- summary(cisr5_pca)$importance[2,]*100
cisr5scores <- as.data.frame(cisr5_pca$x[,1:2])
cisr5exp12 <- cisr5_exp[1:2]
cisr5_groups <- cisr5$Treatment
cisr5_plot <- ggplot(cisr5scores, aes(x=PC1, y=PC2))+
geom_point(size= 4.5, aes(fill=cisr5$Treatment, shape = cisr5$Treatment, colour= cisr5$Treatment))+
scale_shape_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = 21,"HIV-1 R5" = 21,"LPS" = 21,"M. bovis" = 21,"HN878" = 21,"H37Rv" = 21,"EU127" = 21,"CDC1551" = 21))+
scale_fill_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "grey73", "LPS" = "steelblue4", "M. bovis" = "darkorange1", "HN878" = "brown1", "H37Rv" = "seagreen", "EU127" = "darkturquoise", "CDC1551" = "goldenrod1"))+
scale_colour_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "black", "LPS" = "black", "M. bovis" = "black", "HN878" = "black", "H37Rv" = "black", "EU127" = "black", "CDC1551" = "black"))+
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8)+
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8)+
xlab(paste("PC1 ", "(",cisr5exp12[1],"%", ")", sep=""))+
ylab(paste("PC1 ", "(",cisr5exp12[2],"%", ")", sep=""))+
theme_minimal()+
theme(axis.title.y = element_text(size = 18, family = "sans"),
legend.position = "bottom",
legend.text = element_text(size = 18, family = "sans"),
axis.text.x = element_text(colour ="black", size = 16, family = "sans"),
axis.text.y = element_text(colour ="black", size = 16, family = "sans"),
axis.title.x = element_text(colour = "black", size = 18, family = "sans"),
legend.title = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5))+
theme(legend.key = element_rect(fill = "white", colour = "white"))
这里是 dput(cisr5) 的输出:
structure(list(Treatment = c("Cells", "Cells", "Cells", "HIV-1 R5",
"HIV-1 R5", "HIV-1 R5", "LPS", "LPS", "LPS", "M. bovis", "M. bovis",
"M. bovis", "H37Rv", "H37Rv", "H37Rv", "HN878", "HN878", "HN878",
"CDC1551", "CDC1551", "CDC1551", "EU127", "EU127", "EU127"),
MIP.1a = c(7277.592, 8021.917, 10155.92, 9224.759, 11058.29,
11403.6, 3063.892, 3861.123, 3871.972, 568.693, 589.4971,
593.841, 22488.42, 25845.16, 20065.2, 1281.982, 1501.423,
1297.166, 891.334, 925.3091, 770.3838, 1197.705, 1114.606,
1227.442), I.TAC = c(281.7118, 270.4363, 304.2628, 264.7985,
287.3495, 276.074, 267.6174, 281.7118, 281.7118, 236.6098,
236.6098, 247.8853, 281.7118, 292.9873, 256.3419, 264.7985,
270.4363, 267.6174, 264.7985, 259.1608, 247.8853, 247.8853,
270.4363, 270.4363), IL.8 = c(2322.259, 2246.195, 2552.522,
1647.884, 1946.82, 1856.497, 1670.42, 1837.503, 1793.763,
733.6484, 866.3237, 871.3251, 2775.03, 3554.01, 2518.453,
2292.289, 2564.807, 2198.427, 2574.049, 2934.074, 2105.359,
1195.042, 1154.881, 1370.437), G.CSF = c(73.23274, 72.13298,
76.53325, 69.93408, 74.3327, 72.13298, 75.43287, 74.3327,
74.3327, 67.736, 69.93408, 72.13298, 74.3327, 73.23274, 71.03343,
72.13298, 67.736, 69.93408, 72.13298, 69.93408, 72.13298,
72.13298, 69.93408, 74.3327), IFN.g = c(4448.144, 4635.324,
4977.491, 4253.181, 5168.498, 4399.135, 3435.918, 3632.174,
3778.012, 1483.4, 1693.075, 1569.619, 4298.186, 5321, 3955.879,
2630.966, 2986.232, 2765.706, 2488.634, 2864.619, 2220.581,
2630.966, 2533.882, 2964.318), IL.1b = c(19.28912, 20.16716,
21.04531, 19.28912, 20.16716, 18.41118, 20.16716, 19.28912,
21.48443, 18.41118, 18.41118, 17.53336, 19.28912, 20.16716,
21.04531, 19.28912, 18.41118, 19.28912, 20.16716, 20.16716,
17.53336, 19.28912, 19.28912, 20.16716), IL.12.p70 = c(84.33227,
84.33227, 84.33227, 84.33227, 86.81428, 79.36854, 84.33227,
79.36854, 84.33227, 74.40519, 74.40519, 74.40519, 89.29638,
84.33227, 81.85036, 74.40519, 79.36854, 84.33227, 84.33227,
89.29638, 79.36854, 84.33227, 74.40519, 84.33227), IL.23 = c(956.1321,
1227.239, 1324.087, 1149.769, 1546.889, 1207.87, 1217.554,
1546.889, 1198.186, 694.8057, 791.5822, 762.5479, 1275.661,
1372.517, 1091.673, 1072.308, 1101.355, 1014.218, 1062.626,
1140.086, 936.7714, 956.1321, 1043.262, 1101.355), IL.6 = c(14.32172,
14.78475, 16.17424, 13.85876, 14.78475, 14.78475, 33.82531,
37.32027, 36.85406, 10.61982, 11.08233, 11.08233, 14.78475,
16.63753, 13.39586, 12.47025, 12.93302, 12.93302, 13.85876,
14.32172, 12.47025, 12.47025, 12.47025, 13.39586), TNF.a = c(289.1692,
312.3068, 352.5328, 338.9781, 404.2004, 370.7624, 328.5335,
377.7523, 380.4718, 181.863, 216.2346, 211.6452, 328.5335,
404.9793, 284.1628, 244.5805, 256.8619, 230.3979, 212.4099,
251.487, 184.1509, 288.3988, 278.3891, 352.1453), IP.10 = c(90.64973,
86.85127, 82.82118, 105.383, 118.7126, 108.1868, 124.6164,
150.8182, 148.0671, 89.8652, 110.4455, 106.6015, 103.7395,
123.8773, 101.3685, 127.3294, 139.9029, 123.631, 128.317,
149.6923, 115.0333, 116.504, 118.7126, 143.6997), MIP.2a = c(258.1445,
252.799, 274.2216, 257.9388, 255.8821, 261.2315, 257.5273,
261.0256, 259.5848, 247.2551, 248.4864, 249.3075, 262.8788,
257.733, 253.4154, 257.9388, 258.3502, 262.0551, 254.2375,
256.9103, 250.1288, 252.3881, 253.4154, 271.3315), MIG = c(1147.899,
1172.558, 1197.223, 1080.117, 1123.246, 1086.277, 1203.39,
1234.232, 1234.232, 975.4516, 975.4516, 987.7596, 1098.599,
1135.572, 1073.957, 1037.007, 1061.639, 1049.322, 1061.639,
1086.277, 1030.849, 1037.007, 1037.007, 1067.798), GM.CSF = c(5202.493,
5367.087, 6281.102, 5567.915, 6144.319, 6399.046, 5782.692,
6603.895, 6685.711, 2875.188, 3106.839, 3246.791, 5810.084,
6669.865, 4701.064, 4711.37, 5208.576, 4535.682, 4300.371,
5243.951, 3986.69, 4800.085, 4733.171, 5733.348), IL.1a = c(21.99391,
25.24585, 27.05401, 26.33061, 28.50134, 27.77759, 24.88434,
23.43878, 24.88434, 17.66354, 19.82794, 18.02416, 24.88434,
27.05401, 23.43878, 21.99391, 24.52289, 21.99391, 24.16147,
25.60739, 22.71626, 20.54976, 19.82794, 21.99391), IL.10 = c(46.10516,
50.43545, 58.48418, 28.9328, 34.24083, 35.47581, 18.69787,
22.64218, 22.14903, 21.65591, 26.5886, 27.57554, 41.65379,
46.22884, 39.42912, 28.56261, 30.04346, 23.13537, 23.38197,
27.94568, 22.3956, 32.0184, 34.36432, 40.41778), IL.18 = c(50.6393,
48.22433, 53.05463, 50.6393, 50.6393, 55.47033, 50.6393,
50.6393, 50.6393, 45.80971, 40.98156, 47.01698, 50.6393,
50.6393, 48.22433, 48.22433, 50.6393, 48.22433, 48.22433,
48.22433, 45.80971, 48.22433, 45.80971, 48.22433), IL.27 = c(805.5802,
846.0657, 825.8205, 785.3449, 825.8205, 805.5802, 734.7781,
825.8205, 765.1145, 643.8357, 684.2423, 694.3469, 805.5802,
785.3449, 744.889, 785.3449, 765.1145, 724.6685, 704.4529,
805.5802, 694.3469, 704.4529, 704.4529, 744.889), M.CSF = c(11452.29,
11704.94, 12765.98, 12184.26, 13394.81, 13015.51, 10865.78,
11909.02, 11690.88, 7205.788, 8203.048, 7639.321, 10028.2,
11592.56, 9588.618, 9980.007, 10657.32, 9526.991, 9568.07,
11018.99, 9022.033, 10186.76, 9828.723, 11824.51), RANTES = c(5461.057,
5429.06, 5125.649, 6432.943, 7192.639, 6825.792, 4361.753,
4795.94, 4261.045, 2425.623, 2721.773, 2527.076, 10053.62,
10504.37, 10984.78, 4090.124, 4540.439, 3850.142, 3299.907,
3409.28, 2741.305, 3539.123, 3027.575, 3609.566), IL.15 = c(63.42687,
60.34966, 58.81413, 70.12247, 78.93716, 72.19048, 52.69245,
58.81413, 51.67533, 30.02343, 34.52132, 30.27284, 109.5234,
111.6625, 108.4552, 48.12252, 51.67533, 44.5807, 38.53444,
40.043, 33.77044, 44.5807, 37.52983, 45.08601), IFN.b = c(24.36161,
24.36161, 20.87949, 22.62047, 26.1029, 26.1029, 20.87949,
20.87949, 20.87949, 17.39801, 19.13867, 17.39801, 33.06965,
29.58596, 29.58596, 20.87949, 22.62047, 18.26832, 20.87949,
17.39801, 17.39801, 19.13867, 19.13867, 17.39801), IL.12.23p40 = c(19127.5,
17407.09, 17387.6, 20070.89, 20790.61, 20691.9, 18080.17,
15128.14, 15930.68, 9402.136, 11857.72, 10224.28, 28743.06,
31302.18, 30019.45, 13089.88, 15350.27, 14867.62, 12831.48,
13866.69, 9326.674, 13252.71, 13348.55, 14424.41), IFN.a = c(7.032462,
7.032462, 7.233667, 7.636124, 7.032462, 7.434888, 7.837376,
8.441224, 7.636124, 6.630097, 7.032462, 6.428938, 8.038643,
7.636124, 7.032462, 6.428938, 6.227795, 6.831272, 8.239925,
6.831272, 7.233667, 7.636124, 7.636124, 6.026667), TGF.b1 = c(3598.345,
4109.123, 3691.072, 4290.412, 4274.196, 3569.607, 3729.499,
3723.092, 4015.527, 4212.627, 4134.977, 4134.977, 4717.371,
3575.991, 2869.674, 3800.033, 4115.585, 4144.675, 3410.285,
2932.565, 3314.964, 3800.033, 3550.458, 3267.381), TGF.b2 = c(35.86783,
39.13779, 35.32298, 47.31947, 43.50015, 36.95764, 32.59941,
34.23343, 36.95764, 75.75773, 55.51084, 48.95697, 57.15028,
39.13779, 33.14404, 35.32298, 39.13779, 38.59269, 38.04763,
30.42132, 34.23343, 36.95764, 36.95764, 32.59941), TGF.b3 = c(33.53998,
41.92618, 33.53998, 33.53998, 41.92618, 41.92618, 33.53998,
33.53998, 33.53998, 41.92618, 33.53998, 25.15427, 41.92618,
33.53998, 33.53998, 33.53998, 33.53998, 41.92618, 33.53998,
33.53998, 33.53998, 33.53998, 33.53998, 33.53998)), class = "data.frame", row.names = c(NA,
-24L))
@Axeman 的建议很好,所以你可以使用 geom_polygon():
cisr5scores$Treatment = cisr5r$Treatment
ggplot(cisr5scores,aes(x=PC1,y=PC2,col=Treatment)) +
geom_point() + geom_polygon(aes(fill=Treatment),alpha=0.2)
所以问题是你有一些形状被压平成一条线。一种方法是在制作多边形时抖动点:
set.seed(999)
ggplot(cisr5scores,aes(x=PC1,y=PC2,col=Treatment)) +
geom_point() +
geom_polygon(aes(x=PC1,y=PC2,fill=Treatment),alpha=0.2,
position = position_jitter(width=0.3,height=0.3))
github 上有一个软件包 ggConvexHull
可以用
devtools::install_github("cmartin/ggConvexHull")
然后,在绘图中添加一个凸包就很简单了。
library(ggConvexHull)
cisr5_plot +
geom_convexhull(aes(fill=cisr5$Treatment,
shape = cisr5$Treatment,
colour= cisr5$Treatment),
alpha = 0.5)
另请参阅:
vignette("extending-ggplot2", package = "ggplot2")
编辑
与问题中发布的代码相比,一个完整的情节简化了许多,如下所示。它使用包 ggfortify
函数 autoplot
来绘制 PCA 组件和一个辅助函数,一个自定义的 ggplot 主题。
这是自定义主题。
theme_lordjord93_so <- function(){
theme_minimal() %+replace%
theme(axis.title.y = element_text(size = 18, family = "sans"),
legend.position = "bottom",
legend.text = element_text(size = 18, family = "sans"),
axis.text.x = element_text(colour ="black", size = 16, family = "sans"),
axis.text.y = element_text(colour ="black", size = 16, family = "sans"),
axis.title.x = element_text(colour = "black", size = 18, family = "sans"),
legend.title = element_blank(),
panel.background = element_blank(),
axis.line = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5),
legend.key = element_rect(fill = "white", colour = "white")
)
}
现在是带有凸包的 PCA 图。
library(ggplot2)
library(ggfortify)
library(ggConvexHull)
cisr5_pca <- prcomp(cisr5[2:28], center = TRUE, scale = TRUE)
cirs5_plot <- autoplot(cisr5_pca, data = cisr5,
fill = 'Treatment', colour = 'Treatment',
shape = 'Treatment', size = 4.5)
cirs5_plot +
geom_convexhull(aes(fill = Treatment, color = Treatment),
alpha = 0.5) +
scale_shape_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = 21,"HIV-1 R5" = 21,"LPS" = 21,"M. bovis" = 21,"HN878" = 21,"H37Rv" = 21,"EU127" = 21,"CDC1551" = 21))+
scale_fill_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "grey73", "LPS" = "steelblue4", "M. bovis" = "darkorange1", "HN878" = "brown1", "H37Rv" = "seagreen", "EU127" = "darkturquoise", "CDC1551" = "goldenrod1")) +
scale_colour_manual(breaks=c("Cells","HIV-1 R5", "LPS", "M. bovis", "H37Rv", "HN878", "CDC1551", "EU127"),
values=c("Cells" = "black", "HIV-1 R5" = "black", "LPS" = "black", "M. bovis" = "black", "HN878" = "black", "H37Rv" = "black", "EU127" = "black", "CDC1551" = "black")) +
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8) +
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8) +
theme_lordjord93_so()
看到有内点和外线与[=19=画的不一样时的凸包效果],这里是built-in数据集的图iris
.
iris_pca <- prcomp(iris[-5])
iris_plot <- autoplot(iris_pca, data = iris, colour = 'Species')
iris_plot +
geom_convexhull(aes(fill = Species, colour = Species), alpha = 0.5) +
geom_hline(yintercept=0, linetype="dashed", color = "black", size = 0.8) +
geom_vline(xintercept=0, linetype="dashed", colour= "black", size = 0.8) +
scale_fill_manual(values = c(setosa = "steelblue4", versicolor = "darkorange1", virginica = "seagreen")) +
scale_colour_manual(values = c(setosa = "steelblue4", versicolor = "darkorange1", virginica = "seagreen")) +
theme_lordjord93_so()