如何在 R 中绘制多个主成分
How to plot multiple components in R
我想绘制所有主成分,并按 clusterNum 列对点进行分组。然而,就我目前所学,我们只能以 2D 或 3D 方式绘图。所以,我想知道通常如何用全部这些主成分(PC)来绘图或表示?
This is what the sample looks like:
PC1 PC2 PC3 PC4 PC5 clusterNum
1 -2.3779857 0.85818117 0.77918535 0.1967346 0.9826036 2
2 -1.4290545 0.09556012 -0.06358622 1.3468587 -0.1525576 2
3 1.5621954 2.53686714 -2.57818785 0.0111326 0.2755555 1
4 1.9915498 0.19282116 1.34349507 0.0578331 -1.0146932 3
5 -0.4431131 1.38970833 2.34020611 -0.8359105 0.1402896 2
6 -1.9892596 0.37200044 2.00614197 0.5013188 -1.5358936 2
7 -2.6111509 -0.31112666 1.44419058 0.7313908 0.2597182 2
1977 -1.8662394 1.74827166 -2.37657231 0.4158580 1.1100287 4
1978 2.3477702 -2.24859797 -0.47305695 0.3690514 -1.1667859 3
1979 2.0797897 -0.14012792 -0.82941643 -1.2233560 -0.4523913 1
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
提前致谢。
你可以用多条线做一个线图,像这样:
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
library(tidyverse)
# Draw one line per column (PC1-PC5) across the rows of `data`.
data %>%
  # Move the row labels into a regular column so they can be mapped to x.
  rownames_to_column(var = "rowname") %>%
  # The cluster label is not plotted here.
  select(!clusterNum) %>%
  # Lock the factor levels to the current row order so the x axis follows
  # the order of the data frame.
  mutate(rowname = factor(rowname, levels = rowname)) %>%
  # Long format: one row per (rowname, column name, value) triple.
  pivot_longer(cols = -rowname, names_to = "name", values_to = "value") %>%
  ggplot(mapping = aes(x = rowname, y = value, group = name, colour = name)) +
  geom_line()
不过,我认为这种画法对 PCA 结果来说并不常见。请查看 here、here 和 here,以了解可视化 PCA 结果的常见做法。
除了 Till 提到的优秀资源之外,我只想在这里补充一点:在我的研究领域,人们经常用主成分来做邻域嵌入(neighbour embedding)。这样会失去坐标轴的所有数值含义,但会让相似的行比不相似的行靠得更近。
library(ggplot2)
library(uwot)
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
# Embed the PC columns in two dimensions with UMAP and colour the points by
# cluster.
# The embedding is stochastic, so fix the RNG seed to make the figure
# reproducible between runs.
set.seed(42)
# Pick the feature columns by name rather than by position, so the cluster
# label is never fed into the embedding even if the column order changes.
pc_cols <- setdiff(names(data), "clusterNum")
# Keep the raw embedding matrix under its own name instead of overwriting
# the name `umap` with it.
embedding <- umap(data[, pc_cols, drop = FALSE], n_neighbors = 2) # Go up to 15 or so for larger data
# `umap` is still the final data frame: two embedding columns (V1, V2 after
# `as.data.frame()`) plus the cluster label.
umap <- cbind(as.data.frame(embedding), clusterNum = data$clusterNum)
ggplot(umap, aes(V1, V2)) +
  geom_point(aes(colour = factor(clusterNum))) +
  # Readable legend title instead of the raw expression `factor(clusterNum)`.
  labs(colour = "clusterNum")
由 reprex package (v1.0.0) 于 2021-07-07 创建
我想绘制所有主成分,并按 clusterNum 列对点进行分组。然而,就我目前所学,我们只能以 2D 或 3D 方式绘图。所以,我想知道通常如何用全部这些主成分(PC)来绘图或表示?
This is what the sample looks like:
PC1 PC2 PC3 PC4 PC5 clusterNum
1 -2.3779857 0.85818117 0.77918535 0.1967346 0.9826036 2
2 -1.4290545 0.09556012 -0.06358622 1.3468587 -0.1525576 2
3 1.5621954 2.53686714 -2.57818785 0.0111326 0.2755555 1
4 1.9915498 0.19282116 1.34349507 0.0578331 -1.0146932 3
5 -0.4431131 1.38970833 2.34020611 -0.8359105 0.1402896 2
6 -1.9892596 0.37200044 2.00614197 0.5013188 -1.5358936 2
7 -2.6111509 -0.31112666 1.44419058 0.7313908 0.2597182 2
1977 -1.8662394 1.74827166 -2.37657231 0.4158580 1.1100287 4
1978 2.3477702 -2.24859797 -0.47305695 0.3690514 -1.1667859 3
1979 2.0797897 -0.14012792 -0.82941643 -1.2233560 -0.4523913 1
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
提前致谢。
你可以用多条线做一个线图,像这样:
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
library(tidyverse)
# Draw one line per column (PC1-PC5) across the rows of `data`.
data %>%
  # Move the row labels into a regular column so they can be mapped to x.
  rownames_to_column(var = "rowname") %>%
  # The cluster label is not plotted here.
  select(!clusterNum) %>%
  # Lock the factor levels to the current row order so the x axis follows
  # the order of the data frame.
  mutate(rowname = factor(rowname, levels = rowname)) %>%
  # Long format: one row per (rowname, column name, value) triple.
  pivot_longer(cols = -rowname, names_to = "name", values_to = "value") %>%
  ggplot(mapping = aes(x = rowname, y = value, group = name, colour = name)) +
  geom_line()
不过,我认为这种画法对 PCA 结果来说并不常见。请查看 here、here 和 here,以了解可视化 PCA 结果的常见做法。
除了 Till 提到的优秀资源之外,我只想在这里补充一点:在我的研究领域,人们经常用主成分来做邻域嵌入(neighbour embedding)。这样会失去坐标轴的所有数值含义,但会让相似的行比不相似的行靠得更近。
library(ggplot2)
library(uwot)
# Sample of the data: ten rows with columns PC1-PC5 plus the integer
# `clusterNum` label. Row labels are 1-7 and 1977-1979.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  # Integer row labels; `:` yields integers, matching the original 1L, 2L, ...
  row.names = c(1:7, 1977:1979),
  class = "data.frame"
)
# Embed the PC columns in two dimensions with UMAP and colour the points by
# cluster.
# The embedding is stochastic, so fix the RNG seed to make the figure
# reproducible between runs.
set.seed(42)
# Pick the feature columns by name rather than by position, so the cluster
# label is never fed into the embedding even if the column order changes.
pc_cols <- setdiff(names(data), "clusterNum")
# Keep the raw embedding matrix under its own name instead of overwriting
# the name `umap` with it.
embedding <- umap(data[, pc_cols, drop = FALSE], n_neighbors = 2) # Go up to 15 or so for larger data
# `umap` is still the final data frame: two embedding columns (V1, V2 after
# `as.data.frame()`) plus the cluster label.
umap <- cbind(as.data.frame(embedding), clusterNum = data$clusterNum)
ggplot(umap, aes(V1, V2)) +
  geom_point(aes(colour = factor(clusterNum))) +
  # Readable legend title instead of the raw expression `factor(clusterNum)`.
  labs(colour = "clusterNum")
由 reprex package (v1.0.0) 于 2021-07-07 创建