如何在 R 中绘制多个主成分

How to plot multiple components in R

我想绘制所有主成分,并按 clusterNum 列对点进行分组。然而,就我目前所学,我们只能绘制二维或三维图形。所以我想知道,通常如何用全部这些主成分(PC)来绘图或表示数据?

This is what the sample looks like: 

            PC1         PC2         PC3        PC4        PC5 clusterNum
1    -2.3779857  0.85818117  0.77918535  0.1967346  0.9826036          2
2    -1.4290545  0.09556012 -0.06358622  1.3468587 -0.1525576          2
3     1.5621954  2.53686714 -2.57818785  0.0111326  0.2755555          1
4     1.9915498  0.19282116  1.34349507  0.0578331 -1.0146932          3
5    -0.4431131  1.38970833  2.34020611 -0.8359105  0.1402896          2
6    -1.9892596  0.37200044  2.00614197  0.5013188 -1.5358936          2
7    -2.6111509 -0.31112666  1.44419058  0.7313908  0.2597182          2
1977 -1.8662394  1.74827166 -2.37657231  0.4158580  1.1100287          4
1978  2.3477702 -2.24859797 -0.47305695  0.3690514 -1.1667859          3
1979  2.0797897 -0.14012792 -0.82941643 -1.2233560 -0.4523913          1
# Ten-row sample: scores on PC1-PC5 plus the cluster label of each row.
# The integer row names (1-7, 1977-1979) are kept as given.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1977L, 1978L, 1979L),
  class = "data.frame"
)

提前致谢。

你可以用多条线做一个线图,像这样:

# Ten-row sample used by the line plot: scores on PC1-PC5 plus the cluster
# label of each row. The integer row names are kept as given.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1977L, 1978L, 1979L),
  class = "data.frame"
)

library(tidyverse)
# Line chart with one coloured line per principal component: the x axis is
# the original row label and the y axis is that row's score on each PC.
data %>%
  # Move the row names into a regular column so they survive the reshape.
  rownames_to_column(var = "rowname") %>%
  # The cluster label is not plotted here, only the PC scores.
  select(!clusterNum) %>%
  # Fix the factor levels to the current row order; otherwise the x axis
  # would sort the labels as text.
  mutate(rowname = factor(rowname, levels = rowname)) %>%
  # Long format: one row per (rowname, PC) pair.
  pivot_longer(cols = !rowname, names_to = "name", values_to = "value") %>%
  ggplot(mapping = aes(x = rowname, y = value, colour = name, group = name)) +
  geom_line()

不过,我认为这种图对 PCA 结果来说并不常见。请查看 here、here 和 here,以了解可视化 PCA 结果的常见做法。

除了 Till 提到的优秀资源之外,我只想在这里补充一点:在我的研究领域,人们经常用主成分来做邻域嵌入(neighbour embedding)。这样会丢失坐标轴的全部数值含义,但会让相似的行彼此靠得更近,而不相似的行离得更远。

library(ggplot2)
library(uwot)

# Ten-row sample fed to the embedding: scores on PC1-PC5 plus the cluster
# label of each row. The integer row names are kept as given.
data <- structure(
  list(
    PC1 = c(
      -2.37798570730988, -1.42905447617503, 1.56219540602943,
      1.99154980323715, -0.443113128962729, -1.98925960995357,
      -2.6111508923784, -1.86623936673779, 2.3477701685881,
      2.07978972115199
    ),
    PC2 = c(
      0.858181174741465, 0.0955601160006672, 2.53686714046073,
      0.192821161122631, 1.38970832915268, 0.372000440681993,
      -0.31112665937417, 1.74827166020088, -2.24859797467832,
      -0.140127921225955
    ),
    PC3 = c(
      0.779185345162976, -0.0635862220559265, -2.57818784684844,
      1.34349506727208, 2.34020610639405, 2.00614196687586,
      1.44419058101111, -2.37657231100357, -0.473056945068265,
      -0.829416434702976
    ),
    PC4 = c(
      0.196734648339808, 1.34685865526758, 0.0111326028445577,
      0.0578331048357187, -0.83591054343165, 0.501318761327088,
      0.731390845165095, 0.415858033401411, 0.369051391354487,
      -1.22335597705293
    ),
    PC5 = c(
      0.982603621624129, -0.152557597364265, 0.275555453841701,
      -1.01469324992585, 0.140289629133083, -1.53589363488684,
      0.259718249982426, 1.11002871887763, -1.16678589524657,
      -0.452391297542505
    ),
    clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L)
  ),
  row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1977L, 1978L, 1979L),
  class = "data.frame"
)

# Embed the PC scores in two dimensions with UMAP and draw the rows as points
# coloured by cluster. The embedded axes have no numeric interpretation; only
# the relative closeness of points is meaningful.

# UMAP's layout depends on random numbers; fixing the seed is intended to make
# reruns give the same picture (NOTE(review): confirm for your uwot settings).
set.seed(42)

# Pick the input columns by name instead of dropping "the last column", so the
# call keeps working if clusterNum is moved or more columns are appended.
pc_cols <- setdiff(names(data), "clusterNum")

# Store the result under its own name: assigning it to `umap` would shadow the
# uwot::umap() function for the rest of the session.
embedding <- umap(data[, pc_cols, drop = FALSE], n_neighbors = 2) # Go up to 15 or so for larger data

# Name the two embedding dimensions explicitly rather than relying on the
# V1/V2 names that as.data.frame() gives to an unnamed matrix.
colnames(embedding) <- c("UMAP1", "UMAP2")

embedding_df <- cbind(as.data.frame(embedding), clusterNum = data$clusterNum)

ggplot(embedding_df, aes(UMAP1, UMAP2)) +
  # factor() makes the integer cluster ids a discrete colour scale.
  geom_point(aes(colour = factor(clusterNum))) +
  labs(colour = "clusterNum")

由 reprex package (v1.0.0) 于 2021-07-07 创建