使用 prcomp 在 R 上进行 PCA(主成分分析)时出现问题

Trouble with a PCA (Principal Components Analysis) on R using prcomp

我想请教是否有人可以帮我对数据进行 PCA。以下是我的数据结构:

> dput(FA)
structure(list(sample = c("c1", "c2", "c3", "Zn10_1", "Zn10_2", 
"Zn10_3", "nZn10_1", "nZn10_2", "nZn10_3", "Zn100_1", "Zn100_2", 
"Zn100_3", "nZn100_1", "nZn100_2", "nZn100_3"), `C14:0` = c(1.97065107387833, 
2.01336092952724, 1.87687740699995, 1.96877720760228, 2.56733419600637, 
1.94573831651913, 1.76152966586202, 1.7930086410055, 2.15832575068244, 
2.13913049917982, 2.0408991811232, 2.0521020435284, 2.34112963482385, 
2.39787472942157, 2.29796121207546), `C14:1` = c(1.07066553147741, 
1.22543494743745, 1.01470008798304, 1.06964744965716, 1.50326388655048, 
0.956312501379721, 1.18981069749714, 1.1776643100288, 1.26951774340309, 
1.30932412066814, 1.13108118532112, 1.09142506848199, 0.700040776785988, 
0.758198381738416, 0.666805060169721), `C15:1` = c(1.09998232960113, 
0.867845652574848, 1.04294504519899, 1.09893637082181, 1.29641477576113, 
1.43005364356829, 0.796595095921738, 0.786200576067033, 0.917925386715196, 
1.33635485053488, 1.16071827309155, 1.12271887445182, 5.83072684031764, 
5.88385095324636, 5.76534752495801), `C16:0` = c(12.9881447687587, 
13.4295784749687, 12.4334301664617, 12.9757945121389, 11.990994848014, 
12.2921035784454, 11.9978401727862, 12.5936108929039, 13.4165605095541, 
12.2975009534812, 12.3667853503469, 12.0978424421172, 16.6359858671902, 
16.6797856208545, 16.5034252392006), `C16:1 n7` = c(0.604809561292188, 
0.537408553453964, 0.568429763971015, 0.604234455807833, 0, 0.885670765359058, 
0.508829881844746, 0.492275464781356, 0.609281164695178, 0.879794714566601, 
0.660135681023738, 0.594153631153198, 0.619312168688614, 0.677169547048814, 
0.586285203847339), `C16:1 n9` = c(5.03325247787184, 5.11895735568352, 
4.81152846173726, 5.02846642406346, 4.99035505890389, 5.75421090973311, 
3.37949434633465, 3.51597276774025, 3.95268425841674, 4.96291606032666, 
5.13695988437476, 4.46386851457305, 5.3316236939054, 5.38552361990531, 
5.26964715947335), `C17:0` = c(5.20071966715394, 4.43246787844013, 
4.97195981872387, 5.19577437099033, 5.42425714711782, 5.49681008410782, 
5.52661669419387, 6.39958104215763, 6.87461328480437, 5.11732447614075, 
5.30625667341959, 5.4999935697659, 3.21382356615385, 3.26951347972543, 
3.1648077902961), `C16:2n4` = c(0.620873560264092, 0.428287464907068, 
0.583964490439788, 0.620283179733671, 0.240522221848077, 0, 0.39258035827722, 
0.383608274417387, 0.428025477707006, 0.894606073397688, 0.676375181171923, 
0.61130092209557, 0.662917402259458, 0.721156628737455, 0), `C18:0` = c(5.9276156206326, 
5.98219225803807, 6.68769974480681, 5.92197912863453, 5.80283912430669, 
6.29594472284156, 7.34595350019057, 7.16679759099241, 7.47734303912648, 
5.78753846324746, 6.04109405512498, 6.27590848490824, 4.98985294429608, 
5.00237298730162, 5.1362861474394), `C16:3` = c(0.403206374194792, 
0.370909239943441, 0.37495180704175, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0.735278519590535, 0.792925025176817, 0.805198563223814), 
    `C18:1 n7` = c(1.80238068464764, 1.74132666653005, 1.71588115086903, 
    1.80066682447912, 1.88713735262001, 1.76074527031502, 4.12209376191081, 
    1.87483634459283, 1.93994540491356, 1.98398151542418, 1.87079041707096, 
    1.87248417090705, 0, 0, 0), `C18:1 n9` = c(27.7549758236815, 
    28.0712719522941, 28.6276763862272, 27.7285839809662, 28.2589558449305, 
    27.3343782423453, 33.9620124507686, 32.1903639696256, 33.7972702456779, 
    26.6534105005128, 27.2949458615664, 27.4318073673335, 26.3711488918378, 
    26.3019597402447, 27.4201563846584), `C16:4` = c(0.573886363271273, 
    0.769482981208631, 0.641160528802089, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 1.33467370321571, 1.18417854124946, 1.29775362182088
    ), `C18:2n6` = c(3.87945575171483, 3.92323613188795, 3.76364054902548, 
    5.06577970719103, 4.80803921474305, 4.91224971853683, 4.02299580739423, 
    4.54372872479707, 4.82620564149227, 4.25678452805456, 4.36274171481002, 
    4.50373596601407, 3.68582075656259, 3.63703712278184, 3.63345851654746
    ), `C18:3n6` = c(0, 0, 0, 0.905148029417311, 0.827396443157383, 
    0.775293052826773, 0, 0, 0, 1.15750769264949, 0.964626308802215, 
    0.915665336322678, 0, 0, 0), `C18:3n3` = c(1.39194551091549, 
    1.40064345580852, 1.42919483512712, 1.39062192817394, 1.49845344211352, 
    1.30952118148303, 2.35357641976877, 2.76250327310814, 2.86515013648772, 
    1.60555129728989, 1.45587118828482, 1.43437088732944, 0.480600385140279, 
    0.330481890055447, 0), `C18:4n3` = c(2.79031662141974, 2.62710301440603, 
    2.63596063217863, 2.78766334591821, 3.05848057302014, 2.84686196163271, 
    3.28865455469445, 3.6770096884001, 0, 2.89488008353606, 2.86951967618437, 
    2.92704256386294, 2.95690624403374, 3.0131150956719, 2.97294407015292
    ), `C20:2NMID` = c(1.79675828500747, 1.67472694112584, 1.71023215942584, 
    1.79504977110507, 2.11996286336895, 2.19607496854235, 1.90318892135688, 
    1.98612202147159, 1.13848953594177, 1.9787975398333, 1.8651065920191, 
    1.86648261907722, 2.67759704521509, 2.73356561599278, 2.73515886945089
    ), `C20:4n6` = c(2.44574384347239, 2.52259267607943, 2.33232734210716, 
    2.44341821770896, 2.40762744069925, 2.4680456522219, 3.2867488247999, 
    2.10460853626604, 2.56669699727025, 2.57717643660924, 2.52118239800579, 
    2.55923317314906, 1.72829932517955, 1.7849494727338, 1.68840073725993
    ), `C20:5 n3` = c(11.6239096560698, 11.803520563946, 12.1488622225109, 
    11.612856632737, 10.2284480063113, 10.3507803704276, 5.22424088425867, 
    6.01139041633936, 5.9909008189263, 11.0396463047511, 10.9876458002623, 
    11.0702910323955, 8.99033849446449, 8.83272175855722, 9.467373731655
    ), `C22:2NMID` = c(1.02488313440748, 1.07686632922806, 1.07330837420614, 
    0, 0, 0, 0, 0, 0, 1.26711174799954, 1.08479860989879, 1.04255528929623, 
    0, 0, 0), `C22:6 n3` = c(9.0247546224157, 9.10981782413574, 
    8.63589566877704, 9.01617310153626, 10.1851540063786, 10.1423872491666, 
    8.27785541862533, 9.83438072793925, 8.96742493175615, 8.64316844588114, 
    9.17206968369513, 9.58190617859761, 9.10642269734904, 8.94905601407587, 
    9.02074015361679)), row.names = c(NA, -15L), class = c("tbl_df", 
"tbl", "data.frame"))
>str(FA)
tibble [15 x 23] (S3: tbl_df/tbl/data.frame)
 $ sample   : chr [1:15] "c1" "c2" "c3" "Zn10_1" ...
 $ C14:0    : num [1:15] 1.97 2.01 1.88 1.97 2.57 ...
 $ C14:1    : num [1:15] 1.07 1.23 1.01 1.07 1.5 ...
 $ C15:1    : num [1:15] 1.1 0.868 1.043 1.099 1.296 ...
 $ C16:0    : num [1:15] 13 13.4 12.4 13 12 ...
 $ C16:1 n7 : num [1:15] 0.605 0.537 0.568 0.604 0 ...
 $ C16:1 n9 : num [1:15] 5.03 5.12 4.81 5.03 4.99 ...
 $ C17:0    : num [1:15] 5.2 4.43 4.97 5.2 5.42 ...
 $ C16:2n4  : num [1:15] 0.621 0.428 0.584 0.62 0.241 ...
 $ C18:0    : num [1:15] 5.93 5.98 6.69 5.92 5.8 ...
 $ C16:3    : num [1:15] 0.403 0.371 0.375 0 0 ...
 $ C18:1 n7 : num [1:15] 1.8 1.74 1.72 1.8 1.89 ...
 $ C18:1 n9 : num [1:15] 27.8 28.1 28.6 27.7 28.3 ...
 $ C16:4    : num [1:15] 0.574 0.769 0.641 0 0 ...
 $ C18:2n6  : num [1:15] 3.88 3.92 3.76 5.07 4.81 ...
 $ C18:3n6  : num [1:15] 0 0 0 0.905 0.827 ...
 $ C18:3n3  : num [1:15] 1.39 1.4 1.43 1.39 1.5 ...
 $ C18:4n3  : num [1:15] 2.79 2.63 2.64 2.79 3.06 ...
 $ C20:2NMID: num [1:15] 1.8 1.67 1.71 1.8 2.12 ...
 $ C20:4n6  : num [1:15] 2.45 2.52 2.33 2.44 2.41 ...
 $ C20:5 n3 : num [1:15] 11.6 11.8 12.1 11.6 10.2 ...
 $ C22:2NMID: num [1:15] 1.02 1.08 1.07 0 0 ...
 $ C22:6 n3 : num [1:15] 9.02 9.11 8.64 9.02 10.19 ...

第一列是样本名称,其余各列是每个样本对应的脂肪酸含量。但每当我尝试运行 PCA 时,都会出现以下错误:

> pca<-prcomp(FA, scale. = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric

我查看过其他人的数据框,发现结构非常相似的数据都能正常运行,所以说实话我不知道问题出在哪里……我无法更改第一列,因为它是样本名称;第一行也始终是脂肪酸的名称。所以想请教是否有人知道我该怎么做?预先感谢您的帮助。

您的第一列是字符型(character)列,而 prcomp 只接受数值型数据,因此不能把这一列包含在 PCA 中:

sapply(FA,class)
     sample       C14:0       C14:1       C15:1       C16:0    C16:1 n7 
"character"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
   C16:1 n9       C17:0     C16:2n4       C18:0       C16:3    C18:1 n7 
  "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
   C18:1 n9       C16:4     C18:2n6     C18:3n6     C18:3n3     C18:4n3 
  "numeric"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
  C20:2NMID     C20:4n6    C20:5 n3   C22:2NMID    C22:6 n3 
  "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 

排除第一列之后就可以正常运行:

# Run the PCA on the numeric fatty-acid columns only; the first column
# (sample names) is character and is what made prcomp() fail.
pca <- prcomp(FA[, -1])

# Scatter plot of the first two principal components, with each point
# labelled by its sample name.
plot(pca$x[, 1:2], pch = 20, cex = 0.2)
text(pca$x[, 1:2], labels = FA$sample, cex = 0.7)

使用 ggplot2:

library(ggplot2)
library(ggrepel)

# Combine the PCA scores with the sample names so ggplot can use both.
pca_scores <- data.frame(pca$x, sample = FA$sample)

# PC1 vs PC2, with non-overlapping sample labels from ggrepel.
ggplot(pca_scores, aes(x = PC1, y = PC2)) +
  geom_point() +
  geom_text_repel(aes(label = sample))

我建议用数字编号来“重命名”样本,并另外保存一份编号与原样本名称的对应关系(即“翻译”表)。

  # Numeric ids that stand in for the sample names. Derived from the data
  # instead of hard-coding 1:15, so this still works if rows are added.
  names <- seq_len(nrow(FA))

  # The "translation": lookup table from numeric id back to the sample name.
  sample_key <- data.frame(id = names, sample = FA$sample)

  # Fatty-acid measurements only (every column except the sample names).
  newdataframe <- FA[-1]

  # Data with the numeric id attached, kept for reference and export.
  # Do NOT pass this to prcomp(): the id is just a row label, not a
  # measurement, and would be treated as a variable in the PCA.
  dfnames <- cbind(names, newdataframe)

  # PCA on the measurements only, then label the scores with the numeric ids.
  pcanames <- prcomp(newdataframe, scale. = TRUE)
  rownames(pcanames$x) <- names