使用 prcomp 在 R 上进行 PCA(主成分分析)时出现问题
Trouble with a PCA (Principal Components Analysis) on R using prcomp
我想问是否有人可以帮助我开始对我的数据进行 PCA,以下是数据的结构:
> dput(FA)
structure(list(sample = c("c1", "c2", "c3", "Zn10_1", "Zn10_2",
"Zn10_3", "nZn10_1", "nZn10_2", "nZn10_3", "Zn100_1", "Zn100_2",
"Zn100_3", "nZn100_1", "nZn100_2", "nZn100_3"), `C14:0` = c(1.97065107387833,
2.01336092952724, 1.87687740699995, 1.96877720760228, 2.56733419600637,
1.94573831651913, 1.76152966586202, 1.7930086410055, 2.15832575068244,
2.13913049917982, 2.0408991811232, 2.0521020435284, 2.34112963482385,
2.39787472942157, 2.29796121207546), `C14:1` = c(1.07066553147741,
1.22543494743745, 1.01470008798304, 1.06964744965716, 1.50326388655048,
0.956312501379721, 1.18981069749714, 1.1776643100288, 1.26951774340309,
1.30932412066814, 1.13108118532112, 1.09142506848199, 0.700040776785988,
0.758198381738416, 0.666805060169721), `C15:1` = c(1.09998232960113,
0.867845652574848, 1.04294504519899, 1.09893637082181, 1.29641477576113,
1.43005364356829, 0.796595095921738, 0.786200576067033, 0.917925386715196,
1.33635485053488, 1.16071827309155, 1.12271887445182, 5.83072684031764,
5.88385095324636, 5.76534752495801), `C16:0` = c(12.9881447687587,
13.4295784749687, 12.4334301664617, 12.9757945121389, 11.990994848014,
12.2921035784454, 11.9978401727862, 12.5936108929039, 13.4165605095541,
12.2975009534812, 12.3667853503469, 12.0978424421172, 16.6359858671902,
16.6797856208545, 16.5034252392006), `C16:1 n7` = c(0.604809561292188,
0.537408553453964, 0.568429763971015, 0.604234455807833, 0, 0.885670765359058,
0.508829881844746, 0.492275464781356, 0.609281164695178, 0.879794714566601,
0.660135681023738, 0.594153631153198, 0.619312168688614, 0.677169547048814,
0.586285203847339), `C16:1 n9` = c(5.03325247787184, 5.11895735568352,
4.81152846173726, 5.02846642406346, 4.99035505890389, 5.75421090973311,
3.37949434633465, 3.51597276774025, 3.95268425841674, 4.96291606032666,
5.13695988437476, 4.46386851457305, 5.3316236939054, 5.38552361990531,
5.26964715947335), `C17:0` = c(5.20071966715394, 4.43246787844013,
4.97195981872387, 5.19577437099033, 5.42425714711782, 5.49681008410782,
5.52661669419387, 6.39958104215763, 6.87461328480437, 5.11732447614075,
5.30625667341959, 5.4999935697659, 3.21382356615385, 3.26951347972543,
3.1648077902961), `C16:2n4` = c(0.620873560264092, 0.428287464907068,
0.583964490439788, 0.620283179733671, 0.240522221848077, 0, 0.39258035827722,
0.383608274417387, 0.428025477707006, 0.894606073397688, 0.676375181171923,
0.61130092209557, 0.662917402259458, 0.721156628737455, 0), `C18:0` = c(5.9276156206326,
5.98219225803807, 6.68769974480681, 5.92197912863453, 5.80283912430669,
6.29594472284156, 7.34595350019057, 7.16679759099241, 7.47734303912648,
5.78753846324746, 6.04109405512498, 6.27590848490824, 4.98985294429608,
5.00237298730162, 5.1362861474394), `C16:3` = c(0.403206374194792,
0.370909239943441, 0.37495180704175, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.735278519590535, 0.792925025176817, 0.805198563223814),
`C18:1 n7` = c(1.80238068464764, 1.74132666653005, 1.71588115086903,
1.80066682447912, 1.88713735262001, 1.76074527031502, 4.12209376191081,
1.87483634459283, 1.93994540491356, 1.98398151542418, 1.87079041707096,
1.87248417090705, 0, 0, 0), `C18:1 n9` = c(27.7549758236815,
28.0712719522941, 28.6276763862272, 27.7285839809662, 28.2589558449305,
27.3343782423453, 33.9620124507686, 32.1903639696256, 33.7972702456779,
26.6534105005128, 27.2949458615664, 27.4318073673335, 26.3711488918378,
26.3019597402447, 27.4201563846584), `C16:4` = c(0.573886363271273,
0.769482981208631, 0.641160528802089, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1.33467370321571, 1.18417854124946, 1.29775362182088
), `C18:2n6` = c(3.87945575171483, 3.92323613188795, 3.76364054902548,
5.06577970719103, 4.80803921474305, 4.91224971853683, 4.02299580739423,
4.54372872479707, 4.82620564149227, 4.25678452805456, 4.36274171481002,
4.50373596601407, 3.68582075656259, 3.63703712278184, 3.63345851654746
), `C18:3n6` = c(0, 0, 0, 0.905148029417311, 0.827396443157383,
0.775293052826773, 0, 0, 0, 1.15750769264949, 0.964626308802215,
0.915665336322678, 0, 0, 0), `C18:3n3` = c(1.39194551091549,
1.40064345580852, 1.42919483512712, 1.39062192817394, 1.49845344211352,
1.30952118148303, 2.35357641976877, 2.76250327310814, 2.86515013648772,
1.60555129728989, 1.45587118828482, 1.43437088732944, 0.480600385140279,
0.330481890055447, 0), `C18:4n3` = c(2.79031662141974, 2.62710301440603,
2.63596063217863, 2.78766334591821, 3.05848057302014, 2.84686196163271,
3.28865455469445, 3.6770096884001, 0, 2.89488008353606, 2.86951967618437,
2.92704256386294, 2.95690624403374, 3.0131150956719, 2.97294407015292
), `C20:2NMID` = c(1.79675828500747, 1.67472694112584, 1.71023215942584,
1.79504977110507, 2.11996286336895, 2.19607496854235, 1.90318892135688,
1.98612202147159, 1.13848953594177, 1.9787975398333, 1.8651065920191,
1.86648261907722, 2.67759704521509, 2.73356561599278, 2.73515886945089
), `C20:4n6` = c(2.44574384347239, 2.52259267607943, 2.33232734210716,
2.44341821770896, 2.40762744069925, 2.4680456522219, 3.2867488247999,
2.10460853626604, 2.56669699727025, 2.57717643660924, 2.52118239800579,
2.55923317314906, 1.72829932517955, 1.7849494727338, 1.68840073725993
), `C20:5 n3` = c(11.6239096560698, 11.803520563946, 12.1488622225109,
11.612856632737, 10.2284480063113, 10.3507803704276, 5.22424088425867,
6.01139041633936, 5.9909008189263, 11.0396463047511, 10.9876458002623,
11.0702910323955, 8.99033849446449, 8.83272175855722, 9.467373731655
), `C22:2NMID` = c(1.02488313440748, 1.07686632922806, 1.07330837420614,
0, 0, 0, 0, 0, 0, 1.26711174799954, 1.08479860989879, 1.04255528929623,
0, 0, 0), `C22:6 n3` = c(9.0247546224157, 9.10981782413574,
8.63589566877704, 9.01617310153626, 10.1851540063786, 10.1423872491666,
8.27785541862533, 9.83438072793925, 8.96742493175615, 8.64316844588114,
9.17206968369513, 9.58190617859761, 9.10642269734904, 8.94905601407587,
9.02074015361679)), row.names = c(NA, -15L), class = c("tbl_df",
"tbl", "data.frame"))
>str(FA)
tibble [15 x 23] (S3: tbl_df/tbl/data.frame)
$ sample : chr [1:15] "c1" "c2" "c3" "Zn10_1" ...
$ C14:0 : num [1:15] 1.97 2.01 1.88 1.97 2.57 ...
$ C14:1 : num [1:15] 1.07 1.23 1.01 1.07 1.5 ...
$ C15:1 : num [1:15] 1.1 0.868 1.043 1.099 1.296 ...
$ C16:0 : num [1:15] 13 13.4 12.4 13 12 ...
$ C16:1 n7 : num [1:15] 0.605 0.537 0.568 0.604 0 ...
$ C16:1 n9 : num [1:15] 5.03 5.12 4.81 5.03 4.99 ...
$ C17:0 : num [1:15] 5.2 4.43 4.97 5.2 5.42 ...
$ C16:2n4 : num [1:15] 0.621 0.428 0.584 0.62 0.241 ...
$ C18:0 : num [1:15] 5.93 5.98 6.69 5.92 5.8 ...
$ C16:3 : num [1:15] 0.403 0.371 0.375 0 0 ...
$ C18:1 n7 : num [1:15] 1.8 1.74 1.72 1.8 1.89 ...
$ C18:1 n9 : num [1:15] 27.8 28.1 28.6 27.7 28.3 ...
$ C16:4 : num [1:15] 0.574 0.769 0.641 0 0 ...
$ C18:2n6 : num [1:15] 3.88 3.92 3.76 5.07 4.81 ...
$ C18:3n6 : num [1:15] 0 0 0 0.905 0.827 ...
$ C18:3n3 : num [1:15] 1.39 1.4 1.43 1.39 1.5 ...
$ C18:4n3 : num [1:15] 2.79 2.63 2.64 2.79 3.06 ...
$ C20:2NMID: num [1:15] 1.8 1.67 1.71 1.8 2.12 ...
$ C20:4n6 : num [1:15] 2.45 2.52 2.33 2.44 2.41 ...
$ C20:5 n3 : num [1:15] 11.6 11.8 12.1 11.6 10.2 ...
$ C22:2NMID: num [1:15] 1.02 1.08 1.07 0 0 ...
$ C22:6 n3 : num [1:15] 9.02 9.11 8.64 9.02 10.19 ...
第一列是样本名称,其余列是每个样本各自的脂肪酸值,但每当我尝试运行 PCA 时,都会出现下面的错误:
> pca<-prcomp(FA, scale. = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric
我查看过其他人的数据框,发现在非常相似的数据结构下 PCA 可以正常运行,所以老实说,我有些不知所措……我无法更改第一列,因为它是样本名称,而第一行始终是脂肪酸的名称。所以我想问是否有人知道我应该怎么做?预先感谢您的帮助。
您的第一列是字符型(character)列,而 prcomp 要求输入必须是数值型,因此不能把这一列包含在 PCA 中:
sapply(FA,class)
sample C14:0 C14:1 C15:1 C16:0 C16:1 n7
"character" "numeric" "numeric" "numeric" "numeric" "numeric"
C16:1 n9 C17:0 C16:2n4 C18:0 C16:3 C18:1 n7
"numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
C18:1 n9 C16:4 C18:2n6 C18:3n6 C18:3n3 C18:4n3
"numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
C20:2NMID C20:4n6 C20:5 n3 C22:2NMID C22:6 n3
"numeric" "numeric" "numeric" "numeric" "numeric"
排除第一列之后,应该就可以正常运行了:
# Run the PCA on every column except the first one (the character
# sample-name column), since prcomp() only accepts numeric input.
# NOTE(review): the failing call in the question used scale. = TRUE; this call
# leaves scale. at its default -- confirm that unscaled variables are intended.
pca <- prcomp(FA[, -1])

# Scatter the samples on the first two principal components and label each
# point with its sample name.
plot(pca$x[, c("PC1", "PC2")], pch = 20, cex = 0.2)
text(pca$x[, c("PC1", "PC2")], labels = FA$sample, cex = 0.7)
使用 ggplot2:
# Attach ggplot2 explicitly: ggplot(), aes() and geom_point() come from it, and
# relying on ggrepel to attach it implicitly hides the real dependency.
library(ggplot2)
library(ggrepel)

# Combine the principal-component scores with the sample labels so that
# ggplot2 can map both from a single data frame.
pca_scores <- data.frame(pca$x, sample = FA$sample)

# PC1 vs PC2 scatter plot; geom_text_repel() places the sample labels so that
# they do not overlap each other or the points.
ggplot(pca_scores, aes(x = PC1, y = PC2)) +
  geom_point() +
  geom_text_repel(aes(label = sample))
我建议你用数字给样本“重新命名”,并单独保存数字与原样本名称之间的对照关系(即“翻译”)。
# Numeric ids that stand in for the sample names; derived from the data so the
# code keeps working if the number of samples changes.
# (`names` is kept for compatibility; calls such as names(x) still resolve to
# the base function because R looks up functions separately from values.)
names <- seq_len(nrow(FA))

# The "translation": which numeric id corresponds to which original sample.
translation <- data.frame(id = names, sample = FA$sample)

# All measurement columns, i.e. everything except the first (sample) column.
newdataframe <- FA[-1]

# Data with the numeric ids attached, for reference/printing only.
dfnames <- cbind(names, newdataframe)

# Run the PCA on the measurements only. The id column must NOT be passed to
# prcomp(): it is an arbitrary row index, and including it would treat it as a
# measured variable and distort the principal components.
pcanames <- prcomp(newdataframe, scale. = TRUE)
我想问是否有人可以帮助我开始对我的数据进行 PCA,以下是数据的结构:
> dput(FA)
structure(list(sample = c("c1", "c2", "c3", "Zn10_1", "Zn10_2",
"Zn10_3", "nZn10_1", "nZn10_2", "nZn10_3", "Zn100_1", "Zn100_2",
"Zn100_3", "nZn100_1", "nZn100_2", "nZn100_3"), `C14:0` = c(1.97065107387833,
2.01336092952724, 1.87687740699995, 1.96877720760228, 2.56733419600637,
1.94573831651913, 1.76152966586202, 1.7930086410055, 2.15832575068244,
2.13913049917982, 2.0408991811232, 2.0521020435284, 2.34112963482385,
2.39787472942157, 2.29796121207546), `C14:1` = c(1.07066553147741,
1.22543494743745, 1.01470008798304, 1.06964744965716, 1.50326388655048,
0.956312501379721, 1.18981069749714, 1.1776643100288, 1.26951774340309,
1.30932412066814, 1.13108118532112, 1.09142506848199, 0.700040776785988,
0.758198381738416, 0.666805060169721), `C15:1` = c(1.09998232960113,
0.867845652574848, 1.04294504519899, 1.09893637082181, 1.29641477576113,
1.43005364356829, 0.796595095921738, 0.786200576067033, 0.917925386715196,
1.33635485053488, 1.16071827309155, 1.12271887445182, 5.83072684031764,
5.88385095324636, 5.76534752495801), `C16:0` = c(12.9881447687587,
13.4295784749687, 12.4334301664617, 12.9757945121389, 11.990994848014,
12.2921035784454, 11.9978401727862, 12.5936108929039, 13.4165605095541,
12.2975009534812, 12.3667853503469, 12.0978424421172, 16.6359858671902,
16.6797856208545, 16.5034252392006), `C16:1 n7` = c(0.604809561292188,
0.537408553453964, 0.568429763971015, 0.604234455807833, 0, 0.885670765359058,
0.508829881844746, 0.492275464781356, 0.609281164695178, 0.879794714566601,
0.660135681023738, 0.594153631153198, 0.619312168688614, 0.677169547048814,
0.586285203847339), `C16:1 n9` = c(5.03325247787184, 5.11895735568352,
4.81152846173726, 5.02846642406346, 4.99035505890389, 5.75421090973311,
3.37949434633465, 3.51597276774025, 3.95268425841674, 4.96291606032666,
5.13695988437476, 4.46386851457305, 5.3316236939054, 5.38552361990531,
5.26964715947335), `C17:0` = c(5.20071966715394, 4.43246787844013,
4.97195981872387, 5.19577437099033, 5.42425714711782, 5.49681008410782,
5.52661669419387, 6.39958104215763, 6.87461328480437, 5.11732447614075,
5.30625667341959, 5.4999935697659, 3.21382356615385, 3.26951347972543,
3.1648077902961), `C16:2n4` = c(0.620873560264092, 0.428287464907068,
0.583964490439788, 0.620283179733671, 0.240522221848077, 0, 0.39258035827722,
0.383608274417387, 0.428025477707006, 0.894606073397688, 0.676375181171923,
0.61130092209557, 0.662917402259458, 0.721156628737455, 0), `C18:0` = c(5.9276156206326,
5.98219225803807, 6.68769974480681, 5.92197912863453, 5.80283912430669,
6.29594472284156, 7.34595350019057, 7.16679759099241, 7.47734303912648,
5.78753846324746, 6.04109405512498, 6.27590848490824, 4.98985294429608,
5.00237298730162, 5.1362861474394), `C16:3` = c(0.403206374194792,
0.370909239943441, 0.37495180704175, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.735278519590535, 0.792925025176817, 0.805198563223814),
`C18:1 n7` = c(1.80238068464764, 1.74132666653005, 1.71588115086903,
1.80066682447912, 1.88713735262001, 1.76074527031502, 4.12209376191081,
1.87483634459283, 1.93994540491356, 1.98398151542418, 1.87079041707096,
1.87248417090705, 0, 0, 0), `C18:1 n9` = c(27.7549758236815,
28.0712719522941, 28.6276763862272, 27.7285839809662, 28.2589558449305,
27.3343782423453, 33.9620124507686, 32.1903639696256, 33.7972702456779,
26.6534105005128, 27.2949458615664, 27.4318073673335, 26.3711488918378,
26.3019597402447, 27.4201563846584), `C16:4` = c(0.573886363271273,
0.769482981208631, 0.641160528802089, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1.33467370321571, 1.18417854124946, 1.29775362182088
), `C18:2n6` = c(3.87945575171483, 3.92323613188795, 3.76364054902548,
5.06577970719103, 4.80803921474305, 4.91224971853683, 4.02299580739423,
4.54372872479707, 4.82620564149227, 4.25678452805456, 4.36274171481002,
4.50373596601407, 3.68582075656259, 3.63703712278184, 3.63345851654746
), `C18:3n6` = c(0, 0, 0, 0.905148029417311, 0.827396443157383,
0.775293052826773, 0, 0, 0, 1.15750769264949, 0.964626308802215,
0.915665336322678, 0, 0, 0), `C18:3n3` = c(1.39194551091549,
1.40064345580852, 1.42919483512712, 1.39062192817394, 1.49845344211352,
1.30952118148303, 2.35357641976877, 2.76250327310814, 2.86515013648772,
1.60555129728989, 1.45587118828482, 1.43437088732944, 0.480600385140279,
0.330481890055447, 0), `C18:4n3` = c(2.79031662141974, 2.62710301440603,
2.63596063217863, 2.78766334591821, 3.05848057302014, 2.84686196163271,
3.28865455469445, 3.6770096884001, 0, 2.89488008353606, 2.86951967618437,
2.92704256386294, 2.95690624403374, 3.0131150956719, 2.97294407015292
), `C20:2NMID` = c(1.79675828500747, 1.67472694112584, 1.71023215942584,
1.79504977110507, 2.11996286336895, 2.19607496854235, 1.90318892135688,
1.98612202147159, 1.13848953594177, 1.9787975398333, 1.8651065920191,
1.86648261907722, 2.67759704521509, 2.73356561599278, 2.73515886945089
), `C20:4n6` = c(2.44574384347239, 2.52259267607943, 2.33232734210716,
2.44341821770896, 2.40762744069925, 2.4680456522219, 3.2867488247999,
2.10460853626604, 2.56669699727025, 2.57717643660924, 2.52118239800579,
2.55923317314906, 1.72829932517955, 1.7849494727338, 1.68840073725993
), `C20:5 n3` = c(11.6239096560698, 11.803520563946, 12.1488622225109,
11.612856632737, 10.2284480063113, 10.3507803704276, 5.22424088425867,
6.01139041633936, 5.9909008189263, 11.0396463047511, 10.9876458002623,
11.0702910323955, 8.99033849446449, 8.83272175855722, 9.467373731655
), `C22:2NMID` = c(1.02488313440748, 1.07686632922806, 1.07330837420614,
0, 0, 0, 0, 0, 0, 1.26711174799954, 1.08479860989879, 1.04255528929623,
0, 0, 0), `C22:6 n3` = c(9.0247546224157, 9.10981782413574,
8.63589566877704, 9.01617310153626, 10.1851540063786, 10.1423872491666,
8.27785541862533, 9.83438072793925, 8.96742493175615, 8.64316844588114,
9.17206968369513, 9.58190617859761, 9.10642269734904, 8.94905601407587,
9.02074015361679)), row.names = c(NA, -15L), class = c("tbl_df",
"tbl", "data.frame"))
>str(FA)
tibble [15 x 23] (S3: tbl_df/tbl/data.frame)
$ sample : chr [1:15] "c1" "c2" "c3" "Zn10_1" ...
$ C14:0 : num [1:15] 1.97 2.01 1.88 1.97 2.57 ...
$ C14:1 : num [1:15] 1.07 1.23 1.01 1.07 1.5 ...
$ C15:1 : num [1:15] 1.1 0.868 1.043 1.099 1.296 ...
$ C16:0 : num [1:15] 13 13.4 12.4 13 12 ...
$ C16:1 n7 : num [1:15] 0.605 0.537 0.568 0.604 0 ...
$ C16:1 n9 : num [1:15] 5.03 5.12 4.81 5.03 4.99 ...
$ C17:0 : num [1:15] 5.2 4.43 4.97 5.2 5.42 ...
$ C16:2n4 : num [1:15] 0.621 0.428 0.584 0.62 0.241 ...
$ C18:0 : num [1:15] 5.93 5.98 6.69 5.92 5.8 ...
$ C16:3 : num [1:15] 0.403 0.371 0.375 0 0 ...
$ C18:1 n7 : num [1:15] 1.8 1.74 1.72 1.8 1.89 ...
$ C18:1 n9 : num [1:15] 27.8 28.1 28.6 27.7 28.3 ...
$ C16:4 : num [1:15] 0.574 0.769 0.641 0 0 ...
$ C18:2n6 : num [1:15] 3.88 3.92 3.76 5.07 4.81 ...
$ C18:3n6 : num [1:15] 0 0 0 0.905 0.827 ...
$ C18:3n3 : num [1:15] 1.39 1.4 1.43 1.39 1.5 ...
$ C18:4n3 : num [1:15] 2.79 2.63 2.64 2.79 3.06 ...
$ C20:2NMID: num [1:15] 1.8 1.67 1.71 1.8 2.12 ...
$ C20:4n6 : num [1:15] 2.45 2.52 2.33 2.44 2.41 ...
$ C20:5 n3 : num [1:15] 11.6 11.8 12.1 11.6 10.2 ...
$ C22:2NMID: num [1:15] 1.02 1.08 1.07 0 0 ...
$ C22:6 n3 : num [1:15] 9.02 9.11 8.64 9.02 10.19 ...
第一列是样本名称,其余列是每个样本各自的脂肪酸值,但每当我尝试运行 PCA 时,都会出现下面的错误:
> pca<-prcomp(FA, scale. = TRUE)
Error in colMeans(x, na.rm = TRUE) : 'x' must be numeric
我查看过其他人的数据框,发现在非常相似的数据结构下 PCA 可以正常运行,所以老实说,我有些不知所措……我无法更改第一列,因为它是样本名称,而第一行始终是脂肪酸的名称。所以我想问是否有人知道我应该怎么做?预先感谢您的帮助。
您的第一列是字符型(character)列,而 prcomp 要求输入必须是数值型,因此不能把这一列包含在 PCA 中:
sapply(FA,class)
sample C14:0 C14:1 C15:1 C16:0 C16:1 n7
"character" "numeric" "numeric" "numeric" "numeric" "numeric"
C16:1 n9 C17:0 C16:2n4 C18:0 C16:3 C18:1 n7
"numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
C18:1 n9 C16:4 C18:2n6 C18:3n6 C18:3n3 C18:4n3
"numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
C20:2NMID C20:4n6 C20:5 n3 C22:2NMID C22:6 n3
"numeric" "numeric" "numeric" "numeric" "numeric"
排除第一列之后,应该就可以正常运行了:
# Run the PCA on every column except the first one (the character
# sample-name column), since prcomp() only accepts numeric input.
# NOTE(review): the failing call in the question used scale. = TRUE; this call
# leaves scale. at its default -- confirm that unscaled variables are intended.
pca <- prcomp(FA[, -1])

# Scatter the samples on the first two principal components and label each
# point with its sample name.
plot(pca$x[, c("PC1", "PC2")], pch = 20, cex = 0.2)
text(pca$x[, c("PC1", "PC2")], labels = FA$sample, cex = 0.7)
使用 ggplot2:
# Attach ggplot2 explicitly: ggplot(), aes() and geom_point() come from it, and
# relying on ggrepel to attach it implicitly hides the real dependency.
library(ggplot2)
library(ggrepel)

# Combine the principal-component scores with the sample labels so that
# ggplot2 can map both from a single data frame.
pca_scores <- data.frame(pca$x, sample = FA$sample)

# PC1 vs PC2 scatter plot; geom_text_repel() places the sample labels so that
# they do not overlap each other or the points.
ggplot(pca_scores, aes(x = PC1, y = PC2)) +
  geom_point() +
  geom_text_repel(aes(label = sample))
我建议你用数字给样本“重新命名”,并单独保存数字与原样本名称之间的对照关系(即“翻译”)。
# Numeric ids that stand in for the sample names; derived from the data so the
# code keeps working if the number of samples changes.
# (`names` is kept for compatibility; calls such as names(x) still resolve to
# the base function because R looks up functions separately from values.)
names <- seq_len(nrow(FA))

# The "translation": which numeric id corresponds to which original sample.
translation <- data.frame(id = names, sample = FA$sample)

# All measurement columns, i.e. everything except the first (sample) column.
newdataframe <- FA[-1]

# Data with the numeric ids attached, for reference/printing only.
dfnames <- cbind(names, newdataframe)

# Run the PCA on the measurements only. The id column must NOT be passed to
# prcomp(): it is an arbitrary row index, and including it would treat it as a
# measured variable and distort the principal components.
pcanames <- prcomp(newdataframe, scale. = TRUE)