R:PCA ggplot 错误 "arguments imply differing number of rows"
R: PCA ggplot Error "arguments imply differing number of rows"
我有一个数据集:
https://docs.google.com/spreadsheets/d/1ZgyRQ2uTw-MjjkJgWCIiZ1vpnxKmF3o15a5awndttgo/edit?usp=sharing
我正在尝试应用 PCA 分析并根据此 post 中提供的图表获得图表:
但是,错误似乎并没有消失:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names =
TRUE, :
arguments imply differing number of rows: 0, 1006
以下是我无法找到错误源的代码。想对错误检测有一些帮助。有什么提示吗?
目标是生成按 Happiness.in.life 级别分组的 PCA 图。我修改了原始代码以适合我的数据集。最初,group 由 Genders 确定,它有 2 个级别。我正在尝试做的是基于 Happiness.in.life 的 5 个级别构建一个图表。但是,我似乎不能使用旧代码...
谢谢!
library(magrittr)
library(dplyr)
library(tidyr)
df <- happiness_reduced %>% dplyr::select(Happiness.in.life:Internet.usage, Happiness.in.life)
head(df)
vars_on_hap <- df %>% dplyr::select(-Happiness.in.life)
head(vars_on_hap)
group<-df$Happiness.in.life
fit <- prcomp(vars_on_hap)
pcData <- data.frame(fit$x)
vPCs <- fit$rotation[, c("PC1", "PC2")] %>% as.data.frame()
multiple <- min(
(max(pcData[,"PC1"]) - min(pcData[,"PC1"]))/(max(vPCs[,"PC1"])-
min(vPCs[,"PC1"])),
(max(pcData[,"PC2"]) - min(pcData[,"PC2"]))/(max(vPCs[,"PC2"])-
min(vPCs[,"PC2"]))
)
ggplot(pcData, aes(x=PC1, y=PC2)) +
geom_point(aes(colour=groups)) +
coord_equal() +
geom_text(data=vPCs,
aes(x = fit$rotation[, "PC1"]*multiple*0.82,
y = fit$rotation[,"PC2"]*multiple*0.82,
label=rownames(fit$rotation)),
size = 2, vjust=1, color="black") +
geom_segment(data=vPCs,
aes(x = 0,
y = 0,
xend = fit$rotation[,"PC1"]*multiple*0.8,
yend = fit$rotation[,"PC2"]*multiple*0.8),
arrow = arrow(length = unit(.2, 'cm')),
color = "grey30")
以下是如何在 ggplot2 中绘制 PCA 结果的方法:
library(tidyverse)
library(ggrepel)
一个好主意(不是在所有情况下,例如,如果它们都在相同的单位)是在 PCA 之前缩放变量
hapiness %>% #this is the data from google drive. In the future try not top post such links on SO because they tend to be unusable after some time has passed
select(-Happiness.in.life) %>%
prcomp(center = TRUE, scale. = TRUE) -> fit
现在我们可以继续绘制拟合:
fit$x %>% #coordinates of the points are in x element
as.data.frame()%>% #convert matrix to data frame
select(PC1, PC2) %>% #select the first two PC
bind_cols(hapiness = as.factor(hapiness$Happiness.in.life)) %>% #add the coloring variable
ggplot() +
geom_point(aes(x = PC1, y = PC2, colour = hapiness)) + #plot points and color
geom_segment(data = fit$rotation %>% #data we want plotted by geom_segment is in rotation element
as.data.frame()%>%
select(PC1, PC2) %>%
rownames_to_column(), #get to row names so you can label after
aes(x = 0, y = 0, xend = PC1 * 7, yend = PC2* 7, group = rowname), #I scaled the rotation by 7 so it fits in the plot nicely
arrow = arrow(angle = 20, type = "closed", ends = "last",length = unit(0.2,"cm")),
color = "grey30") +
geom_text_repel(data = fit$rotation %>%
as.data.frame()%>%
select(PC1, PC2) %>%
rownames_to_column(),
aes(x = PC1*7,
y = PC2*7,
label = rowname)) +
coord_equal(ratio = fit$sdev[2]^2 / fit$sdev[1]^2) + #I like setting the ratio to the ratio of eigen values
xlab(paste("PC1", round(fit$sdev[1]^2/ sum(fit$sdev^2) *100, 2), "%")) +
ylab(paste("PC2", round(fit$sdev[2]^2/ sum(fit$sdev^2) *100, 2), "%")) +
theme_bw()
看看左边所有快乐的人(因为使用的颜色很难注意到,我建议使用 ggpubr 库中的调色板 jco)get_palette('jco', 5)
即 scale_color_manual(values = get_palette('jco', 5))
使用库 ggord 可以实现非常相似的情节:
library(ggord)
ggord(fit, grp_in = as.factor(hapiness$Happiness.in.life),
size = 1, ellipse = F, ext = 1.2, vec_ext = 5)
主要区别在于 ggord 对轴使用等比例缩放。另外,我将旋转缩放了 5,而不是第一个图中的 7。
如您所见,我不喜欢很多中间数据帧。
我有一个数据集: https://docs.google.com/spreadsheets/d/1ZgyRQ2uTw-MjjkJgWCIiZ1vpnxKmF3o15a5awndttgo/edit?usp=sharing
我正在尝试应用 PCA 分析并根据此 post 中提供的图表获得图表:
但是,错误似乎并没有消失:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names =
TRUE, :
arguments imply differing number of rows: 0, 1006
以下是我无法找到错误源的代码。想对错误检测有一些帮助。有什么提示吗? 目标是生成按 Happiness.in.life 级别分组的 PCA 图。我修改了原始代码以适合我的数据集。最初,group 由 Genders 确定,它有 2 个级别。我正在尝试做的是基于 Happiness.in.life 的 5 个级别构建一个图表。但是,我似乎不能使用旧代码...
谢谢!
library(magrittr)
library(dplyr)
library(tidyr)
df <- happiness_reduced %>% dplyr::select(Happiness.in.life:Internet.usage, Happiness.in.life)
head(df)
vars_on_hap <- df %>% dplyr::select(-Happiness.in.life)
head(vars_on_hap)
group<-df$Happiness.in.life
fit <- prcomp(vars_on_hap)
pcData <- data.frame(fit$x)
vPCs <- fit$rotation[, c("PC1", "PC2")] %>% as.data.frame()
multiple <- min(
(max(pcData[,"PC1"]) - min(pcData[,"PC1"]))/(max(vPCs[,"PC1"])-
min(vPCs[,"PC1"])),
(max(pcData[,"PC2"]) - min(pcData[,"PC2"]))/(max(vPCs[,"PC2"])-
min(vPCs[,"PC2"]))
)
ggplot(pcData, aes(x=PC1, y=PC2)) +
geom_point(aes(colour=groups)) +
coord_equal() +
geom_text(data=vPCs,
aes(x = fit$rotation[, "PC1"]*multiple*0.82,
y = fit$rotation[,"PC2"]*multiple*0.82,
label=rownames(fit$rotation)),
size = 2, vjust=1, color="black") +
geom_segment(data=vPCs,
aes(x = 0,
y = 0,
xend = fit$rotation[,"PC1"]*multiple*0.8,
yend = fit$rotation[,"PC2"]*multiple*0.8),
arrow = arrow(length = unit(.2, 'cm')),
color = "grey30")
以下是如何在 ggplot2 中绘制 PCA 结果的方法:
library(tidyverse)
library(ggrepel)
一个好主意(不是在所有情况下,例如,如果它们都在相同的单位)是在 PCA 之前缩放变量
hapiness %>% #this is the data from google drive. In the future try not top post such links on SO because they tend to be unusable after some time has passed
select(-Happiness.in.life) %>%
prcomp(center = TRUE, scale. = TRUE) -> fit
现在我们可以继续绘制拟合:
fit$x %>% #coordinates of the points are in x element
as.data.frame()%>% #convert matrix to data frame
select(PC1, PC2) %>% #select the first two PC
bind_cols(hapiness = as.factor(hapiness$Happiness.in.life)) %>% #add the coloring variable
ggplot() +
geom_point(aes(x = PC1, y = PC2, colour = hapiness)) + #plot points and color
geom_segment(data = fit$rotation %>% #data we want plotted by geom_segment is in rotation element
as.data.frame()%>%
select(PC1, PC2) %>%
rownames_to_column(), #get to row names so you can label after
aes(x = 0, y = 0, xend = PC1 * 7, yend = PC2* 7, group = rowname), #I scaled the rotation by 7 so it fits in the plot nicely
arrow = arrow(angle = 20, type = "closed", ends = "last",length = unit(0.2,"cm")),
color = "grey30") +
geom_text_repel(data = fit$rotation %>%
as.data.frame()%>%
select(PC1, PC2) %>%
rownames_to_column(),
aes(x = PC1*7,
y = PC2*7,
label = rowname)) +
coord_equal(ratio = fit$sdev[2]^2 / fit$sdev[1]^2) + #I like setting the ratio to the ratio of eigen values
xlab(paste("PC1", round(fit$sdev[1]^2/ sum(fit$sdev^2) *100, 2), "%")) +
ylab(paste("PC2", round(fit$sdev[2]^2/ sum(fit$sdev^2) *100, 2), "%")) +
theme_bw()
看看左边所有快乐的人(因为使用的颜色很难注意到,我建议使用 ggpubr 库中的调色板 jco)get_palette('jco', 5)
即 scale_color_manual(values = get_palette('jco', 5))
使用库 ggord 可以实现非常相似的情节:
library(ggord)
ggord(fit, grp_in = as.factor(hapiness$Happiness.in.life),
size = 1, ellipse = F, ext = 1.2, vec_ext = 5)
主要区别在于 ggord 对轴使用等比例缩放。另外,我将旋转缩放了 5,而不是第一个图中的 7。
如您所见,我不喜欢很多中间数据帧。