双标图 princomp 和双标图中的颜色
Color in biplot princomp and biplot
如何在双标图中用不同的颜色给鸢尾花数据集的物种着色
使用 princomp 和双标图。
最佳
data(iris)
fit <- princomp(iris[,c(1:3)], cor=TRUE)
biplot(fit)
不幸的是它不支持它。您必须编写自己的双标图函数并添加为每个样本赋予不同颜色的可能性,来源相当直接。
https://github.com/SurajGupta/r-source/blob/master/src/library/stats/R/biplot.R
或者使用更现代的函数,例如 autoplot
autoplot( fit, data=iris, colour="Species", loadings=TRUE )
如下所示:
https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
用于绘制双标图的函数,stats:::biplot.princomp
和stats:::biplot.default
不允许不同点使用多种颜色或不同颜色。最简单的解决方案是使用一个包,例如另一个答案中提到的 ggfortify:
library(ggfortify)
autoplot( fit, data=iris, colour="Species", loadings=TRUE )
或factoextra
:
library(factoextra)
fviz_pca_biplot(fit, col.ind = iris$Species)
最后一个选项,是像下面这样重写双标图函数,col1
是数据点的颜色向量,col2
是载荷的颜色:
biplot_col = function (x, y, var.axes = TRUE,col1,col2, cex = 0.8,
xlabs = NULL, ylabs = NULL, expand = 1, xlim = NULL, ylim = NULL,
arrow.len = 0.1, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
...)
{
n <- nrow(x)
p <- nrow(y)
xlabs <- as.character(1L:n)
dimnames(x) <- list(xlabs, dimnames(x)[[2L]])
ylabs <- dimnames(y)[[1L]]
ylabs <- as.character(ylabs)
dimnames(y) <- list(ylabs, dimnames(y)[[2L]])
unsigned.range <- function(x) c(-abs(min(x, na.rm = TRUE)),
abs(max(x, na.rm = TRUE)))
rangx1 <- unsigned.range(x[, 1L])
rangx2 <- unsigned.range(x[, 2L])
rangy1 <- unsigned.range(y[, 1L])
rangy2 <- unsigned.range(y[, 2L])
if (missing(xlim) && missing(ylim))
xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
else if (missing(xlim))
xlim <- rangx1
else if (missing(ylim))
ylim <- rangx2
ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
on.exit(par(op))
op <- par(pty = "s")
if (!is.null(main))
op <- c(op, par(mar = par("mar") + c(0, 0, 1, 0)))
plot(x, type = "n", xlim = xlim, ylim = ylim, col = col1,
xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
text(x, xlabs, cex = cex[1L], col = col1, ...)
par(new = TRUE)
dev.hold()
on.exit(dev.flush(), add = TRUE)
plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
ratio, xlab = "", ylab = "", col = col1, ...)
axis(3, col = col2, ...)
axis(4, col = col2, ...)
box(col = col1)
text(y, labels = ylabs, cex = cex[2L], col = col2, ...)
if (var.axes)
arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col2,
length = arrow.len)
invisible()
}
然后这样画:
lam <- fit$sdev[1:2]
lam <- lam * sqrt(fit$n.obs)
scores <- fit$scores
species2col = c("#c15050","#d97642","#d49d42")
names(species2col) = unique(iris$Species)
col1 = species2col[as.character(iris$Species)]
col2 = "#693c72"
par(mar=rep(2.2,4))
biplot_col(t(t(scores[,1:2])/lam), t(t(fit$loadings[,1:2]) * lam),
col1 = col1, col2 = col2)
如何在双标图中用不同的颜色给鸢尾花数据集的物种着色 使用 princomp 和双标图。 最佳
data(iris)
fit <- princomp(iris[,c(1:3)], cor=TRUE)
biplot(fit)
不幸的是它不支持它。您必须编写自己的双标图函数并添加为每个样本赋予不同颜色的可能性,来源相当直接。
https://github.com/SurajGupta/r-source/blob/master/src/library/stats/R/biplot.R
或者使用更现代的函数,例如 autoplot
autoplot( fit, data=iris, colour="Species", loadings=TRUE )
如下所示:
https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
用于绘制双标图的函数,stats:::biplot.princomp
和stats:::biplot.default
不允许不同点使用多种颜色或不同颜色。最简单的解决方案是使用一个包,例如另一个答案中提到的 ggfortify:
library(ggfortify)
autoplot( fit, data=iris, colour="Species", loadings=TRUE )
或factoextra
:
library(factoextra)
fviz_pca_biplot(fit, col.ind = iris$Species)
最后一个选项,是像下面这样重写双标图函数,col1
是数据点的颜色向量,col2
是载荷的颜色:
biplot_col = function (x, y, var.axes = TRUE,col1,col2, cex = 0.8,
xlabs = NULL, ylabs = NULL, expand = 1, xlim = NULL, ylim = NULL,
arrow.len = 0.1, main = NULL, sub = NULL, xlab = NULL, ylab = NULL,
...)
{
n <- nrow(x)
p <- nrow(y)
xlabs <- as.character(1L:n)
dimnames(x) <- list(xlabs, dimnames(x)[[2L]])
ylabs <- dimnames(y)[[1L]]
ylabs <- as.character(ylabs)
dimnames(y) <- list(ylabs, dimnames(y)[[2L]])
unsigned.range <- function(x) c(-abs(min(x, na.rm = TRUE)),
abs(max(x, na.rm = TRUE)))
rangx1 <- unsigned.range(x[, 1L])
rangx2 <- unsigned.range(x[, 2L])
rangy1 <- unsigned.range(y[, 1L])
rangy2 <- unsigned.range(y[, 2L])
if (missing(xlim) && missing(ylim))
xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
else if (missing(xlim))
xlim <- rangx1
else if (missing(ylim))
ylim <- rangx2
ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
on.exit(par(op))
op <- par(pty = "s")
if (!is.null(main))
op <- c(op, par(mar = par("mar") + c(0, 0, 1, 0)))
plot(x, type = "n", xlim = xlim, ylim = ylim, col = col1,
xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
text(x, xlabs, cex = cex[1L], col = col1, ...)
par(new = TRUE)
dev.hold()
on.exit(dev.flush(), add = TRUE)
plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
ratio, xlab = "", ylab = "", col = col1, ...)
axis(3, col = col2, ...)
axis(4, col = col2, ...)
box(col = col1)
text(y, labels = ylabs, cex = cex[2L], col = col2, ...)
if (var.axes)
arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col2,
length = arrow.len)
invisible()
}
然后这样画:
lam <- fit$sdev[1:2]
lam <- lam * sqrt(fit$n.obs)
scores <- fit$scores
species2col = c("#c15050","#d97642","#d49d42")
names(species2col) = unique(iris$Species)
col1 = species2col[as.character(iris$Species)]
col2 = "#693c72"
par(mar=rep(2.2,4))
biplot_col(t(t(scores[,1:2])/lam), t(t(fit$loadings[,1:2]) * lam),
col1 = col1, col2 = col2)