在 R 中用 K-means 标记特定集群
Labeling a particular cluster in K-means in R
如果我只想标记集群 3 中的数据点,需要对代码进行哪些修改?
# Question script: run k-means on the iris petal measurements and plot the
# resulting clusters. Console artifacts from the original paste (a leading
# "> " prompt, a line of printed table output, and a trailing backtick) have
# been removed so the snippet parses as plain R.
library(datasets)
head(iris)
library(ggplot2)

# Reference plot: petal length vs. petal width, coloured by species.
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()

# Seed the RNG first: kmeans() picks its starting centers at random.
set.seed(20)
# Cluster on columns 3:4 of iris with 3 centers and 20 random starts.
irisCluster <- kmeans(iris[, 3:4], 3, nstart = 20)
irisCluster

# Cross-tabulate the cluster assignment against the species.
table(irisCluster$cluster, iris$Species)
# Apparently the header row of the printed table (the original post did not
# include the counts):
#   setosa versicolor virginica

# Convert the assignment to a factor so it maps to a discrete colour scale.
irisCluster$cluster <- as.factor(irisCluster$cluster)
ggplot(iris, aes(Petal.Length, Petal.Width, color = irisCluster$cluster)) +
  geom_point()
您可以把不属于集群 3 的点的标签设为空白。您可能需要根据实际数据调整标签的位置。
# Label only the points assigned to cluster 3. Every other point gets an
# empty-string label, so geom_text() draws nothing visible for it.
# Expects `irisCluster` (the kmeans() result) to exist already.
library(dplyr)
library(ggplot2)

iris %>%
  mutate(
    cluster = irisCluster$cluster,
    # replace() with "" turns the numeric petal lengths into character labels.
    label = replace(Petal.Length, cluster != 3, "")
  ) %>%
  ggplot(
    aes(x = Petal.Length, y = Petal.Width, color = cluster, label = label)
  ) +
  geom_point() +
  # Nudge the text away from its point; tune these offsets for other data.
  geom_text(hjust = -0.4, vjust = -0.5)
你的问题有点模棱两可,但如果你想突出显示特定集群中的点,你可以使用 gghighlight 包,例如
# Highlight the points of one k-means cluster using the gghighlight package.
library(datasets)
library(ggplot2)
# install.packages("gghighlight")
library(gghighlight)

# Seed the RNG first: kmeans() picks its starting centers at random.
set.seed(20)
irisCluster <- kmeans(
  x = iris[, c("Petal.Length", "Petal.Width")],
  centers = 3,
  nstart = 20
)
print(irisCluster)

# Cross-tabulate the cluster assignment against the species.
print(table(irisCluster[["cluster"]], iris[["Species"]]))

# Attach the assignment to the data frame so the plot can refer to it by name.
iris[["cluster"]] <- as.factor(irisCluster[["cluster"]])

ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, color = factor(cluster))
) +
  geom_point() +
  # Highlight only the rows where the predicate holds (cluster 3).
  gghighlight(cluster == 3, keep_scales = TRUE)
如果我只想标记集群 3 中的数据点,需要对代码进行哪些修改?
# Question script: run k-means on the iris petal measurements and plot the
# resulting clusters. Console artifacts from the original paste (a leading
# "> " prompt, a line of printed table output, and a trailing backtick) have
# been removed so the snippet parses as plain R.
library(datasets)
head(iris)
library(ggplot2)

# Reference plot: petal length vs. petal width, coloured by species.
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()

# Seed the RNG first: kmeans() picks its starting centers at random.
set.seed(20)
# Cluster on columns 3:4 of iris with 3 centers and 20 random starts.
irisCluster <- kmeans(iris[, 3:4], 3, nstart = 20)
irisCluster

# Cross-tabulate the cluster assignment against the species.
table(irisCluster$cluster, iris$Species)
# Apparently the header row of the printed table (the original post did not
# include the counts):
#   setosa versicolor virginica

# Convert the assignment to a factor so it maps to a discrete colour scale.
irisCluster$cluster <- as.factor(irisCluster$cluster)
ggplot(iris, aes(Petal.Length, Petal.Width, color = irisCluster$cluster)) +
  geom_point()
您可以把不属于集群 3 的点的标签设为空白。您可能需要根据实际数据调整标签的位置。
# Label only the points assigned to cluster 3. Every other point gets an
# empty-string label, so geom_text() draws nothing visible for it.
# Expects `irisCluster` (the kmeans() result) to exist already.
library(dplyr)
library(ggplot2)

iris %>%
  mutate(
    cluster = irisCluster$cluster,
    # replace() with "" turns the numeric petal lengths into character labels.
    label = replace(Petal.Length, cluster != 3, "")
  ) %>%
  ggplot(
    aes(x = Petal.Length, y = Petal.Width, color = cluster, label = label)
  ) +
  geom_point() +
  # Nudge the text away from its point; tune these offsets for other data.
  geom_text(hjust = -0.4, vjust = -0.5)
你的问题有点模棱两可,但如果你想突出显示特定集群中的点,你可以使用 gghighlight 包,例如
# Highlight the points of one k-means cluster using the gghighlight package.
library(datasets)
library(ggplot2)
# install.packages("gghighlight")
library(gghighlight)

# Seed the RNG first: kmeans() picks its starting centers at random.
set.seed(20)
irisCluster <- kmeans(
  x = iris[, c("Petal.Length", "Petal.Width")],
  centers = 3,
  nstart = 20
)
print(irisCluster)

# Cross-tabulate the cluster assignment against the species.
print(table(irisCluster[["cluster"]], iris[["Species"]]))

# Attach the assignment to the data frame so the plot can refer to it by name.
iris[["cluster"]] <- as.factor(irisCluster[["cluster"]])

ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, color = factor(cluster))
) +
  geom_point() +
  # Highlight only the rows where the predicate holds (cluster 3).
  gghighlight(cluster == 3, keep_scales = TRUE)