层次聚类 - 图上的聚类数
Hierarchical clustering - cluster number on the graph
我正在绘制一张图,但我不希望图上显示的是点,而是希望显示每个点所属的聚类编号。我目前生成的图见附件。也就是说,我希望图中标出与各聚类对应的数字,而不是普通的散点。我还附上了另一张图片,作为我大致想要的效果示例;该示例图中标有相应聚类的编号。
我正在使用层次聚类。代码如下。
感谢您的帮助。
library(readxl)

# Summarise average-linkage hierarchical clusterings of the properties for
# every candidate number of clusters k = 2, ..., nproperty - 1. For each k the
# script records the mean and standard deviation, across clusters, of
#   * coverage   - largest distance from a cluster member to the cluster's
#                  Production-weighted centre of mass, and
#   * production - total Production of the cluster.
# Requires columns named Latitude, Longitude and Production in the sheet.
complete_data <- read_excel('C:/Users/Jovani Souza/Word/Cluster/test2.xlsx')

# Each row is one property, so the property count is the row count (the
# column count would cap k at the number of variables instead).
nproperty <- nrow(complete_data)
stopifnot(nproperty >= 3) # need at least one k in 2:(nproperty - 1)

# NOTE(review): assumes columns 1 and 2 are Latitude and Longitude - confirm.
coordinates <- complete_data[, 1:2]
d <- dist(coordinates)
fit.average <- hclust(d, method = "average")

# Row k - 1 of each matrix holds the summary for the k-cluster solution.
# `mean` is kept as the result name for compatibility; calls such as
# mean(x) still reach base::mean because R skips non-function bindings
# when resolving a function call.
mean <- matrix(nrow = nproperty - 2, ncol = 2)
standard_deviation <- matrix(nrow = nproperty - 2, ncol = 2)

# Parentheses matter: 2:nproperty - 1 would parse as (2:nproperty) - 1.
for (k in 2:(nproperty - 1)) {
  clusters <- cutree(fit.average, k) # cluster index of every property
  nclusters <- matrix(table(clusters)) # number of properties per cluster

  # Attach the cluster index to both tables.
  complete_data$cluster <- clusters
  coordinates$cluster <- clusters

  mass_center <- matrix(nrow = k, ncol = 2)
  coverage <- matrix(nrow = k, ncol = 1)
  sum_production <- matrix(nrow = k, ncol = 1)

  for (i in seq_len(k)) {
    members <- complete_data[complete_data$cluster == i, ]

    # Production-weighted centre of mass of cluster i.
    mass_center[i, ] <- c(
      weighted.mean(members$Latitude, members$Production),
      weighted.mean(members$Longitude, members$Production)
    )

    # Coverage: largest Euclidean distance from a member to the centre,
    # computed with base arithmetic so no extra package is needed.
    coverage[i, ] <- max(sqrt(
      (members$Latitude - mass_center[i, 1])^2 +
        (members$Longitude - mass_center[i, 2])^2
    ))

    # Total production of cluster i.
    sum_production[i, ] <- sum(members$Production)
  }

  # Third column carries the cluster index, as in the original layout.
  mass_center <- cbind(mass_center, matrix(seq_len(k), ncol = 1))

  # Mean and spread of coverage and production across the k clusters.
  mean[k - 1, ] <- c(mean(coverage), mean(sum_production))
  standard_deviation[k - 1, ] <- c(sd(coverage), sd(sum_production))
}

colnames(mean) <- c("Coverage", "Production")
colnames(standard_deviation) <- c("Coverage", "Production")
plot(mean)
enter image description here
enter image description here
您没有提供最小的可重现示例,但下面是一些模拟数据来演示实现此目的的一种方法:
# Demo on built-in data: draw every observation as its cluster number
# instead of a point symbol.
complete_data <- mtcars[, c("mpg", "disp")]
hc <- hclust(dist(complete_data), method = "average")
vec <- factor(stats::cutree(hc, k = 3)) # cluster membership, 3 clusters

# type = "n" sets up the axes without drawing any points; the labels are
# then added on top, coloured by cluster.
plot(disp ~ mpg, data = complete_data, type = "n")
text(
  disp ~ mpg,
  data = complete_data,
  labels = vec,
  col = c("red", "green", "blue")[vec]
)
编辑:根据请求:基于聚类仅绘制点的子集:
# Demo on built-in data: cut the tree into 10 clusters and draw only the
# observations belonging to clusters 2 to 8, each shown as its cluster number.
complete_data <- mtcars[, c("mpg", "disp")]
hc <- hclust(dist(complete_data), method = "average")
complete_data$vec <- factor(stats::cutree(hc, k = 10))

# One colour per cluster level. Indexing a 3-colour vector with a 10-level
# factor yields NA (transparent) for levels above 3, which hides those labels.
cluster_cols <- grDevices::rainbow(nlevels(complete_data$vec))

shown <- subset(complete_data, vec %in% 2:8)

# type = "n" sets up the axes without drawing points; labels are added after.
with(shown, plot(disp ~ mpg, type = "n"))
with(shown, text(disp ~ mpg, labels = vec, col = cluster_cols[vec]))
由 reprex 包 (v0.3.0) 创建于 2020-03-18
我正在绘制一张图,但我不希望图上显示的是点,而是希望显示每个点所属的聚类编号。我目前生成的图见附件。也就是说,我希望图中标出与各聚类对应的数字,而不是普通的散点。我还附上了另一张图片,作为我大致想要的效果示例;该示例图中标有相应聚类的编号。我正在使用层次聚类,代码如下。感谢您的帮助。
library(readxl)

# Summarise average-linkage hierarchical clusterings of the properties for
# every candidate number of clusters k = 2, ..., nproperty - 1. For each k the
# script records the mean and standard deviation, across clusters, of
#   * coverage   - largest distance from a cluster member to the cluster's
#                  Production-weighted centre of mass, and
#   * production - total Production of the cluster.
# Requires columns named Latitude, Longitude and Production in the sheet.
complete_data <- read_excel('C:/Users/Jovani Souza/Word/Cluster/test2.xlsx')

# Each row is one property, so the property count is the row count (the
# column count would cap k at the number of variables instead).
nproperty <- nrow(complete_data)
stopifnot(nproperty >= 3) # need at least one k in 2:(nproperty - 1)

# NOTE(review): assumes columns 1 and 2 are Latitude and Longitude - confirm.
coordinates <- complete_data[, 1:2]
d <- dist(coordinates)
fit.average <- hclust(d, method = "average")

# Row k - 1 of each matrix holds the summary for the k-cluster solution.
# `mean` is kept as the result name for compatibility; calls such as
# mean(x) still reach base::mean because R skips non-function bindings
# when resolving a function call.
mean <- matrix(nrow = nproperty - 2, ncol = 2)
standard_deviation <- matrix(nrow = nproperty - 2, ncol = 2)

# Parentheses matter: 2:nproperty - 1 would parse as (2:nproperty) - 1.
for (k in 2:(nproperty - 1)) {
  clusters <- cutree(fit.average, k) # cluster index of every property
  nclusters <- matrix(table(clusters)) # number of properties per cluster

  # Attach the cluster index to both tables.
  complete_data$cluster <- clusters
  coordinates$cluster <- clusters

  mass_center <- matrix(nrow = k, ncol = 2)
  coverage <- matrix(nrow = k, ncol = 1)
  sum_production <- matrix(nrow = k, ncol = 1)

  for (i in seq_len(k)) {
    members <- complete_data[complete_data$cluster == i, ]

    # Production-weighted centre of mass of cluster i.
    mass_center[i, ] <- c(
      weighted.mean(members$Latitude, members$Production),
      weighted.mean(members$Longitude, members$Production)
    )

    # Coverage: largest Euclidean distance from a member to the centre,
    # computed with base arithmetic so no extra package is needed.
    coverage[i, ] <- max(sqrt(
      (members$Latitude - mass_center[i, 1])^2 +
        (members$Longitude - mass_center[i, 2])^2
    ))

    # Total production of cluster i.
    sum_production[i, ] <- sum(members$Production)
  }

  # Third column carries the cluster index, as in the original layout.
  mass_center <- cbind(mass_center, matrix(seq_len(k), ncol = 1))

  # Mean and spread of coverage and production across the k clusters.
  mean[k - 1, ] <- c(mean(coverage), mean(sum_production))
  standard_deviation[k - 1, ] <- c(sd(coverage), sd(sum_production))
}

colnames(mean) <- c("Coverage", "Production")
colnames(standard_deviation) <- c("Coverage", "Production")
plot(mean)
enter image description here
enter image description here
您没有提供最小的可重现示例,但下面是一些模拟数据来演示实现此目的的一种方法:
# Demo on built-in data: draw every observation as its cluster number
# instead of a point symbol.
complete_data <- mtcars[, c("mpg", "disp")]
hc <- hclust(dist(complete_data), method = "average")
vec <- factor(stats::cutree(hc, k = 3)) # cluster membership, 3 clusters

# type = "n" sets up the axes without drawing any points; the labels are
# then added on top, coloured by cluster.
plot(disp ~ mpg, data = complete_data, type = "n")
text(
  disp ~ mpg,
  data = complete_data,
  labels = vec,
  col = c("red", "green", "blue")[vec]
)
编辑:根据请求:基于聚类仅绘制点的子集:
# Demo on built-in data: cut the tree into 10 clusters and draw only the
# observations belonging to clusters 2 to 8, each shown as its cluster number.
complete_data <- mtcars[, c("mpg", "disp")]
hc <- hclust(dist(complete_data), method = "average")
complete_data$vec <- factor(stats::cutree(hc, k = 10))

# One colour per cluster level. Indexing a 3-colour vector with a 10-level
# factor yields NA (transparent) for levels above 3, which hides those labels.
cluster_cols <- grDevices::rainbow(nlevels(complete_data$vec))

shown <- subset(complete_data, vec %in% 2:8)

# type = "n" sets up the axes without drawing points; labels are added after.
with(shown, plot(disp ~ mpg, type = "n"))
with(shown, text(disp ~ mpg, labels = vec, col = cluster_cols[vec]))
由 reprex 包 (v0.3.0) 创建于 2020-03-18