SOM 映射 - R - Kohonen 包 - 聚类
SOM map - R - Kohonen pack - clustering
我在 R 中尝试 SOM 映射和聚类。我使用本教程:https://www.r-bloggers.com/self-organising-maps-for-customer-segmentation-using-r/ ... SOM 映射工作正常,但是当我尝试聚类时出现此错误:
> mydata <- som_model$codes
> wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
Error in apply(mydata, 2, var) : dim(X) must have a positive length.
我的代码:
# Question script: trains a tiny SOM with kohonen and then tries to cluster
# its codebook vectors. Kept as posted; review notes mark the problem spots.
require(kohonen)
# Toy input: 3 observations (rows) x 9 variables (columns), filled row by row.
data = matrix(
c(6, 6, 80, 280, 404, 0, 158, 158197, 158197233,
6, 13, 85, 280, 402, 0, 160, 160197, 160197233,
6, 13, 81, 283, 400, 0, 160, 160197, 160197233),
nrow=3,
ncol=9,
byrow = TRUE)
# Keep 7 of the 9 columns for training, then scale them.
# NOTE(review): column 1 is constant (6, 6, 6), so its standard deviation is 0;
# check that scale() does not produce NaN for that column.
data_train <- data[, c(1,2,4,5,7,8,9)]
data_train_matrix <- as.matrix(scale(data_train))
# 2 x 1 hexagonal grid, i.e. the map has only two units.
som_grid <- somgrid(xdim = 2, ydim=1, topo="hexagonal")
# Train the SOM on the scaled matrix using the grid defined above.
som_model <- som(data_train_matrix,
grid=som_grid,
rlen=500,
alpha=c(0.05,0.01),
keep.data = TRUE )
# training progress
plot(som_model, type="changes")
# node counts
plot(som_model, type="count", main="Node Counts")
# heat map of the 4th column of the codebook matrix
plot(som_model, type = "property", property = getCodes(som_model)[,4], main="Heat map - status")
# NOTE(review): the apply() call below is the one that fails with
# "dim(X) must have a positive length", so som_model$codes has no dim()
# here (presumably it is a list rather than a matrix; confirm with
# str(som_model$codes)).
mydata <- som_model$codes
wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
# NOTE(review): k-means is requested for 2..15 centers, but a 2 x 1 grid
# has only 2 units to cluster; verify the intended range.
for (i in 2:15) {
wss[i] <- sum(kmeans(mydata, centers=i)$withinss)
}
plot(wss)
## use hierarchical clustering to cluster the codebook vectors
som_cluster <- cutree(hclust(dist(som_model$codes)), 6)
# plot these results:
# NOTE(review): pretty_palette is not defined anywhere in this snippet.
plot(som_model, type="mapping", bgcol = pretty_palette[som_cluster], main = "Clusters")
add.cluster.boundaries(som_model, som_cluster)
它的编写方式与教程相同,那么教程怎么可能有效而这个不行呢?我是 R 的新手,所以我不明白这个错误。我知道矩阵可能有问题,但问题是什么?
您的代码有几个问题。首先,您从一个非常小的数据样本开始,在 2 x 1 网格上训练 SOM,因此 som_model$codes 中只有 2 行码本向量,然后却对最多 15 个簇执行 kmeans(簇的数量不能超过被聚类的行数)。下面我使用 mlbench 库中的 Sonar 数据集给出一份可运行的代码。需要补充的是,我从未在实际数据分析中使用过 kohonen 库或 SOM。
library(mlbench)
library(kohonen)

# Fix the RNG seed so repeated runs give the same map and the same elbow
# curve: som() and kmeans() both start from randomly chosen initial values.
set.seed(42)

data(Sonar) # somewhat bigger example data set (from mlbench)
data_train <- Sonar[, 1:60] # use the first 60 columns
data_train_matrix <- as.matrix(scale(data_train)) # scale the data

# Train the SOM on a bigger, 5 x 5 hexagonal grid.
som_grid <- somgrid(xdim = 5, ydim = 5, topo = "hexagonal")
som_model <- som(
  data_train_matrix,
  grid = som_grid,
  rlen = 500,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)
plot(som_model, type = "changes")

# Extract the matrix containing the codebook vectors (one row per map unit).
mydata <- som_model$codes[[1]]

# Within-cluster sum of squares for k = 1 .. (number of map units - 1).
# The upper bound is derived from the codebook size instead of being
# hard-coded, because kmeans() needs fewer centers than rows.
max_k <- nrow(mydata) - 1L
wss <- numeric(max_k) # preallocate instead of growing inside the loop
wss[1] <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
for (k in seq_len(max_k)[-1]) {
  wss[k] <- sum(kmeans(mydata, centers = k)$withinss)
}
plot(wss, type = "l")
让我们把层次聚类得到的树切割成 8 个簇:
# Cluster the codebook vectors hierarchically, then cut the tree into 8 groups.
codebook_tree <- hclust(dist(mydata))
som_cluster <- cutree(codebook_tree, k = 8)

# Colour every map unit by its cluster and draw the cluster borders on top.
plot(som_model, type = "mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som_model, som_cluster)
我认为出现这个错误的原因是 som_model$codes 是一个列表,因此不能直接对它使用 apply。请把 mydata 的定义替换为:
mydata <- som_model$codes[[1]]
也就是在末尾加上 [[1]],这样应该就可以工作了。
我在 R 中尝试 SOM 映射和聚类。我使用本教程:https://www.r-bloggers.com/self-organising-maps-for-customer-segmentation-using-r/ ... SOM 映射工作正常,但是当我尝试聚类时出现此错误:
> mydata <- som_model$codes
> wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
Error in apply(mydata, 2, var) : dim(X) must have a positive length.
我的代码:
# Question script: trains a tiny SOM with kohonen and then tries to cluster
# its codebook vectors. Kept as posted; review notes mark the problem spots.
require(kohonen)
# Toy input: 3 observations (rows) x 9 variables (columns), filled row by row.
data = matrix(
c(6, 6, 80, 280, 404, 0, 158, 158197, 158197233,
6, 13, 85, 280, 402, 0, 160, 160197, 160197233,
6, 13, 81, 283, 400, 0, 160, 160197, 160197233),
nrow=3,
ncol=9,
byrow = TRUE)
# Keep 7 of the 9 columns for training, then scale them.
# NOTE(review): column 1 is constant (6, 6, 6), so its standard deviation is 0;
# check that scale() does not produce NaN for that column.
data_train <- data[, c(1,2,4,5,7,8,9)]
data_train_matrix <- as.matrix(scale(data_train))
# 2 x 1 hexagonal grid, i.e. the map has only two units.
som_grid <- somgrid(xdim = 2, ydim=1, topo="hexagonal")
# Train the SOM on the scaled matrix using the grid defined above.
som_model <- som(data_train_matrix,
grid=som_grid,
rlen=500,
alpha=c(0.05,0.01),
keep.data = TRUE )
# training progress
plot(som_model, type="changes")
# node counts
plot(som_model, type="count", main="Node Counts")
# heat map of the 4th column of the codebook matrix
plot(som_model, type = "property", property = getCodes(som_model)[,4], main="Heat map - status")
# NOTE(review): the apply() call below is the one that fails with
# "dim(X) must have a positive length", so som_model$codes has no dim()
# here (presumably it is a list rather than a matrix; confirm with
# str(som_model$codes)).
mydata <- som_model$codes
wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
# NOTE(review): k-means is requested for 2..15 centers, but a 2 x 1 grid
# has only 2 units to cluster; verify the intended range.
for (i in 2:15) {
wss[i] <- sum(kmeans(mydata, centers=i)$withinss)
}
plot(wss)
## use hierarchical clustering to cluster the codebook vectors
som_cluster <- cutree(hclust(dist(som_model$codes)), 6)
# plot these results:
# NOTE(review): pretty_palette is not defined anywhere in this snippet.
plot(som_model, type="mapping", bgcol = pretty_palette[som_cluster], main = "Clusters")
add.cluster.boundaries(som_model, som_cluster)
它的编写方式与教程相同,那么教程怎么可能有效而这个不行呢?我是 R 的新手,所以我不明白这个错误。我知道矩阵可能有问题,但问题是什么?
您的代码有几个问题。首先,您从一个非常小的数据样本开始,在 2 x 1 网格上训练 SOM,因此 som_model$codes 中只有 2 行码本向量,然后却对最多 15 个簇执行 kmeans(簇的数量不能超过被聚类的行数)。下面我使用 mlbench 库中的 Sonar 数据集给出一份可运行的代码。需要补充的是,我从未在实际数据分析中使用过 kohonen 库或 SOM。
library(mlbench)
library(kohonen)

# Fix the RNG seed so repeated runs give the same map and the same elbow
# curve: som() and kmeans() both start from randomly chosen initial values.
set.seed(42)

data(Sonar) # somewhat bigger example data set (from mlbench)
data_train <- Sonar[, 1:60] # use the first 60 columns
data_train_matrix <- as.matrix(scale(data_train)) # scale the data

# Train the SOM on a bigger, 5 x 5 hexagonal grid.
som_grid <- somgrid(xdim = 5, ydim = 5, topo = "hexagonal")
som_model <- som(
  data_train_matrix,
  grid = som_grid,
  rlen = 500,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)
plot(som_model, type = "changes")

# Extract the matrix containing the codebook vectors (one row per map unit).
mydata <- som_model$codes[[1]]

# Within-cluster sum of squares for k = 1 .. (number of map units - 1).
# The upper bound is derived from the codebook size instead of being
# hard-coded, because kmeans() needs fewer centers than rows.
max_k <- nrow(mydata) - 1L
wss <- numeric(max_k) # preallocate instead of growing inside the loop
wss[1] <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
for (k in seq_len(max_k)[-1]) {
  wss[k] <- sum(kmeans(mydata, centers = k)$withinss)
}
plot(wss, type = "l")
让我们把层次聚类得到的树切割成 8 个簇:
# Cluster the codebook vectors hierarchically, then cut the tree into 8 groups.
codebook_tree <- hclust(dist(mydata))
som_cluster <- cutree(codebook_tree, k = 8)

# Colour every map unit by its cluster and draw the cluster borders on top.
plot(som_model, type = "mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som_model, som_cluster)
我认为出现这个错误的原因是 som_model$codes 是一个列表,因此不能直接对它使用 apply。请把 mydata 的定义替换为:
mydata <- som_model$codes[[1]]
也就是在末尾加上 [[1]],这样应该就可以工作了。