将 SOMbrero 包中的集群和节点添加到训练数据中
add clusters and nodes from SOMbrero package to training data
我正在玩SOMbrero package. I would like to attach the cluster numbers created like so (taken from here):
# Cut the trained SOM (iris.som) into k = 3 super clusters with superClass().
my.sc <- superClass(iris.som, k=3)
以及 SOM 节点的 X 和 Y 坐标,一并附加到训练数据集中。
在我使用 kohonen 包的一些代码中,我创建了这样的集群:
# Rescale a numeric vector linearly onto [0, 1] (min-max normalisation).
#
# x: numeric vector.
# Returns a vector of the same length as x. A constant vector has zero range,
# so 0 / 0 would yield NaN; zeros are returned instead so the scaled data stay
# usable downstream. NA input still propagates NA, exactly as before.
range01 <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA/NaN span on the ordinary arithmetic path below.
  if (isTRUE(span == 0)) {
    # x - lo is all zeros here and keeps any names/attributes of x.
    return(x - lo)
  }
  (x - lo) / span
}
# Scale every numeric column with range01(), train a 4 x 4 rectangular SOM,
# cut the codebook vectors into super clusters and export each observation
# together with its cluster label and node coordinates.

# vapply() pins the result to a logical vector; sapply() may change its
# return type depending on the input.
ind <- vapply(SubsetData, is.numeric, logical(1))
SubsetData[ind] <- lapply(SubsetData[ind], range01)
TrainingMatrix <- as.matrix(SubsetData)

GridDefinition <- somgrid(
  xdim = 4,
  ydim = 4,
  topo = "rectangular",
  toroidal = FALSE
)
SomModel <- som(
  data = TrainingMatrix,
  grid = GridDefinition,
  rlen = 10000,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# Observation counts per unit, passed to hclust() as the `members` sizes.
nb <- table(SomModel$unit.classif)
groups <- 5
tree.hc <- cutree(
  hclust(d = dist(SomModel$codes[[1]]), method = "ward.D2", members = nb),
  groups
)
plot(SomModel, type = "codes", bgcol = rainbow(groups)[tree.hc])
add.cluster.boundaries(SomModel, tree.hc)

# unit.classif indexes the per-unit cluster labels and grid points, giving
# one value per row of the exported table.
unit_of_row <- SomModel$unit.classif
result <- OrginalData
result$Cluster <- tree.hc[unit_of_row]
result$X <- SomModel$grid$pts[unit_of_row, "x"]
result$Y <- SomModel$grid$pts[unit_of_row, "y"]
write.table(
  result,
  file = "FinalData.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
PS:
可以找到一些使用 iris 数据集的示例代码 here。
PPS:
我试了一下上面引用的 iris 代码,认为我已经设法提取了集群、节点 ID 和原型(请参见下面的代码)。缺少的是坐标 X 和 Y。我认为它们在这里:
# Suspected location of the X/Y grid coordinates of the SOM nodes.
iris.som$parameters$the.grid$coord
代码:
# Train a 3 x 3 SOM on the four numeric iris columns, group the nodes into
# 3 super clusters, and export iris with cluster, node id and the prototype
# values of each observation's node.
library(SOMbrero)
set.seed(100)
# Forward slashes are required here: "\R" and "\C" are not valid escape
# sequences in an R string, so the backslash form does not even parse.
setwd("D:/RProjects/Clustering")

# Alternative 10 x 10 map, kept for reference:
# iris.som <- trainSOM(x.data=iris[,1:4],dimension=c(10,10), maxit=100000, scaling="unitvar", radius.type="gaussian")
iris.som <- trainSOM(
  x.data = iris[, 1:4],
  dimension = c(3, 3),
  maxit = 100000,
  scaling = "unitvar",
  radius.type = "gaussian"
)

# Hierarchical clustering of the map into 3 super clusters.
iris.sc <- superClass(iris.som, k = 3)
summary(iris.sc)

# Projection quality indicators of the trained map.
quality(iris.som)

# Node id of every observation; used to look up per-node values row by row.
node_id <- iris.sc$som$clustering
prototypes <- iris.sc$som$prototypes

iris1 <- iris
iris1$Cluster <- iris.sc$cluster[node_id]
iris1$Node <- node_id
iris1$Pt1Sepal.Length <- prototypes[node_id, 1]
iris1$Pt2Sepal.Width <- prototypes[node_id, 2]
iris1$Pt3Petal.Length <- prototypes[node_id, 3]
iris1$Pt4Petal.Width <- prototypes[node_id, 4]
write.table(
  iris1,
  file = "Iris.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
我不确定我是否做对了但是:
# Grid object stored with the trained SOM; its `coord` element is what the
# surrounding snippets read for the node coordinates.
iris.som$parameters$the.grid
包含簇的坐标(它是一个两列数组,在映射 space 中具有 x 和 y 坐标)
所以我认为你想要做的是
# The grid coordinates are a two-column array; convert them to a data frame
# so that a third column can be added with `$`.
out.grid <- as.data.frame(iris.som$parameters$the.grid$coord)
# The super-cluster label of each node lives in the `cluster` element of the
# superClass() result (there is no `clustering` element at that level).
out.grid$sc <- iris.sc$cluster
并导出 out.grid(三列数组)。iris.sc$som$prototypes
包含集群原型的坐标,但这些坐标位于原始空间中(即 iris
数据集取值所在的四维空间)。
我想我已经使用 iris 示例解决了这个问题(请 correct/improve 代码!- 我不精通 R):
# Train a 5 x 5 SOM on the four numeric iris columns, group the nodes into
# 3 super clusters, and export iris with cluster, node id, the prototype
# values and the grid X/Y coordinates of each observation's node.
library(SOMbrero)
set.seed(100)
# Forward slashes are required here: "\R" and "\S" are not valid escape
# sequences in an R string, so the backslash form does not even parse.
setwd("D:/RProjects/SomBreroClustering")

iris.som <- trainSOM(
  x.data = iris[, 1:4],
  dimension = c(5, 5),
  maxit = 10000,
  scaling = "unitvar",
  radius.type = "letremy"
)

# Hierarchical clustering of the map into 3 super clusters.
iris.sc <- superClass(iris.som, k = 3)
summary(iris.sc)

# Projection quality indicators of the trained map.
quality(iris.som)

# Node id of every observation; used to look up per-node values row by row.
node_id <- iris.sc$som$clustering
prototypes <- iris.sc$som$prototypes
grid_coord <- iris.som$parameters$the.grid$coord

iris1 <- iris
iris1$Cluster <- iris.sc$cluster[node_id]
iris1$Node <- node_id
iris1$Pt1Sepal.Length <- prototypes[node_id, 1]
iris1$Pt2Sepal.Width <- prototypes[node_id, 2]
iris1$Pt3Petal.Length <- prototypes[node_id, 3]
iris1$Pt4Petal.Width <- prototypes[node_id, 4]
iris1$X <- grid_coord[node_id, 1]
iris1$Y <- grid_coord[node_id, 2]
write.table(
  iris1,
  file = "Iris.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
I think my answer captures the requirements: adding the node IDs, X and
Y coordinates, cluster and prototypes to the original data. Would you
agree?
是:)
我正在玩SOMbrero package. I would like to attach the cluster numbers created like so (taken from here):
# Cut the trained SOM (iris.som) into k = 3 super clusters with superClass().
my.sc <- superClass(iris.som, k=3)
以及 SOM 节点的 X 和 Y 坐标,一并附加到训练数据集中。
在我使用 kohonen 包的一些代码中,我创建了这样的集群:
# Rescale a numeric vector linearly onto [0, 1] (min-max normalisation).
#
# x: numeric vector.
# Returns a vector of the same length as x. A constant vector has zero range,
# so 0 / 0 would yield NaN; zeros are returned instead so the scaled data stay
# usable downstream. NA input still propagates NA, exactly as before.
range01 <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA/NaN span on the ordinary arithmetic path below.
  if (isTRUE(span == 0)) {
    # x - lo is all zeros here and keeps any names/attributes of x.
    return(x - lo)
  }
  (x - lo) / span
}
# Scale every numeric column with range01(), train a 4 x 4 rectangular SOM,
# cut the codebook vectors into super clusters and export each observation
# together with its cluster label and node coordinates.

# vapply() pins the result to a logical vector; sapply() may change its
# return type depending on the input.
ind <- vapply(SubsetData, is.numeric, logical(1))
SubsetData[ind] <- lapply(SubsetData[ind], range01)
TrainingMatrix <- as.matrix(SubsetData)

GridDefinition <- somgrid(
  xdim = 4,
  ydim = 4,
  topo = "rectangular",
  toroidal = FALSE
)
SomModel <- som(
  data = TrainingMatrix,
  grid = GridDefinition,
  rlen = 10000,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# Observation counts per unit, passed to hclust() as the `members` sizes.
nb <- table(SomModel$unit.classif)
groups <- 5
tree.hc <- cutree(
  hclust(d = dist(SomModel$codes[[1]]), method = "ward.D2", members = nb),
  groups
)
plot(SomModel, type = "codes", bgcol = rainbow(groups)[tree.hc])
add.cluster.boundaries(SomModel, tree.hc)

# unit.classif indexes the per-unit cluster labels and grid points, giving
# one value per row of the exported table.
unit_of_row <- SomModel$unit.classif
result <- OrginalData
result$Cluster <- tree.hc[unit_of_row]
result$X <- SomModel$grid$pts[unit_of_row, "x"]
result$Y <- SomModel$grid$pts[unit_of_row, "y"]
write.table(
  result,
  file = "FinalData.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
PS:
可以找到一些使用 iris 数据集的示例代码 here。
PPS:
我试了一下上面引用的 iris 代码,认为我已经设法提取了集群、节点 ID 和原型(请参见下面的代码)。缺少的是坐标 X 和 Y。我认为它们在这里:
# Suspected location of the X/Y grid coordinates of the SOM nodes.
iris.som$parameters$the.grid$coord
代码:
# Train a 3 x 3 SOM on the four numeric iris columns, group the nodes into
# 3 super clusters, and export iris with cluster, node id and the prototype
# values of each observation's node.
library(SOMbrero)
set.seed(100)
# Forward slashes are required here: "\R" and "\C" are not valid escape
# sequences in an R string, so the backslash form does not even parse.
setwd("D:/RProjects/Clustering")

# Alternative 10 x 10 map, kept for reference:
# iris.som <- trainSOM(x.data=iris[,1:4],dimension=c(10,10), maxit=100000, scaling="unitvar", radius.type="gaussian")
iris.som <- trainSOM(
  x.data = iris[, 1:4],
  dimension = c(3, 3),
  maxit = 100000,
  scaling = "unitvar",
  radius.type = "gaussian"
)

# Hierarchical clustering of the map into 3 super clusters.
iris.sc <- superClass(iris.som, k = 3)
summary(iris.sc)

# Projection quality indicators of the trained map.
quality(iris.som)

# Node id of every observation; used to look up per-node values row by row.
node_id <- iris.sc$som$clustering
prototypes <- iris.sc$som$prototypes

iris1 <- iris
iris1$Cluster <- iris.sc$cluster[node_id]
iris1$Node <- node_id
iris1$Pt1Sepal.Length <- prototypes[node_id, 1]
iris1$Pt2Sepal.Width <- prototypes[node_id, 2]
iris1$Pt3Petal.Length <- prototypes[node_id, 3]
iris1$Pt4Petal.Width <- prototypes[node_id, 4]
write.table(
  iris1,
  file = "Iris.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
我不确定我是否做对了但是:
# Grid object stored with the trained SOM; its `coord` element is what the
# surrounding snippets read for the node coordinates.
iris.som$parameters$the.grid
包含簇的坐标(它是一个两列数组,在映射 space 中具有 x 和 y 坐标)所以我认为你想要做的是
# Two separate statements (they were fused onto one line, which does not
# parse). The grid coordinates are a two-column array; convert them to a data
# frame so that a third column can be added with `$`.
out.grid <- as.data.frame(iris.som$parameters$the.grid$coord)
# The super-cluster label of each node lives in the `cluster` element of the
# superClass() result (there is no `clustering` element at that level).
out.grid$sc <- iris.sc$cluster
并导出 out.grid(三列数组)。iris.sc$som$prototypes
包含集群原型的坐标,但这些坐标位于原始空间中(即 iris
数据集取值所在的四维空间)。
我想我已经使用 iris 示例解决了这个问题(请 correct/improve 代码!- 我不精通 R):
# Train a 5 x 5 SOM on the four numeric iris columns, group the nodes into
# 3 super clusters, and export iris with cluster, node id, the prototype
# values and the grid X/Y coordinates of each observation's node.
library(SOMbrero)
set.seed(100)
# Forward slashes are required here: "\R" and "\S" are not valid escape
# sequences in an R string, so the backslash form does not even parse.
setwd("D:/RProjects/SomBreroClustering")

iris.som <- trainSOM(
  x.data = iris[, 1:4],
  dimension = c(5, 5),
  maxit = 10000,
  scaling = "unitvar",
  radius.type = "letremy"
)

# Hierarchical clustering of the map into 3 super clusters.
iris.sc <- superClass(iris.som, k = 3)
summary(iris.sc)

# Projection quality indicators of the trained map.
quality(iris.som)

# Node id of every observation; used to look up per-node values row by row.
node_id <- iris.sc$som$clustering
prototypes <- iris.sc$som$prototypes
grid_coord <- iris.som$parameters$the.grid$coord

iris1 <- iris
iris1$Cluster <- iris.sc$cluster[node_id]
iris1$Node <- node_id
iris1$Pt1Sepal.Length <- prototypes[node_id, 1]
iris1$Pt2Sepal.Width <- prototypes[node_id, 2]
iris1$Pt3Petal.Length <- prototypes[node_id, 3]
iris1$Pt4Petal.Width <- prototypes[node_id, 4]
iris1$X <- grid_coord[node_id, 1]
iris1$Y <- grid_coord[node_id, 2]
write.table(
  iris1,
  file = "Iris.csv",
  sep = ",",
  col.names = NA,
  quote = FALSE
)
I think my answer captures the requirements: adding the node IDs, X and Y coordinates, cluster and prototypes to the original data. Would you agree?
是:)