迭代嵌套聚类
Iterative nested clustering
我有这样一个数据集:
n = 50
g = 6
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))),
y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))))
plot(d)
我可以这样聚类:
library(cluster)
library(dplyr)
knb <- clusGap(d, kmeans, 10, B = 10, verbose = interactive())
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
d_dist <- dist(as.matrix(d)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
j <- rect.hclust(hclust(d_dist), k=k)
集群组存储在j中,如:
j
[[1]]
[1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
[[2]]
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
如何在生成的两个集群上迭代重复该过程? (我不知道每一步会有多少簇,也不知道需要多少步才能达到clusGap函数给出的“无簇可能性”
非常感谢你的帮助,我被卡住了;)
这是我的问题的解决方案草案。它无疑coult/must得到改进。如果有谁能提出更好的工作流程,欢迎大家提出!
library(dplyr)
library(cluster)
n = 500
g = 30
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^1.5))),
y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^1.5))))
rm(g,n)
plot(d)
flattenlist <- function(x){
morelists <- sapply(x, function(xprime) class(xprime)[1]=="list")
out <- c(x[!morelists], unlist(x[morelists], recursive=FALSE))
if(sum(morelists)){
Recall(out)
}else{
return(out)
}
}
clusters <- list()
for (zz in 1:2){
if(exists("j")){
for (i in 1:10000){
if(iteration == 1){
lg <- length(j)
for (z in 1:lg){
assign(paste0("subd"), d%>% filter(row.names(d) %in% j[[z]]))
try(knb <- clusGap(subd, kmeans, 10, B = 10, verbose = interactive()))
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
if(k>1){
d_dist <- dist(as.matrix(subd)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
assign(paste0("j", z), rect.hclust(hclust(d_dist), k=k))
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
if(z == lg){
gset <- ls(pat = "j")
gset <- gset[-1]
j <- do.call(c, list(mget(gset)))
j <- flattenlist(j)
iteration <- iteration + 1
rm(list=(gset))
}
rm(knb, subd, d_dist, k)
}
}else{
for(f in 1:10000){
#gset <- ls(pat = "j")
#gset <- gset[-1]
assign(paste0("subd"), d%>% filter(row.names(d) %in% j[[f]]))
try(knb <- clusGap(subd, kmeans, 10, B = 10, verbose = interactive()))
if (exists("knb")){
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
if(k>1){
d_dist <- dist(as.matrix(subd)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
assign(paste0("j", f), rect.hclust(hclust(d_dist), k=k))
tmp <- flattenlist(get(paste0("j", f)))
j <- do.call(c, list(j, tmp))
iteration <- iteration + 1
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
}
}
}
}else{
knb <- clusGap(d, kmeans, 10, B = 10, verbose = interactive())
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
d_dist <- dist(as.matrix(d)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
j <- rect.hclust(hclust(d_dist), k=k)
iteration <- c(1)
rm(k, d_dist, knb)
}
}
欢迎任何改进;)
我有这样一个数据集:
n = 50
g = 6
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))),
y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^2))))
plot(d)
我可以这样聚类:
library(cluster)
library(dplyr)
knb <- clusGap(d, kmeans, 10, B = 10, verbose = interactive())
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
d_dist <- dist(as.matrix(d)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
j <- rect.hclust(hclust(d_dist), k=k)
集群组存储在j中,如:
j
[[1]]
[1] 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
[[2]]
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
如何在生成的两个集群上迭代重复该过程? (我不知道每一步会有多少簇,也不知道需要多少步才能达到clusGap函数给出的“无簇可能性”
非常感谢你的帮助,我被卡住了;)
这是我的问题的解决方案草案。它无疑coult/must得到改进。如果有谁能提出更好的工作流程,欢迎大家提出!
library(dplyr)
library(cluster)
n = 500
g = 30
set.seed(g)
d <- data.frame(x = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^1.5))),
y = unlist(lapply(1:g, function(i) rnorm(n/g, runif(1)*i^1.5))))
rm(g,n)
plot(d)
flattenlist <- function(x){
morelists <- sapply(x, function(xprime) class(xprime)[1]=="list")
out <- c(x[!morelists], unlist(x[morelists], recursive=FALSE))
if(sum(morelists)){
Recall(out)
}else{
return(out)
}
}
clusters <- list()
for (zz in 1:2){
if(exists("j")){
for (i in 1:10000){
if(iteration == 1){
lg <- length(j)
for (z in 1:lg){
assign(paste0("subd"), d%>% filter(row.names(d) %in% j[[z]]))
try(knb <- clusGap(subd, kmeans, 10, B = 10, verbose = interactive()))
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
if(k>1){
d_dist <- dist(as.matrix(subd)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
assign(paste0("j", z), rect.hclust(hclust(d_dist), k=k))
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
if(z == lg){
gset <- ls(pat = "j")
gset <- gset[-1]
j <- do.call(c, list(mget(gset)))
j <- flattenlist(j)
iteration <- iteration + 1
rm(list=(gset))
}
rm(knb, subd, d_dist, k)
}
}else{
for(f in 1:10000){
#gset <- ls(pat = "j")
#gset <- gset[-1]
assign(paste0("subd"), d%>% filter(row.names(d) %in% j[[f]]))
try(knb <- clusGap(subd, kmeans, 10, B = 10, verbose = interactive()))
if (exists("knb")){
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
if(k>1){
d_dist <- dist(as.matrix(subd)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
assign(paste0("j", f), rect.hclust(hclust(d_dist), k=k))
tmp <- flattenlist(get(paste0("j", f)))
j <- do.call(c, list(j, tmp))
iteration <- iteration + 1
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
}else{
clusters <- do.call(c, list(clusters, list(row.names(subd))))
}
}
}
}
}else{
knb <- clusGap(d, kmeans, 10, B = 10, verbose = interactive())
k <- maxSE(knb$Tab[, "gap"], knb$Tab[, "SE.sim"], method="Tibs2001SEmax")
d_dist <- dist(as.matrix(d)) # find distance matrix
plot(hclust(d_dist))
rect.hclust(hclust(d_dist), k=k)
j <- rect.hclust(hclust(d_dist), k=k)
iteration <- c(1)
rm(k, d_dist, knb)
}
}
欢迎任何改进;)