无法使用R按簇号生成词云

Can't generate word cloud by cluster number using R

我正在尝试按聚类生成词云,但它报错:“x 必须是至少有两个维度的数组”。我的处理流程是:Twitter 数据 -> 语料库 -> 文本挖掘 (text mining) -> 文档-词项矩阵 -> kmeans 聚类 -> 每个聚类的词云。

library(tm)
library(SnowballC)
library(XML)
library(streamR)
library(wordcloud)
library(NLP) 
library(fpc)
library(cluster)

# Read the streamed tweets into a data frame and list its columns.
tweetsDF <- parseTweets("tweetsStream.txt", simplify = FALSE)
names(tweetsDF)

# Build a corpus from the tweet text and look at its first document.
corp <- Corpus(VectorSource(tweetsDF$text))
inspect(corp[1])

# The corpus is passed through VectorSource() a second time before the
# matrices are built from it.
corp <- Corpus(VectorSource(corp))
dtm <- DocumentTermMatrix(corp)
inspect(dtm)

tdm <- TermDocumentMatrix(corp)

# Column totals of the document-term matrix: how many there are, then the
# 14 largest.
freq <- colSums(as.matrix(dtm))
length(freq)

freq <- sort(freq, decreasing = TRUE)
head(freq, 14)

# Distances are computed on the transposed document-term matrix, then split
# into 2 clusters and plotted.
d <- dist(t(dtm), method = "euclidian")
kfit <- kmeans(d, centers = 2)
clusplot(
  as.matrix(d), kfit$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)

# Names of the entries assigned to cluster 2, passed through as.matrix() and
# then column-summed. The colSums() call is the step the reported error
# comes from.
docs1 <- names(which(kfit$cluster == 2))
docs1 <- as.matrix(docs1)
v1 <- sort(colSums(docs1), decreasing = TRUE)

Error: 'x' must be an array of at least two dimensions

# Pair each name in v1 with its value and draw the word cloud, with the
# minimum frequency set to 2.
myNames1 <- names(v1)
d1 <- data.frame(word = myNames1, freq = v1)
wordcloud(words = d1$word, freq = d1$freq, min.freq = 2)

dput 的输出(原帖此处为一张图片):

聚类之后,你并没有取出对应的词项数据来生成词云……

你应该这样做:

library(slam)

# Positions of the entries that kmeans() placed in cluster 2.
cluster_rows <- which(kfit$cluster == 2)
head(cluster_rows)
length(cluster_rows)

# Keep only those rows of the term-document matrix.
cluster_tdm <- tdm[cluster_rows, ]
head(cluster_tdm)

# One row per remaining term with its row total, ordered by that total,
# then plotted with the minimum frequency set to 2.
word_freq <- data.frame(
  word = rownames(cluster_tdm),
  freq = row_sums(cluster_tdm)
)
head(word_freq)
word_freq <- word_freq[order(word_freq$freq), ]
wordcloud(words = word_freq$word, freq = word_freq$freq, min.freq = 2)

最小示例:

使用一些内置数据,我完成了 kmeans 聚类并基于其中一个聚类生成了词云

library(tm)
library(wordcloud)
library(slam)
library(cluster)  # provides clusplot(), which is called below

# Load the built-in "acq" example data and build both matrix orientations.
data("acq")

dtm <- DocumentTermMatrix(acq)
inspect(dtm)

tdm <- TermDocumentMatrix(acq)

# Total count of every term, computed once on the sparse matrix with
# slam::col_sums() rather than converting to a dense matrix twice.
freq <- col_sums(dtm)
length(freq)

freq <- sort(freq, decreasing = TRUE)
head(freq, 14)

# kmeans() chooses its starting centres at random; fixing the seed keeps the
# cluster labels (and therefore what "cluster 2" means) stable between runs.
set.seed(42)

# dist() measures distances between the rows of t(dtm), so the clustering
# groups terms rather than documents.
d <- dist(t(dtm), method = "euclidian")
kfit <- kmeans(d, centers = 2)
clusplot(
  as.matrix(d), kfit$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)

# Positions of the terms assigned to cluster 2.
cluster_rows <- which(kfit$cluster == 2)
head(cluster_rows)
length(cluster_rows)

# Restrict the term-document matrix to those terms.
cluster_tdm <- tdm[cluster_rows, ]
inspect(cluster_tdm)

# One row per term with its total count across documents, ordered by count,
# then drawn as a word cloud with the minimum frequency set to 2.
word_freq <- data.frame(
  word = rownames(cluster_tdm),
  freq = row_sums(cluster_tdm)
)
head(word_freq)
word_freq <- word_freq[order(word_freq$freq), ]
wordcloud(words = word_freq$word, freq = word_freq$freq, min.freq = 2)

附带说明:以图片形式贴出 dput 的输出没有帮助,因为我们无法用图片在自己的机器上重建您的数据。