R中基于另一列的列的Wordcloud
Wordcloud of a column in R based on another column
我正在 R 中使用 wordcloud,到目前为止我只在基本的东西上取得了成功,但是我想做的是我想显示特定位置的词云。例如,如果我有
这样的文字
TEXT LOCATION
True or false? link(#Addition, #Classification) NewYork,USA
Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics Norwich,UK
Biologists do have a sense of humor, especially computational bio people France
Semantic Inference using #Chemogenomics Data for Drug Discovery London,UK
这是我使用的基本词云代码
library(tm)
library(SnowballC)
library(wordcloud)
DATA<-c('True or false? link(#Addition, #Classification) ','Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics',' Biologists do have a sense of humor, especially computational bio people','Semantic Inference using #Chemogenomics Data for Drug Discovery')
Location<-c('NewYork,USA','Norwich,UK',' France','London,UK')
jeopQ<-data.frame(DATA,Location)
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, PlainTextDocument)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords('english'))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)
myDTM = TermDocumentMatrix(jeopCorpus, control = list(minWordLength = 1))
m = as.matrix(myDTM)
v = sort(rowSums(m), decreasing = TRUE)
set.seed(4363)
wordcloud(names(v), v,max.words =100,min.freq=3,scale=c(4,0.1), random.order = FALSE,rot.per=.5,vfont=c("sans serif","plain"),colors=palette())
我想要一个单独的词云,其中包含 "USA" 的位置和其中包含 "UK" 的位置,以及一个单独的法国词云,这可能吗?
jeopQ<-data.frame(DATA,Location)
# Clean Location
jeopQ$Location <- sub('.*,\s*','', jeopQ$Location)
# Loop
for(i in unique(jeopQ$Location)){
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA[jeopQ$Location==i]))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, PlainTextDocument)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords('english'))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)
myDTM = TermDocumentMatrix(jeopCorpus, control = list(minWordLength = 1))
m = as.matrix(myDTM)
v = sort(rowSums(m), decreasing = TRUE)
set.seed(4363)
wordcloud(names(v), v,max.words =100,min.freq=3,scale=c(4,0.1), random.order = FALSE,rot.per=.5,vfont=c("sans serif","plain"),colors=palette())
}
我正在 R 中使用 wordcloud,到目前为止我只在基本的东西上取得了成功,但是我想做的是我想显示特定位置的词云。例如,如果我有
这样的文字 TEXT LOCATION
True or false? link(#Addition, #Classification) NewYork,USA
Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics Norwich,UK
Biologists do have a sense of humor, especially computational bio people France
Semantic Inference using #Chemogenomics Data for Drug Discovery London,UK
这是我使用的基本词云代码
library(tm)
library(SnowballC)
library(wordcloud)
DATA<-c('True or false? link(#Addition, #Classification) ','Gene deFuser: detecting gene fusion events from protein sequences #bmc #bioinformatics',' Biologists do have a sense of humor, especially computational bio people','Semantic Inference using #Chemogenomics Data for Drug Discovery')
Location<-c('NewYork,USA','Norwich,UK',' France','London,UK')
jeopQ<-data.frame(DATA,Location)
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, PlainTextDocument)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords('english'))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)
myDTM = TermDocumentMatrix(jeopCorpus, control = list(minWordLength = 1))
m = as.matrix(myDTM)
v = sort(rowSums(m), decreasing = TRUE)
set.seed(4363)
wordcloud(names(v), v,max.words =100,min.freq=3,scale=c(4,0.1), random.order = FALSE,rot.per=.5,vfont=c("sans serif","plain"),colors=palette())
我想要一个单独的词云,其中包含 "USA" 的位置和其中包含 "UK" 的位置,以及一个单独的法国词云,这可能吗?
jeopQ<-data.frame(DATA,Location)
# Clean Location
jeopQ$Location <- sub('.*,\s*','', jeopQ$Location)
# Loop
for(i in unique(jeopQ$Location)){
jeopCorpus <- Corpus(VectorSource(jeopQ$DATA[jeopQ$Location==i]))
jeopCorpus <- tm_map(jeopCorpus, content_transformer(tolower))
jeopCorpus <- tm_map(jeopCorpus, removePunctuation)
jeopCorpus <- tm_map(jeopCorpus, PlainTextDocument)
jeopCorpus <- tm_map(jeopCorpus, removeNumbers)
jeopCorpus <- tm_map(jeopCorpus, removeWords, stopwords('english'))
jeopCorpus <- tm_map(jeopCorpus, stemDocument)
myDTM = TermDocumentMatrix(jeopCorpus, control = list(minWordLength = 1))
m = as.matrix(myDTM)
v = sort(rowSums(m), decreasing = TRUE)
set.seed(4363)
wordcloud(names(v), v,max.words =100,min.freq=3,scale=c(4,0.1), random.order = FALSE,rot.per=.5,vfont=c("sans serif","plain"),colors=palette())
}