Shiny 词云(wordcloud)中出现中文字符
Chinese symbols in shiny wordcloud
我目前正在创建一个 Shiny 应用程序,用来加载最近关于冠状病毒的荷兰语推文,并且想在另一个选项卡上显示一个包含最常用词的词云。
表格没问题,但词云显示的主要是中文字符。我猜这可能是推文中使用的表情符号造成的,但似乎并非如此。
我写的代码:
library(tidyverse)
library(shiny)
library(rtweet)
library(dplyr)
library(glue)
library(reactable)
library(purrr)
library(wordcloud2)
library(tidytext)
library(tm)
# Build the HTML link markup for the URLs attached to one tweet.
#
# url: a character vector of URLs; may be NULL, empty, or contain NA.
# Returns a single string with one <a> element per non-missing URL, joined
# by ", ", or "" when there is nothing to link.
make_url_html <- function(url) {
  # Drop missing entries up front so NA never ends up inside a link, and so
  # NULL / zero-length input falls through to the empty-string case instead
  # of failing on a zero-length `if` condition.
  url <- url[!is.na(url)]
  if (length(url) == 0L) {
    return("")
  }
  # The URLs come from tweets, so escape the characters that could break out
  # of the quoted attributes or the element body. "&" must be replaced first,
  # otherwise the entities produced below would be escaped a second time.
  safe <- gsub("&", "&amp;", as.character(url), fixed = TRUE)
  safe <- gsub("<", "&lt;", safe, fixed = TRUE)
  safe <- gsub(">", "&gt;", safe, fixed = TRUE)
  safe <- gsub("'", "&#39;", safe, fixed = TRUE)
  safe <- gsub("\"", "&quot;", safe, fixed = TRUE)
  # paste0() is vectorised: one anchor per URL, collapsed into one string.
  paste0(
    "<a title = '", safe, "' target = '_new' href = '", safe, "'>", safe, "</a>",
    collapse = ", "
  )
}
# Set up the UI page. The panels are built as named pieces inside local() so
# that only `ui` itself is created in the enclosing environment.
ui <- local({
  # First tab, sidebar: radio buttons for the hashtag and a slider for the
  # number of tweets to download.
  search_controls <- sidebarPanel(
    radioButtons(
      inputId = "hashtag_to_search",
      label = "Kies hashtag",
      choices = c("#coronavirus" = "#coronavirus", "#coronahulp" = "#coronahulp")
    ),
    sliderInput(
      "num_tweets_to_download",
      "Aantal tweets:",
      min = 1,
      max = 100,
      value = 50
    )
  )

  # First tab: holds the table of tweets.
  search_tab <- tabPanel(
    title = "Zoek tweets",
    sidebarLayout(
      search_controls,
      mainPanel(
        reactableOutput("tweet_table")
      )
    )
  )

  # Second tab, sidebar: hashtag choice and number of words.
  cloud_controls <- sidebarPanel(
    radioButtons(
      inputId = "hashtag",
      label = "Choose hashtag",
      choices = c("#coronavirus" = "virus", "#coronahulp" = "hulp")
    ),
    sliderInput(
      "num",
      "Number of words:",
      min = 1,
      max = 100,
      value = 50
    )
  )

  # Second tab: holds the word cloud, with a table underneath it.
  cloud_tab <- tabPanel(
    title = "Wordcloud",
    sidebarLayout(
      cloud_controls,
      mainPanel(
        wordcloud2Output("cloud", width = "100%", height = "800px"),
        reactableOutput("table")
      )
    )
  )

  fluidPage(
    titlePanel("Corona op twitter"),
    h4("Meest gebruikte woorden omtrent populaire COVID-19 hashtags op de Nederlandse twitter"),
    tabsetPanel(search_tab, cloud_tab)
  )
})
# Server with the tweet table and the word cloud.
#
# input:  Shiny input object; reads `hashtag_to_search` and
#         `num_tweets_to_download`.
# output: Shiny output object; fills `table`, `cloud` and `tweet_table`.
server <- function(input, output) {
  # Load data: search Dutch-language tweets for the chosen hashtag,
  # excluding retweets.
  tweet_df <- reactive({
    search_tweets(
      paste("lang:nl", input$hashtag_to_search),
      n = input$num_tweets_to_download,
      include_rts = FALSE
    )
  })

  # Clean data: a few hand-picked stop words plus the Dutch list from tm.
  # The vector is named `word` so the data frame column matches the token
  # column used in the anti_join below.
  word <- c("we", "coronavirus", "nl", "nederland", "https", stopwords("nl"))
  new_stopwords_df <- data.frame(word)

  # One row per remaining word per tweet (columns status_id and word).
  tweet_clean <- reactive({
    req(tweet_df())
    tweet_df() %>%
      mutate(
        # Strip URLs, the retweet prefix, hashtags and mentions while the
        # text still has its original case, so that "RT @" can match.
        text = str_replace_all(text, "https://t.co/[a-zA-Z0-9]*", ""),
        text = str_replace(text, "RT @[a-zA-Z0-9_]*: ", ""),
        text = str_replace_all(text, "#\\w+", ""),
        text = str_replace_all(text, "@\\w+", ""),
        # tolower() is vectorised and keeps `text` a character column.
        text = tolower(text),
        # "\\b" is the regex word boundary; a single "\b" in an R string
        # literal is a backspace character and never matches.
        text = str_replace_all(text, "\\b[a-zA-Z]\\b", ""),
        text = str_replace_all(text, "[[:digit:]]", ""),
        # Keep whitespace here so words separated by a newline or tab are
        # not glued together; it is collapsed to single spaces afterwards.
        text = str_replace_all(text, "[^[:alnum:]\\s]", ""),
        text = str_replace_all(text, "\\s+", " ")
      ) %>%
      select(status_id, text) %>%
      unnest_tokens(word, text) %>%
      anti_join(new_stopwords_df, by = "word") %>%
      drop_na(word)
  })

  # The 50 most frequent words, with columns word and freq.
  tweet_clean_freq <- reactive({
    req(tweet_clean())
    tweet_clean() %>%
      group_by(word) %>%
      summarise(freq = n()) %>%
      arrange(desc(freq)) %>%
      head(n = 50)
  })

  # Table of the cleaned tokens, shown below the word cloud.
  output$table <- renderReactable({
    reactable(tweet_clean())
  })

  output$cloud <- renderWordcloud2({
    wordcloud2(data = tweet_clean_freq())
  })

  # Table: tweet text with a link to the status, plus the expanded URLs as
  # HTML links.
  tweet_table_data <- reactive({
    req(tweet_df())
    tweet_df() %>%
      select(
        user_id, status_id, created_at, screen_name, text,
        favorite_count, retweet_count, urls_expanded_url
      ) %>%
      mutate(
        Tweet = glue::glue("{text} <a href='https://twitter.com/{screen_name}/status/{status_id}'>>> </a>"),
        URLs = purrr::map_chr(urls_expanded_url, make_url_html)
      ) %>%
      select(
        DateTime = created_at, User = screen_name, Tweet,
        Likes = favorite_count, RTs = retweet_count, URLs
      )
  })

  output$tweet_table <- renderReactable({
    reactable::reactable(
      tweet_table_data(),
      filterable = TRUE,
      searchable = TRUE,
      bordered = TRUE,
      striped = TRUE,
      highlight = TRUE,
      showSortable = TRUE,
      defaultSortOrder = "desc",
      defaultPageSize = 25,
      showPageSizeOptions = TRUE,
      pageSizeOptions = c(25, 50, 75, 100, 200),
      columns = list(
        DateTime = colDef(defaultSortOrder = "asc"),
        User = colDef(defaultSortOrder = "asc"),
        # html = TRUE lets the link markup in these columns render as HTML.
        Tweet = colDef(html = TRUE, minWidth = 190, resizable = TRUE),
        Likes = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
        RTs = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
        URLs = colDef(html = TRUE)
      )
    )
  })
}
# Application: build the app object from the UI definition and server function
shinyApp(ui, server)
我试图通过在词云下方添加一个表格来检查问题出在哪里,但表格里显示的也是中文字符。当我在 Shiny 环境之外(并且不使用响应式 reactive 部分)运行我的代码时,它似乎工作正常。
顺便说一句:我知道我还没有连接单选按钮,我想先让 wordcloud 工作。
谢谢!
找到问题了:我没有去掉文本中的表情符号。
我添加了这行代码:
text = sapply(text,function(row) iconv(row, "latin1", "ASCII", sub="")))
到 mutate 函数中,这样就解决了问题。
我目前正在创建一个 Shiny 应用程序,用来加载最近关于冠状病毒的荷兰语推文,并且想在另一个选项卡上显示一个包含最常用词的词云。
表格没问题,但词云显示的主要是中文字符。我猜这可能是推文中使用的表情符号造成的,但似乎并非如此。
我写的代码:
library(tidyverse)
library(shiny)
library(rtweet)
library(dplyr)
library(glue)
library(reactable)
library(purrr)
library(wordcloud2)
library(tidytext)
library(tm)
# Build the HTML link markup for the URLs attached to one tweet.
#
# url: a character vector of URLs; may be NULL, empty, or contain NA.
# Returns a single string with one <a> element per non-missing URL, joined
# by ", ", or "" when there is nothing to link.
make_url_html <- function(url) {
  # Drop missing entries up front so NA never ends up inside a link, and so
  # NULL / zero-length input falls through to the empty-string case instead
  # of failing on a zero-length `if` condition.
  url <- url[!is.na(url)]
  if (length(url) == 0L) {
    return("")
  }
  # The URLs come from tweets, so escape the characters that could break out
  # of the quoted attributes or the element body. "&" must be replaced first,
  # otherwise the entities produced below would be escaped a second time.
  safe <- gsub("&", "&amp;", as.character(url), fixed = TRUE)
  safe <- gsub("<", "&lt;", safe, fixed = TRUE)
  safe <- gsub(">", "&gt;", safe, fixed = TRUE)
  safe <- gsub("'", "&#39;", safe, fixed = TRUE)
  safe <- gsub("\"", "&quot;", safe, fixed = TRUE)
  # paste0() is vectorised: one anchor per URL, collapsed into one string.
  paste0(
    "<a title = '", safe, "' target = '_new' href = '", safe, "'>", safe, "</a>",
    collapse = ", "
  )
}
# Set up the UI page. The panels are built as named pieces inside local() so
# that only `ui` itself is created in the enclosing environment.
ui <- local({
  # First tab, sidebar: radio buttons for the hashtag and a slider for the
  # number of tweets to download.
  search_controls <- sidebarPanel(
    radioButtons(
      inputId = "hashtag_to_search",
      label = "Kies hashtag",
      choices = c("#coronavirus" = "#coronavirus", "#coronahulp" = "#coronahulp")
    ),
    sliderInput(
      "num_tweets_to_download",
      "Aantal tweets:",
      min = 1,
      max = 100,
      value = 50
    )
  )

  # First tab: holds the table of tweets.
  search_tab <- tabPanel(
    title = "Zoek tweets",
    sidebarLayout(
      search_controls,
      mainPanel(
        reactableOutput("tweet_table")
      )
    )
  )

  # Second tab, sidebar: hashtag choice and number of words.
  cloud_controls <- sidebarPanel(
    radioButtons(
      inputId = "hashtag",
      label = "Choose hashtag",
      choices = c("#coronavirus" = "virus", "#coronahulp" = "hulp")
    ),
    sliderInput(
      "num",
      "Number of words:",
      min = 1,
      max = 100,
      value = 50
    )
  )

  # Second tab: holds the word cloud, with a table underneath it.
  cloud_tab <- tabPanel(
    title = "Wordcloud",
    sidebarLayout(
      cloud_controls,
      mainPanel(
        wordcloud2Output("cloud", width = "100%", height = "800px"),
        reactableOutput("table")
      )
    )
  )

  fluidPage(
    titlePanel("Corona op twitter"),
    h4("Meest gebruikte woorden omtrent populaire COVID-19 hashtags op de Nederlandse twitter"),
    tabsetPanel(search_tab, cloud_tab)
  )
})
# Server with the tweet table and the word cloud.
#
# input:  Shiny input object; reads `hashtag_to_search` and
#         `num_tweets_to_download`.
# output: Shiny output object; fills `table`, `cloud` and `tweet_table`.
server <- function(input, output) {
  # Load data: search Dutch-language tweets for the chosen hashtag,
  # excluding retweets.
  tweet_df <- reactive({
    search_tweets(
      paste("lang:nl", input$hashtag_to_search),
      n = input$num_tweets_to_download,
      include_rts = FALSE
    )
  })

  # Clean data: a few hand-picked stop words plus the Dutch list from tm.
  # The vector is named `word` so the data frame column matches the token
  # column used in the anti_join below.
  word <- c("we", "coronavirus", "nl", "nederland", "https", stopwords("nl"))
  new_stopwords_df <- data.frame(word)

  # One row per remaining word per tweet (columns status_id and word).
  tweet_clean <- reactive({
    req(tweet_df())
    tweet_df() %>%
      mutate(
        # Strip URLs, the retweet prefix, hashtags and mentions while the
        # text still has its original case, so that "RT @" can match.
        text = str_replace_all(text, "https://t.co/[a-zA-Z0-9]*", ""),
        text = str_replace(text, "RT @[a-zA-Z0-9_]*: ", ""),
        text = str_replace_all(text, "#\\w+", ""),
        text = str_replace_all(text, "@\\w+", ""),
        # tolower() is vectorised and keeps `text` a character column.
        text = tolower(text),
        # "\\b" is the regex word boundary; a single "\b" in an R string
        # literal is a backspace character and never matches.
        text = str_replace_all(text, "\\b[a-zA-Z]\\b", ""),
        text = str_replace_all(text, "[[:digit:]]", ""),
        # Keep whitespace here so words separated by a newline or tab are
        # not glued together; it is collapsed to single spaces afterwards.
        text = str_replace_all(text, "[^[:alnum:]\\s]", ""),
        text = str_replace_all(text, "\\s+", " ")
      ) %>%
      select(status_id, text) %>%
      unnest_tokens(word, text) %>%
      anti_join(new_stopwords_df, by = "word") %>%
      drop_na(word)
  })

  # The 50 most frequent words, with columns word and freq.
  tweet_clean_freq <- reactive({
    req(tweet_clean())
    tweet_clean() %>%
      group_by(word) %>%
      summarise(freq = n()) %>%
      arrange(desc(freq)) %>%
      head(n = 50)
  })

  # Table of the cleaned tokens, shown below the word cloud.
  output$table <- renderReactable({
    reactable(tweet_clean())
  })

  output$cloud <- renderWordcloud2({
    wordcloud2(data = tweet_clean_freq())
  })

  # Table: tweet text with a link to the status, plus the expanded URLs as
  # HTML links.
  tweet_table_data <- reactive({
    req(tweet_df())
    tweet_df() %>%
      select(
        user_id, status_id, created_at, screen_name, text,
        favorite_count, retweet_count, urls_expanded_url
      ) %>%
      mutate(
        Tweet = glue::glue("{text} <a href='https://twitter.com/{screen_name}/status/{status_id}'>>> </a>"),
        URLs = purrr::map_chr(urls_expanded_url, make_url_html)
      ) %>%
      select(
        DateTime = created_at, User = screen_name, Tweet,
        Likes = favorite_count, RTs = retweet_count, URLs
      )
  })

  output$tweet_table <- renderReactable({
    reactable::reactable(
      tweet_table_data(),
      filterable = TRUE,
      searchable = TRUE,
      bordered = TRUE,
      striped = TRUE,
      highlight = TRUE,
      showSortable = TRUE,
      defaultSortOrder = "desc",
      defaultPageSize = 25,
      showPageSizeOptions = TRUE,
      pageSizeOptions = c(25, 50, 75, 100, 200),
      columns = list(
        DateTime = colDef(defaultSortOrder = "asc"),
        User = colDef(defaultSortOrder = "asc"),
        # html = TRUE lets the link markup in these columns render as HTML.
        Tweet = colDef(html = TRUE, minWidth = 190, resizable = TRUE),
        Likes = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
        RTs = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
        URLs = colDef(html = TRUE)
      )
    )
  })
}
# Application: build the app object from the UI definition and server function
shinyApp(ui, server)
我试图通过在词云下方添加一个表格来检查问题出在哪里,但表格里显示的也是中文字符。当我在 Shiny 环境之外(并且不使用响应式 reactive 部分)运行我的代码时,它似乎工作正常。
顺便说一句:我知道我还没有连接单选按钮,我想先让 wordcloud 工作。
谢谢!
找到问题了:我没有去掉文本中的表情符号。
我添加了这行代码:
text = sapply(text,function(row) iconv(row, "latin1", "ASCII", sub="")))
到 mutate 函数中,这样就解决了问题。