尝试创建一个 R Shiny 应用程序来抓取 Twitter 并创建一个词云
Trying to create an R Shiny app that will crawl Twitter and create a word cloud
我以前从来没有用过 Shiny,所以如果这是一个非常愚蠢的问题,我深表歉意。我正在尝试制作一个闪亮的应用程序,您可以在其中输入将从 Twitter 中提取的搜索词并创建词云。我觉得我快到了,但它不起作用,老实说,我真的不知道我在做什么。我正在尝试自学 Shiny,但我也知道像这样的论坛对学习非常有用。
library(tm)
library(wordcloud)
library(memoise)
ui <- fluidPage(
# Application title
titlePanel("Word Cloud"),
sidebarLayout(
# Sidebar with a slider and selection inputs
sidebarPanel(
textInput("selection", "Input your search term:",
""),
actionButton("update", "Change"),
hr(),
sliderInput("freq",
"Minimum Frequency:",
min = 1, max = 50, value = 15),
sliderInput("max",
"Maximum Number of Words:",
min = 1, max = 300, value = 100)
),
# Show Word Cloud
mainPanel(
plotOutput("plot")
)
)
)
#Define server logic
server <- function(input, output, session) {
# Define a reactive expression for the document term matrix
terms <- reactive({
consumer_key <- "XXXX"
consumer_secret <- "XXXX"
access_token <- "XXXX"
access_secret <- "XXXX"
#Here we are creating the "handshake" with Twitter
setup_twitter_oauth(consumer_key= consumer_key, consumer_secret=
consumer_secret,access_token= access_token, access_secret= access_secret)
#Once you have created your handshake, you can start searching for tweets
#Note that if you select a common term like "Atlanta" you will generate a lot
of Tweets quickly
#But if you select an esoteric term like "heteroscedasticity", it might take
a while to get any
tw<-searchTwitter("selection", n=1000, lang='en', resultType = "recent")
# Using "memoise" to automatically cache the results
getTermMatrix <- memoise(function(tw) {
text <- readLines(sprintf(tw),
encoding="UTF-8")
myCorpus = Corpus(VectorSource(text))
myCorpus = tm_map(myCorpus, content_transformer(tolower))
myCorpus = tm_map(myCorpus, removePunctuation)
myCorpus = tm_map(myCorpus, removeNumbers)
myCorpus = tm_map(myCorpus, removeWords,
c(stopwords("SMART"), "thy", "thou", "thee", "the",
"and", "but"))
myDTM = TermDocumentMatrix(myCorpus,
control = list(minWordLength = 1))
m = as.matrix(myDTM)
sort(rowSums(m), decreasing = TRUE)
})
# Change when the "update" button is pressed...
input$update
# ...but not for anything else
isolate({
withProgress({
setProgress(message = "Processing corpus...")
getTermMatrix(input$selection)
})
})
})
# Make the wordcloud drawing predictable during a session
wordcloud_rep <- reactive({
v <- terms()
wordcloud_rep(names(v), v, scale=c(4,0.5),
min.freq = input$freq, max.words=input$max,
colors=brewer.pal(8, "Dark2"))
})
}
# Run the application
shinyApp(ui = ui, server = server)
感谢任何试图提供帮助的人!!
编辑:啊,不好意思,没说清楚哪里出了问题!到目前为止,它打开了一个包含我想要的所有输入框的应用程序,但输入搜索词似乎实际上没有做任何事情。它只是永远加载。没有错误。
那么你的问题是什么?还是您寻求一般帮助?我看到的第一件事是你正在存储秘密信息 (consumer_key <- "XXXX",consumer_secret <- "XXXX", access_token <- "XXXX"...) 在您的服务器逻辑中。最好将它们存储在您的 .Renviron 中并使用 Sys.getenv("token")
.
调用它们
嘿抱歉第一次回答...,
该应用程序使用以下代码,我认为您应该考虑一个基本的闪亮教程来完全理解所有内容,即输出以与输入相同的方式连接到 ui --> server: output$plot <- renderPlot({...})
和 ui: plotOutput("plot")
。我使用 observeEvent
检查更新按钮是否被点击。最大值的滑块输入。 Words 也在工作,但不是最小的。频率,因为我不知道你在用它做什么。我也只是做了一个基本的单词清理。希望对您有所帮助:
library(tm)
library(wordcloud)
library(memoise)
# new libs
library(twitteR)
library(tidyverse)
library(tidytext)
ui <- fluidPage(
# Application title
titlePanel("Word Cloud"),
sidebarLayout(
# Sidebar with a slider and selection inputs
sidebarPanel(
textInput("selection", "Input your search term:",
""),
actionButton("update", "Change"),
hr(),
sliderInput("freq",
"Minimum Frequency:",
min = 1, max = 50, value = 15),
sliderInput("max",
"Maximum Number of Words:",
min = 1, max = 300, value = 100)
),
# Show Word Cloud
mainPanel(
plotOutput("plot")
)
)
)
#Define server logic
api_key <- Sys.getenv("twitter_api")
api_secret <- Sys.getenv("twitter_secret")
access_token <- Sys.getenv("twitter_access_token")
access_secret <- Sys.getenv("twitter_access_secret")
server <- function(input, output, session) {
tweets_clean <- reactiveValues(df = NULL)
# Define a reactive expression for the document term matri
#Here we are creating the "handshake" with Twitter
setup_twitter_oauth(access_token = access_token ,access_secret = access_secret,
consumer_key = api_key,consumer_secret = api_secret )
observeEvent(input$update,{
tw <- searchTwitter(input$selection, n=input$max, lang='en', resultType = "recent")
# tweets to df so we could use tidytext
df <- twListToDF(tw)
# use dplyr and tidytext to clean your tweets
tweets_clean$df <- df %>%
dplyr::select(text) %>%
tidytext::unnest_tokens(word, text) %>%
count(word, sort = TRUE)
})
output$plot <- renderPlot({
# plot it
if(is.null(tweets_clean$df)){
NULL
} else{
wordcloud(tweets_clean$df$word, tweets_clean$df$n)
}
})
}
# Run the application
shinyApp(ui = ui, server = server)`enter code here`
我以前从来没有用过 Shiny,所以如果这是一个非常愚蠢的问题,我深表歉意。我正在尝试制作一个闪亮的应用程序,您可以在其中输入将从 Twitter 中提取的搜索词并创建词云。我觉得我快到了,但它不起作用,老实说,我真的不知道我在做什么。我正在尝试自学 Shiny,但我也知道像这样的论坛对学习非常有用。
library(tm)
library(wordcloud)
library(memoise)
ui <- fluidPage(
# Application title
titlePanel("Word Cloud"),
sidebarLayout(
# Sidebar with a slider and selection inputs
sidebarPanel(
textInput("selection", "Input your search term:",
""),
actionButton("update", "Change"),
hr(),
sliderInput("freq",
"Minimum Frequency:",
min = 1, max = 50, value = 15),
sliderInput("max",
"Maximum Number of Words:",
min = 1, max = 300, value = 100)
),
# Show Word Cloud
mainPanel(
plotOutput("plot")
)
)
)
#Define server logic
server <- function(input, output, session) {
# Define a reactive expression for the document term matrix
terms <- reactive({
consumer_key <- "XXXX"
consumer_secret <- "XXXX"
access_token <- "XXXX"
access_secret <- "XXXX"
#Here we are creating the "handshake" with Twitter
setup_twitter_oauth(consumer_key= consumer_key, consumer_secret=
consumer_secret,access_token= access_token, access_secret= access_secret)
#Once you have created your handshake, you can start searching for tweets
#Note that if you select a common term like "Atlanta" you will generate a lot
of Tweets quickly
#But if you select an esoteric term like "heteroscedasticity", it might take
a while to get any
tw<-searchTwitter("selection", n=1000, lang='en', resultType = "recent")
# Using "memoise" to automatically cache the results
getTermMatrix <- memoise(function(tw) {
text <- readLines(sprintf(tw),
encoding="UTF-8")
myCorpus = Corpus(VectorSource(text))
myCorpus = tm_map(myCorpus, content_transformer(tolower))
myCorpus = tm_map(myCorpus, removePunctuation)
myCorpus = tm_map(myCorpus, removeNumbers)
myCorpus = tm_map(myCorpus, removeWords,
c(stopwords("SMART"), "thy", "thou", "thee", "the",
"and", "but"))
myDTM = TermDocumentMatrix(myCorpus,
control = list(minWordLength = 1))
m = as.matrix(myDTM)
sort(rowSums(m), decreasing = TRUE)
})
# Change when the "update" button is pressed...
input$update
# ...but not for anything else
isolate({
withProgress({
setProgress(message = "Processing corpus...")
getTermMatrix(input$selection)
})
})
})
# Make the wordcloud drawing predictable during a session
wordcloud_rep <- reactive({
v <- terms()
wordcloud_rep(names(v), v, scale=c(4,0.5),
min.freq = input$freq, max.words=input$max,
colors=brewer.pal(8, "Dark2"))
})
}
# Run the application
shinyApp(ui = ui, server = server)
感谢任何试图提供帮助的人!!
编辑:啊,不好意思,没说清楚哪里出了问题!到目前为止,它打开了一个包含我想要的所有输入框的应用程序,但输入搜索词似乎实际上没有做任何事情。它只是永远加载。没有错误。
那么你的问题是什么?还是您寻求一般帮助?我看到的第一件事是你正在存储秘密信息 (consumer_key <- "XXXX",consumer_secret <- "XXXX", access_token <- "XXXX"...) 在您的服务器逻辑中。最好将它们存储在您的 .Renviron 中并使用 Sys.getenv("token")
.
嘿抱歉第一次回答...,
该应用程序使用以下代码,我认为您应该考虑一个基本的闪亮教程来完全理解所有内容,即输出以与输入相同的方式连接到 ui --> server: output$plot <- renderPlot({...})
和 ui: plotOutput("plot")
。我使用 observeEvent
检查更新按钮是否被点击。最大值的滑块输入。 Words 也在工作,但不是最小的。频率,因为我不知道你在用它做什么。我也只是做了一个基本的单词清理。希望对您有所帮助:
library(tm)
library(wordcloud)
library(memoise)
# new libs
library(twitteR)
library(tidyverse)
library(tidytext)
ui <- fluidPage(
# Application title
titlePanel("Word Cloud"),
sidebarLayout(
# Sidebar with a slider and selection inputs
sidebarPanel(
textInput("selection", "Input your search term:",
""),
actionButton("update", "Change"),
hr(),
sliderInput("freq",
"Minimum Frequency:",
min = 1, max = 50, value = 15),
sliderInput("max",
"Maximum Number of Words:",
min = 1, max = 300, value = 100)
),
# Show Word Cloud
mainPanel(
plotOutput("plot")
)
)
)
#Define server logic
api_key <- Sys.getenv("twitter_api")
api_secret <- Sys.getenv("twitter_secret")
access_token <- Sys.getenv("twitter_access_token")
access_secret <- Sys.getenv("twitter_access_secret")
server <- function(input, output, session) {
tweets_clean <- reactiveValues(df = NULL)
# Define a reactive expression for the document term matri
#Here we are creating the "handshake" with Twitter
setup_twitter_oauth(access_token = access_token ,access_secret = access_secret,
consumer_key = api_key,consumer_secret = api_secret )
observeEvent(input$update,{
tw <- searchTwitter(input$selection, n=input$max, lang='en', resultType = "recent")
# tweets to df so we could use tidytext
df <- twListToDF(tw)
# use dplyr and tidytext to clean your tweets
tweets_clean$df <- df %>%
dplyr::select(text) %>%
tidytext::unnest_tokens(word, text) %>%
count(word, sort = TRUE)
})
output$plot <- renderPlot({
# plot it
if(is.null(tweets_clean$df)){
NULL
} else{
wordcloud(tweets_clean$df$word, tweets_clean$df$n)
}
})
}
# Run the application
shinyApp(ui = ui, server = server)`enter code here`