提高 Shiny 中 BigQuery 数据的访问速度
More speed for BigQuery data in Shiny
我不想访问完整的 public BigQuery 数据集 geo_us_boundaries,而是只想通过查询
glue::glue_sql("SELECT * FROM states WHERE state = {x}", x = input$selectedvariable1, .con=bq_con)
按 state 选择数据。我尝试了如下做法:
# Packages used by the app. shiny is attached explicitly because the UI and
# server code below call fluidPage(), selectInput(), renderPlot() and
# shinyApp() without a namespace prefix.
library(shiny)
library(dplyr)
library(ggplot2)
library(bigrquery)
library(DBI)
library(sf)
library(glue)

# Open a public BigQuery dataset eg. "geo_us_boundaries"
bq_con <- dbConnect(
  bigrquery::bigquery(),
  project = "bigquery-public-data",
  dataset = "geo_us_boundaries",
  billing = "my-project" # placeholder: replace with your own billing project
)
bigrquery::dbListTables(bq_con) # List all the tables in BigQuery data set

# Take the table
dataset <- dplyr::tbl(bq_con, "states") # connects to a table

# Enumerate the states: only the three identifier columns are collected
# locally, to populate the dropdowns of the UI.
dataset_vars <- dataset %>%
  dplyr::distinct(geo_id, state, state_name) %>%
  collect()
str(dataset_vars)
# Create the shiny dash
# UI definition: a sidebar with three dropdowns filled from dataset_vars and
# a main panel holding the plot output "myplot".
# `selected` is left at its default so each dropdown starts on its first
# choice; the previous `selected = TRUE` is not one of the choice values.
ui <- fluidPage(
  titlePanel(title = "States Dashboard"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        inputId = "selectedvariable0",
        label = "Geo ID",
        choices = unique(dataset_vars$geo_id)
      ),
      selectInput(
        inputId = "selectedvariable1",
        label = "State",
        choices = unique(dataset_vars$state)
      ),
      selectInput(
        inputId = "selectedvariable2",
        label = "State name",
        choices = unique(dataset_vars$state_name)
      )
    ),
    mainPanel(
      fluidRow(
        splitLayout(plotOutput("myplot"))
      )
    )
  )
)
# Server logic: query BigQuery for the state chosen in the "State" dropdown
# (input$selectedvariable1) and plot its geometry in output$myplot.
server <- function(input, output) {
  # SQL for the currently selected state. The value is interpolated through
  # glue_sql() with the bq_con connection rather than pasted into the string.
  sqlInput <- reactive({
    req(input$selectedvariable1) # wait until a state is actually selected
    glue::glue_sql(
      "SELECT * FROM states WHERE state = {x}",
      x = input$selectedvariable1,
      .con = bq_con
    )
  })

  # Query result as a reactive expression, so the download is cached and only
  # repeated when the SQL changes (a plain function re-ran it on every call).
  # NOTE(review): stringsAsFactors is kept from the original call; confirm
  # whether the bigrquery backend gives it any effect.
  stands_sel <- reactive({
    dbGetQuery(bq_con, as.character(sqlInput()), stringsAsFactors = TRUE)
  })

  # Log the generated SQL whenever it changes. The reactive has to be called
  # inside a reactive context; print(sqlInput) only printed the reactive
  # object itself.
  observe({
    print(sqlInput())
  })

  # Outputs are assigned directly; wrapping the assignment in observe() is
  # not needed because renderPlot() is already reactive.
  output$myplot <- renderPlot({
    # Parse the WKT text in state_geom into an sf geometry column (EPSG:4326).
    stands_sel_sf <- st_as_sf(stands_sel(), wkt = "state_geom", crs = 4326)
    ggplot() +
      geom_sf(data = stands_sel_sf)
  })
}
# Build the Shiny app object from the ui and server defined above.
shinyApp(ui, server)
#
但是,这种按 state 选择数据的访问方式非常慢。在我的实际问题中,几何图形非常大,从 BQ 下载并完成绘图要花几分钟。有什么方法可以提高这个操作的速度吗?
有人能帮帮我吗?
提前致谢!
您确定时间是花在下载数据上吗?还是花在渲染上?查询只需要几秒钟,即使是几何文本最大的州(德克萨斯州)也只有 1.3 MB,下载不应该花上几分钟。
无论如何,一个典型的解决方案是简化州的几何图形:
-- Return every column unchanged except state_geom, which is replaced by its
-- simplified version. {x} is the placeholder filled in by glue::glue_sql().
-- NOTE(review): 1000 is presumably the ST_Simplify tolerance in meters; confirm against the BigQuery docs.
SELECT * EXCEPT(state_geom), ST_Simplify(state_geom, 1000) AS state_geom
FROM states WHERE state = {x}
这样一来,数据量最大的州(此时是阿拉斯加)也不到 40KB,下载和绘制都应该快得多。
请注意,这样每次查询都要执行一次 ST_Simplify 计算,开销有点大。您可能更希望只计算一次:用简化后的州几何图形创建一张 table 并从中读取。这样查询本身会更便宜、更快,因为它读取的数据更少,也不必再计算简化几何。
-- One-off materialisation: copy the public states table into tmp.states with
-- state_geom already simplified, so later queries do not recompute ST_Simplify.
-- NOTE(review): tmp is presumably a dataset in your own project; adjust as needed.
CREATE TABLE tmp.states AS
SELECT * EXCEPT(state_geom), ST_Simplify(state_geom, 1000) AS state_geom
FROM `bigquery-public-data`.geo_us_boundaries.states;
我不想访问完整的 public BigQuery 数据集 geo_us_boundaries,而是只想通过查询
glue::glue_sql("SELECT * FROM states WHERE state = {x}", x = input$selectedvariable1, .con=bq_con)
按 state 选择数据。我尝试了如下做法:
# Packages used by the app. shiny is attached explicitly because the UI and
# server code below call fluidPage(), selectInput(), renderPlot() and
# shinyApp() without a namespace prefix.
library(shiny)
library(dplyr)
library(ggplot2)
library(bigrquery)
library(DBI)
library(sf)
library(glue)

# Open a public BigQuery dataset eg. "geo_us_boundaries"
bq_con <- dbConnect(
  bigrquery::bigquery(),
  project = "bigquery-public-data",
  dataset = "geo_us_boundaries",
  billing = "my-project" # placeholder: replace with your own billing project
)
bigrquery::dbListTables(bq_con) # List all the tables in BigQuery data set

# Take the table
dataset <- dplyr::tbl(bq_con, "states") # connects to a table

# Enumerate the states: only the three identifier columns are collected
# locally, to populate the dropdowns of the UI.
dataset_vars <- dataset %>%
  dplyr::distinct(geo_id, state, state_name) %>%
  collect()
str(dataset_vars)
# Create the shiny dash
# UI definition: a sidebar with three dropdowns filled from dataset_vars and
# a main panel holding the plot output "myplot".
# `selected` is left at its default so each dropdown starts on its first
# choice; the previous `selected = TRUE` is not one of the choice values.
ui <- fluidPage(
  titlePanel(title = "States Dashboard"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        inputId = "selectedvariable0",
        label = "Geo ID",
        choices = unique(dataset_vars$geo_id)
      ),
      selectInput(
        inputId = "selectedvariable1",
        label = "State",
        choices = unique(dataset_vars$state)
      ),
      selectInput(
        inputId = "selectedvariable2",
        label = "State name",
        choices = unique(dataset_vars$state_name)
      )
    ),
    mainPanel(
      fluidRow(
        splitLayout(plotOutput("myplot"))
      )
    )
  )
)
# Server logic: query BigQuery for the state chosen in the "State" dropdown
# (input$selectedvariable1) and plot its geometry in output$myplot.
server <- function(input, output) {
  # SQL for the currently selected state. The value is interpolated through
  # glue_sql() with the bq_con connection rather than pasted into the string.
  sqlInput <- reactive({
    req(input$selectedvariable1) # wait until a state is actually selected
    glue::glue_sql(
      "SELECT * FROM states WHERE state = {x}",
      x = input$selectedvariable1,
      .con = bq_con
    )
  })

  # Query result as a reactive expression, so the download is cached and only
  # repeated when the SQL changes (a plain function re-ran it on every call).
  # NOTE(review): stringsAsFactors is kept from the original call; confirm
  # whether the bigrquery backend gives it any effect.
  stands_sel <- reactive({
    dbGetQuery(bq_con, as.character(sqlInput()), stringsAsFactors = TRUE)
  })

  # Log the generated SQL whenever it changes. The reactive has to be called
  # inside a reactive context; print(sqlInput) only printed the reactive
  # object itself.
  observe({
    print(sqlInput())
  })

  # Outputs are assigned directly; wrapping the assignment in observe() is
  # not needed because renderPlot() is already reactive.
  output$myplot <- renderPlot({
    # Parse the WKT text in state_geom into an sf geometry column (EPSG:4326).
    stands_sel_sf <- st_as_sf(stands_sel(), wkt = "state_geom", crs = 4326)
    ggplot() +
      geom_sf(data = stands_sel_sf)
  })
}
# Build the Shiny app object from the ui and server defined above.
shinyApp(ui, server)
#
但是,这种按 state 选择数据的访问方式非常慢。在我的实际问题中,几何图形非常大,从 BQ 下载并完成绘图要花几分钟。有什么方法可以提高这个操作的速度吗?
有人能帮帮我吗?
提前致谢!
您确定时间是花在下载数据上吗?还是花在渲染上?查询只需要几秒钟,即使是几何文本最大的州(德克萨斯州)也只有 1.3 MB,下载不应该花上几分钟。
无论如何,一个典型的解决方案是简化州的几何图形:
-- Return every column unchanged except state_geom, which is replaced by its
-- simplified version. {x} is the placeholder filled in by glue::glue_sql().
-- NOTE(review): 1000 is presumably the ST_Simplify tolerance in meters; confirm against the BigQuery docs.
SELECT * EXCEPT(state_geom), ST_Simplify(state_geom, 1000) AS state_geom
FROM states WHERE state = {x}
这样一来,数据量最大的州(此时是阿拉斯加)也不到 40KB,下载和绘制都应该快得多。
请注意,这样每次查询都要执行一次 ST_Simplify 计算,开销有点大。您可能更希望只计算一次:用简化后的州几何图形创建一张 table 并从中读取。这样查询本身会更便宜、更快,因为它读取的数据更少,也不必再计算简化几何。
-- One-off materialisation: copy the public states table into tmp.states with
-- state_geom already simplified, so later queries do not recompute ST_Simplify.
-- NOTE(review): tmp is presumably a dataset in your own project; adjust as needed.
CREATE TABLE tmp.states AS
SELECT * EXCEPT(state_geom), ST_Simplify(state_geom, 1000) AS state_geom
FROM `bigquery-public-data`.geo_us_boundaries.states;