如何使用 R DBI 和 bigrquery 将虚拟 BQ table 写回 BQ?
How to write virtual BQ table back to BQ using R DBI and bigrquery?
我希望能够
- 访问 BQ table。它的 class 为:
[1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
- 使用 dbplyr 对该 table 进行变换以创建新的 table。它同样具有 class:
[1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
- 将这个新的 table 写入 BQ。
我收到以下错误:
Error in (function (classes, fdef, mtable) :
unable to find an inherited method for function ‘dbWriteTable’ for signature ‘"BigQueryConnection", "character", "tbl_BigQueryConnection"’
MRE
# Reproducible example (reprex): create a BigQuery test dataset and table,
# open a DBI connection, build a lazy (virtual) tbl with dplyr, then try to
# write the lazy tbl back with dbWriteTable() -- which fails, because
# dbWriteTable() only accepts a local data frame, not a lazy tbl.
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(bigrquery)
############ CREATE BQ TABLE TO ACCESS #################
# Start from a clean slate: drop the test dataset (and its contents) if it
# already exists, then recreate it.
dataset = bq_dataset(bq_test_project(), "test_dataset")
if (bq_dataset_exists(dataset))
{
bq_dataset_delete(dataset, delete_contents = T)
}
#> Suitable tokens found in the cache, associated with these emails:
#> * ariel.balter@gmail.com
#> * ariel.balter@providence.org
#> The first will be used.
#> Using an auto-discovered, cached token.
#> To suppress this message, modify your code or options to clearly consent to the use of a cached token.
#> See gargle's "Non-interactive auth" vignette for more details:
#> https://gargle.r-lib.org/articles/non-interactive-auth.html
#> The bigrquery package is using a cached token for ariel.balter@gmail.com.
bq_dataset_create(dataset)
#> <bq_dataset> elite-magpie-257717.test_dataset
# DBI connection to the test dataset. NOTE(review): KeyFilePath/OAuthMechanism
# look like ODBC-driver options, not dbConnect() arguments documented by
# bigrquery -- they are likely ignored here; auth comes from the cached token.
conn = DBI::dbConnect(
bigrquery::bigquery(),
project = bq_test_project(),
dataset = "test_dataset",
KeyFilePath = "google_service_key.json",
OAuthMechanism = 0
)
if (dbExistsTable(conn, "mtcars"))
{
dbRemoveTable(conn, "mtcars")
}
# Writing a LOCAL data frame works fine -- this is dbWriteTable()'s contract.
dbWriteTable(conn, "mtcars", mtcars)
#######################################################
### Access BQ table
# tbl() returns a lazy reference to the remote table (no data is pulled).
mtcars_tbl = tbl(conn, "mtcars")
class(mtcars_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Create new virtual table
# filter() on a lazy tbl only composes SQL; nothing executes yet.
hp_gt_100_tbl = mtcars_tbl %>% filter(hp>100)
class(hp_gt_100_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Write new table
# This is the failing step: there is no dbWriteTable() method for a lazy
# tbl ("tbl_BigQueryConnection"), only for local data frames.
dbWriteTable(conn, "hp_gt_100", hp_gt_100_tbl)
#> Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'dbWriteTable' for signature '"BigQueryConnection", "character", "tbl_BigQueryConnection"'
dbExecute(conn, "DROP TABLE mtcars")
#> [1] 0
# Fails with notFound because the write above never created hp_gt_100.
dbExecute(conn, "DROP TABLE hp_gt_100")
#> Error: Job 'elite-magpie-257717.job_O8e7BtdfAnAb_8Vdtwybibgd7DpA.US' failed
#> x Not found: Table elite-magpie-257717:test_dataset.hp_gt_100 [notFound]
由 reprex package (v0.3.0)
于 2020-11-11 创建
我认为使用您当前的方法无法通过 dbWriteTable
做到这一点。dbWriteTable 的作用是
"将[本地]数据框写入、覆盖或附加到数据库 table"(来源)。
因此,一种选择是先将该数据 collect 到 R 中,然后再使用 dbWriteTable
将其写回 SQL。但这很可能效率低下。
我推荐的方法是创建一个 bigquery INSERT INTO 语句并将其传递给 dbExecute
。类似于以下内容:
# Build an "INSERT INTO ... <rendered SELECT>" statement from the lazy tbl's
# SQL and run it server-side, avoiding a round trip of the data through R.
# NOTE(review): {db}.{schema}.{tbl_name} is generic SQL-server style; for
# BigQuery the destination is project.dataset.table -- adjust accordingly.
sql_query <- glue::glue("INSERT INTO {db}.{schema}.{tbl_name}\n",
dbplyr::sql_render(input_tbl))
# as.character() strips the glue class so DBI receives a plain string.
result <- dbExecute(db_connection, as.character(sql_query))
sql_render
将采用您当前定义的虚拟 table 和 return 查询的文本。 dbExecute
会将此命令传递给要执行的 bigquery 服务器。
请注意,我对 bigquery 的 INSERT INTO
语法不够熟悉,无法确保上面 sql_query
的语法正确,但据我在 SQL Server 上大量使用 dbplyr 和 DBI 的经验,这种一般方法是可行的。
我接受了 Simon S.A. 的回答。不过,我确实设法利用 bigrquery
的函数 bq_project_query
找到了一种更直接的方法。
# Accepted workaround: render the lazy tbl's SQL with dbplyr::sql_render()
# and hand it to bigrquery::bq_project_query() with a `destination` table,
# so BigQuery materializes the result server-side -- no data enters R.
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(bigrquery)
# Re-authenticate explicitly with the desired account.
bq_deauth()
bq_auth(email="ariel.balter@gmail.com")
############ CREATE BQ TABLE TO ACCESS #################
dataset = bq_dataset("elite-magpie-257717", "test_dataset")
if (bq_dataset_exists(dataset))
{
bq_dataset_delete(dataset, delete_contents = T)
}
bq_dataset_create(dataset)
#> <bq_dataset> elite-magpie-257717.test_dataset
# NOTE(review): KeyFilePath/OAuthMechanism look like ODBC-driver options and
# are likely ignored by bigrquery's dbConnect(); auth comes from bq_auth().
conn = dbConnect(
bigrquery::bigquery(),
project = "elite-magpie-257717",
dataset = "test_dataset",
KeyFilePath = "google_service_key.json",
OAuthMechanism = 0
)
dbWriteTable(conn, "mtcars", mtcars, overwrite=T)
dbListTables(conn)
#> [1] "mtcars"
#######################################################
### Access BQ table
# Qualify with the dataset so the rendered SQL below resolves correctly.
mtcars_tbl = tbl(conn, "test_dataset.mtcars")
class(mtcars_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Create new virtual table
hp_gt00_tbl = mtcars_tbl %>% filter(hp>100)
class(hp_gt00_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
# Inspect the SQL that dbplyr generated for the lazy tbl.
hp_gt00_tbl %>% dbplyr::sql_render()
#> <SQL> SELECT *
#> FROM `test_dataset.mtcars`
#> WHERE (`hp` > 100.0)
# Run the rendered query as a BigQuery job; `destination` makes BigQuery
# write the result straight into a new table.
bq_project_query(
x = dataset$project,
query = hp_gt00_tbl %>% dbplyr::sql_render(),
destination = bq_table(dataset, "hp_gt_00")
)
#> <bq_table> elite-magpie-257717.test_dataset.hp_gt_00
# Confirm both the source and the newly materialized table exist.
bq_dataset_tables(dataset)
#> [[1]]
#> <bq_table> elite-magpie-257717.test_dataset.hp_gt_00
#>
#> [[2]]
#> <bq_table> elite-magpie-257717.test_dataset.mtcars
# Clean up the whole test dataset.
bq_dataset_delete(dataset, delete_contents = T)
由 reprex package (v0.3.0)
于 2020-11-15 创建
我希望能够
- 访问 BQ table。它的 class 为:
[1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
- 使用 dbplyr 对该 table 进行变换以创建新的 table。它同样具有 class:
[1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
[4] "tbl_lazy" "tbl"
- 将这个新的 table 写入 BQ。
我收到以下错误:
Error in (function (classes, fdef, mtable) : unable to find an inherited method for function ‘dbWriteTable’ for signature ‘"BigQueryConnection", "character", "tbl_BigQueryConnection"’
MRE
# Reproducible example (reprex): create a BigQuery test dataset and table,
# open a DBI connection, build a lazy (virtual) tbl with dplyr, then try to
# write the lazy tbl back with dbWriteTable() -- which fails, because
# dbWriteTable() only accepts a local data frame, not a lazy tbl.
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(bigrquery)
############ CREATE BQ TABLE TO ACCESS #################
# Start from a clean slate: drop the test dataset (and its contents) if it
# already exists, then recreate it.
dataset = bq_dataset(bq_test_project(), "test_dataset")
if (bq_dataset_exists(dataset))
{
bq_dataset_delete(dataset, delete_contents = T)
}
#> Suitable tokens found in the cache, associated with these emails:
#> * ariel.balter@gmail.com
#> * ariel.balter@providence.org
#> The first will be used.
#> Using an auto-discovered, cached token.
#> To suppress this message, modify your code or options to clearly consent to the use of a cached token.
#> See gargle's "Non-interactive auth" vignette for more details:
#> https://gargle.r-lib.org/articles/non-interactive-auth.html
#> The bigrquery package is using a cached token for ariel.balter@gmail.com.
bq_dataset_create(dataset)
#> <bq_dataset> elite-magpie-257717.test_dataset
# DBI connection to the test dataset. NOTE(review): KeyFilePath/OAuthMechanism
# look like ODBC-driver options, not dbConnect() arguments documented by
# bigrquery -- they are likely ignored here; auth comes from the cached token.
conn = DBI::dbConnect(
bigrquery::bigquery(),
project = bq_test_project(),
dataset = "test_dataset",
KeyFilePath = "google_service_key.json",
OAuthMechanism = 0
)
if (dbExistsTable(conn, "mtcars"))
{
dbRemoveTable(conn, "mtcars")
}
# Writing a LOCAL data frame works fine -- this is dbWriteTable()'s contract.
dbWriteTable(conn, "mtcars", mtcars)
#######################################################
### Access BQ table
# tbl() returns a lazy reference to the remote table (no data is pulled).
mtcars_tbl = tbl(conn, "mtcars")
class(mtcars_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Create new virtual table
# filter() on a lazy tbl only composes SQL; nothing executes yet.
hp_gt_100_tbl = mtcars_tbl %>% filter(hp>100)
class(hp_gt_100_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Write new table
# This is the failing step: there is no dbWriteTable() method for a lazy
# tbl ("tbl_BigQueryConnection"), only for local data frames.
dbWriteTable(conn, "hp_gt_100", hp_gt_100_tbl)
#> Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'dbWriteTable' for signature '"BigQueryConnection", "character", "tbl_BigQueryConnection"'
dbExecute(conn, "DROP TABLE mtcars")
#> [1] 0
# Fails with notFound because the write above never created hp_gt_100.
dbExecute(conn, "DROP TABLE hp_gt_100")
#> Error: Job 'elite-magpie-257717.job_O8e7BtdfAnAb_8Vdtwybibgd7DpA.US' failed
#> x Not found: Table elite-magpie-257717:test_dataset.hp_gt_100 [notFound]
由 reprex package (v0.3.0)
于 2020-11-11 创建
我认为使用您当前的方法无法通过 dbWriteTable
做到这一点。dbWriteTable 的作用是
"将[本地]数据框写入、覆盖或附加到数据库 table"(来源)。
因此,一种选择是先将该数据 collect 到 R 中,然后再使用 dbWriteTable
将其写回 SQL。但这很可能效率低下。
我推荐的方法是创建一个 bigquery INSERT INTO 语句并将其传递给 dbExecute
。类似于以下内容:
# Build an "INSERT INTO ... <rendered SELECT>" statement from the lazy tbl's
# SQL and run it server-side, avoiding a round trip of the data through R.
# NOTE(review): {db}.{schema}.{tbl_name} is generic SQL-server style; for
# BigQuery the destination is project.dataset.table -- adjust accordingly.
sql_query <- glue::glue("INSERT INTO {db}.{schema}.{tbl_name}\n",
dbplyr::sql_render(input_tbl))
# as.character() strips the glue class so DBI receives a plain string.
result <- dbExecute(db_connection, as.character(sql_query))
sql_render
将采用您当前定义的虚拟 table 和 return 查询的文本。 dbExecute
会将此命令传递给要执行的 bigquery 服务器。
请注意,我对 bigquery 的 INSERT INTO
语法不够熟悉,无法确保上面 sql_query
的语法正确,但据我在 SQL Server 上大量使用 dbplyr 和 DBI 的经验,这种一般方法是可行的。
我接受了 Simon S.A. 的回答。不过,我确实设法利用 bigrquery
的函数 bq_project_query
找到了一种更直接的方法。
# Accepted workaround: render the lazy tbl's SQL with dbplyr::sql_render()
# and hand it to bigrquery::bq_project_query() with a `destination` table,
# so BigQuery materializes the result server-side -- no data enters R.
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(bigrquery)
# Re-authenticate explicitly with the desired account.
bq_deauth()
bq_auth(email="ariel.balter@gmail.com")
############ CREATE BQ TABLE TO ACCESS #################
dataset = bq_dataset("elite-magpie-257717", "test_dataset")
if (bq_dataset_exists(dataset))
{
bq_dataset_delete(dataset, delete_contents = T)
}
bq_dataset_create(dataset)
#> <bq_dataset> elite-magpie-257717.test_dataset
# NOTE(review): KeyFilePath/OAuthMechanism look like ODBC-driver options and
# are likely ignored by bigrquery's dbConnect(); auth comes from bq_auth().
conn = dbConnect(
bigrquery::bigquery(),
project = "elite-magpie-257717",
dataset = "test_dataset",
KeyFilePath = "google_service_key.json",
OAuthMechanism = 0
)
dbWriteTable(conn, "mtcars", mtcars, overwrite=T)
dbListTables(conn)
#> [1] "mtcars"
#######################################################
### Access BQ table
# Qualify with the dataset so the rendered SQL below resolves correctly.
mtcars_tbl = tbl(conn, "test_dataset.mtcars")
class(mtcars_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
### Create new virtual table
hp_gt00_tbl = mtcars_tbl %>% filter(hp>100)
class(hp_gt00_tbl)
#> [1] "tbl_BigQueryConnection" "tbl_dbi" "tbl_sql"
#> [4] "tbl_lazy" "tbl"
# Inspect the SQL that dbplyr generated for the lazy tbl.
hp_gt00_tbl %>% dbplyr::sql_render()
#> <SQL> SELECT *
#> FROM `test_dataset.mtcars`
#> WHERE (`hp` > 100.0)
# Run the rendered query as a BigQuery job; `destination` makes BigQuery
# write the result straight into a new table.
bq_project_query(
x = dataset$project,
query = hp_gt00_tbl %>% dbplyr::sql_render(),
destination = bq_table(dataset, "hp_gt_00")
)
#> <bq_table> elite-magpie-257717.test_dataset.hp_gt_00
# Confirm both the source and the newly materialized table exist.
bq_dataset_tables(dataset)
#> [[1]]
#> <bq_table> elite-magpie-257717.test_dataset.hp_gt_00
#>
#> [[2]]
#> <bq_table> elite-magpie-257717.test_dataset.mtcars
# Clean up the whole test dataset.
bq_dataset_delete(dataset, delete_contents = T)
由 reprex package (v0.3.0)
于 2020-11-15 创建