如何在 Google BigQuery 中使用加载作业(load job)时创建按日期分区的表?
How to create a date partitioned table while using a loadjob in google Bigquery?
有人可以解释一下,在 Google BigQuery 中使用加载作业时,如何通过 JobConfig 创建按日期分区的表吗?
我看不懂文档,如果有人能用例子解释会很有帮助。
已编辑:
感谢 @irvifa,我找到了相应的对象,但仍然无法创建按时间分区的表(TimePartitioned Table)。下面是我正在尝试的代码:
import pandas
from google.cloud import bigquery
# Asker's attempt (reported as not working): load a DataFrame into a BigQuery
# table through a load job whose config requests time partitioning.
# NOTE(review): indentation was lost in this paste; every line after the `def`
# is meant to be part of the function body.
def load_df(self, df):
# Placeholder identifiers, joined below into "ProjectID.Dataset.TableName".
project_id="ProjectID"
dataset_id="Dataset"
table_id="TableName"
table_ref=project_id+"."+dataset_id+"."+table_id
# Partition the destination table on the "PartitionColumn" field.
time_partitioning = bigquery.table.TimePartitioning(field="PartitionColumn")
job_config = bigquery.LoadJobConfig(
# "Schema" looks like a placeholder for the real schema -- TODO confirm.
schema="Schema",
# NOTE(review): there is no comma after this argument, so the call is a
# syntax error as written. The working answer later in this thread drops
# `destinationTable` entirely and passes the table only to
# load_table_from_dataframe.
destinationTable=table_ref
write_disposition="WRITE_TRUNCATE",
# NOTE(review): the working answer later in this thread spells this keyword
# `time_partitioning` (snake_case); the camelCase name here is presumably
# not accepted by LoadJobConfig -- confirm against the library docs.
timePartitioning=time_partitioning
)
# NOTE(review): `Client` is not defined in this snippet; presumably an
# existing bigquery.Client instance -- confirm.
Job = Client.load_table_from_dataframe(df, table_ref,
job_config=job_config)
# Call result() on the returned job object.
Job.result()
不确定这是否有帮助,但您可以参考以下示例来运行带分区的作业:
from datetime import datetime, time
from concurrent import futures
import math
from pathlib import Path
from google.cloud import bigquery
def run_query(self, query_job_config):
    """Run a SQL query whose job config requests a time-partitioned destination.

    The job config uses standard SQL (``use_legacy_sql=False``), the
    ``WRITE_APPEND`` write disposition, and time partitioning on the
    ``partition_date`` field. After submitting the query through
    ``self.client``, ``result()`` is called on the returned job.

    Args:
        query_job_config: Mapping read for two keys: ``'sql'`` (the query
            text) and ``'destination_dataset_table'`` (the destination).
    """
    config = bigquery.QueryJobConfig(
        destination=query_job_config['destination_dataset_table'],
        time_partitioning=bigquery.table.TimePartitioning(field="partition_date"),
        use_legacy_sql=False,
        allow_large_results=True,
        write_disposition='WRITE_APPEND',
    )
    self.client.query(query_job_config['sql'], job_config=config).result()
感谢 irvifa。
我要做的是加载 DataFrame,所以需要的是 LoadJobConfig,不过它的用法非常相似。
我把我的答案贴在这里,以防有人需要 LoadJob 的示例。
import pandas
from google.cloud import bigquery
def load_df(self, df):
    """Load a DataFrame into a BigQuery table using a time-partitioned load job.

    The destination is assembled from placeholder project, dataset and table
    names as ``ProjectID.Dataset.TableName``. The load job config uses the
    ``WRITE_TRUNCATE`` write disposition and time partitioning on the
    ``PartitionColumn`` field. After submitting the load, ``result()`` is
    called on the returned job.

    Args:
        df: DataFrame passed to ``load_table_from_dataframe``.
    """
    # Placeholder identifiers -- replace with real project/dataset/table names.
    destination = ".".join(("ProjectID", "Dataset", "TableName"))

    # "Schema" looks like a placeholder for the real schema -- TODO confirm.
    load_config = bigquery.LoadJobConfig(
        schema="Schema",
        write_disposition="WRITE_TRUNCATE",
        time_partitioning=bigquery.table.TimePartitioning(field="PartitionColumn"),
    )

    # NOTE(review): `Client` is not defined in this snippet; presumably an
    # existing bigquery.Client instance -- confirm.
    load_job = Client.load_table_from_dataframe(
        df, destination, job_config=load_config
    )
    load_job.result()
有人可以解释一下,在 Google BigQuery 中使用加载作业时,如何通过 JobConfig 创建按日期分区的表吗?
我看不懂文档,如果有人能用例子解释会很有帮助。
已编辑:感谢 @irvifa,我找到了相应的对象,但仍然无法创建按时间分区的表(TimePartitioned Table)。下面是我正在尝试的代码:
import pandas
from google.cloud import bigquery
# Asker's attempt (reported as not working): load a DataFrame into a BigQuery
# table through a load job whose config requests time partitioning.
# NOTE(review): indentation was lost in this paste; every line after the `def`
# is meant to be part of the function body.
def load_df(self, df):
# Placeholder identifiers, joined below into "ProjectID.Dataset.TableName".
project_id="ProjectID"
dataset_id="Dataset"
table_id="TableName"
table_ref=project_id+"."+dataset_id+"."+table_id
# Partition the destination table on the "PartitionColumn" field.
time_partitioning = bigquery.table.TimePartitioning(field="PartitionColumn")
job_config = bigquery.LoadJobConfig(
# "Schema" looks like a placeholder for the real schema -- TODO confirm.
schema="Schema",
# NOTE(review): there is no comma after this argument, so the call is a
# syntax error as written. The working answer later in this thread drops
# `destinationTable` entirely and passes the table only to
# load_table_from_dataframe.
destinationTable=table_ref
write_disposition="WRITE_TRUNCATE",
# NOTE(review): the working answer later in this thread spells this keyword
# `time_partitioning` (snake_case); the camelCase name here is presumably
# not accepted by LoadJobConfig -- confirm against the library docs.
timePartitioning=time_partitioning
)
# NOTE(review): `Client` is not defined in this snippet; presumably an
# existing bigquery.Client instance -- confirm.
Job = Client.load_table_from_dataframe(df, table_ref,
job_config=job_config)
# Call result() on the returned job object.
Job.result()
不确定这是否有帮助,但您可以参考以下示例来运行带分区的作业:
from datetime import datetime, time
from concurrent import futures
import math
from pathlib import Path
from google.cloud import bigquery
def run_query(self, query_job_config):
    """Run a SQL query whose job config requests a time-partitioned destination.

    The job config uses standard SQL (``use_legacy_sql=False``), the
    ``WRITE_APPEND`` write disposition, and time partitioning on the
    ``partition_date`` field. After submitting the query through
    ``self.client``, ``result()`` is called on the returned job.

    Args:
        query_job_config: Mapping read for two keys: ``'sql'`` (the query
            text) and ``'destination_dataset_table'`` (the destination).
    """
    config = bigquery.QueryJobConfig(
        destination=query_job_config['destination_dataset_table'],
        time_partitioning=bigquery.table.TimePartitioning(field="partition_date"),
        use_legacy_sql=False,
        allow_large_results=True,
        write_disposition='WRITE_APPEND',
    )
    self.client.query(query_job_config['sql'], job_config=config).result()
感谢 irvifa。
我要做的是加载 DataFrame,所以需要的是 LoadJobConfig,不过它的用法非常相似。
我把我的答案贴在这里,以防有人需要 LoadJob 的示例。
import pandas
from google.cloud import bigquery
def load_df(self, df):
    """Load a DataFrame into a BigQuery table using a time-partitioned load job.

    The destination is assembled from placeholder project, dataset and table
    names as ``ProjectID.Dataset.TableName``. The load job config uses the
    ``WRITE_TRUNCATE`` write disposition and time partitioning on the
    ``PartitionColumn`` field. After submitting the load, ``result()`` is
    called on the returned job.

    Args:
        df: DataFrame passed to ``load_table_from_dataframe``.
    """
    # Placeholder identifiers -- replace with real project/dataset/table names.
    destination = ".".join(("ProjectID", "Dataset", "TableName"))

    # "Schema" looks like a placeholder for the real schema -- TODO confirm.
    load_config = bigquery.LoadJobConfig(
        schema="Schema",
        write_disposition="WRITE_TRUNCATE",
        time_partitioning=bigquery.table.TimePartitioning(field="PartitionColumn"),
    )

    # NOTE(review): `Client` is not defined in this snippet; presumably an
    # existing bigquery.Client instance -- confirm.
    load_job = Client.load_table_from_dataframe(
        df, destination, job_config=load_config
    )
    load_job.result()