BigQuery - 获取 BigQuery 表中的总列数
BigQuery - Get the total number of columns in a BigQuery table
有没有办法查询 BigQuery table 中的总列数?我浏览了 BigQuery 文档,但没有找到任何相关内容。
提前致谢!
有几种方法可以做到这一点:
A. 使用 bq 命令行工具,并用 Linux 上的 jq 工具解析 JSON。
bq --format=json show publicdata:samples.shakespeare | jq '.schema.fields | length'
这个输出:
4
B. 使用 REST API 进行 Tables:get 调用
GET https://www.googleapis.com/bigquery/v2/projects/projectId/datasets/datasetId/tables/tableId
该调用会返回完整的 JSON,您可以解析它并查询 schema.fields 的长度。
{
"kind":"bigquery#table",
"description":"This dataset is a word index of the works of Shakespeare, giving the number of times each word appears in each corpus.",
"creationTime":"1335916045099",
"tableReference":{
"projectId":"publicdata",
"tableId":"shakespeare",
"datasetId":"samples"
},
"numRows":"164656",
"numBytes":"6432064",
"etag":"\"E7ZNanj79wmDHI9DmeCWoYoUpAE/MTQxMzkyNjgyNzI1Nw\"",
"lastModifiedTime":"1413926827257",
"type":"TABLE",
"id":"publicdata:samples.shakespeare",
"selfLink":"https://www.googleapis.com/bigquery/v2/projects/publicdata/datasets/samples/tables/shakespeare",
"schema":{
"fields":[
{
"description":"A single unique word (where whitespace is the delimiter) extracted from a corpus.",
"type":"STRING",
"name":"word",
"mode":"REQUIRED"
},
{
"description":"The number of times this word appears in this corpus.",
"type":"INTEGER",
"name":"word_count",
"mode":"REQUIRED"
},
{
"description":"The work from which this word was extracted.",
"type":"STRING",
"name":"corpus",
"mode":"REQUIRED"
},
{
"description":"The year in which this corpus was published.",
"type":"INTEGER",
"name":"corpus_date",
"mode":"REQUIRED"
}
]
}
}
这是一个不需要 jq 的替代方案,但成本更高("costly") ;-):
bq --format=csv query "select * FROM publicdata:samples.shakespeare LIMIT 1"|tail -n1|sed 's/[^,]//g' | wc -c
注意:我怀疑这是否适用于包含多个 repeated/nested 列的表。
只需添加一个片段即可获取 python 中的架构:
from gcloud import bigquery
# Looks up one table by dataset name and table name and stores the number of
# entries in its schema in `no_columns`.

# Replace these placeholders with the dataset and table to inspect.
DATASET_NAME = "<<dataset_name>>"
TABLE_NAME = "<<table_name>>"

client = bigquery.Client(project="project_id")
dataset = client.list_datasets()

# Stays None when no matching dataset/table is found, so later code can tell
# "not found" apart from a real column count.
no_columns = None
flag = 0  # set to 1 once the table has been found, to stop the outer loop
# NOTE(review): indexing with [0] assumes list_datasets()/list_tables() return
# a tuple whose first item is the list of results — confirm for your version
# of the client library.
for ds in dataset[0]:
    if flag == 1:
        break
    if ds.name == DATASET_NAME:
        for table in ds.list_tables()[0]:
            if table.name == TABLE_NAME:
                # presumably refreshes the table metadata so that `schema`
                # is populated — verify against the library documentation
                table.reload()
                no_columns = len(table.schema)
                flag = 1
                break
no_columns 变量包含所需表的列数。
这会很有用
#standardSQL
with table1 as(
select "somename1" as name, "someaddress1" adrs union all
select "somename2" as name, "someaddress2" adrs union all
select "somename3" as name, "someaddress3" adrs
)
select array_length(regexp_extract_all(to_json_string(table1),"\":"))total_columns from table1 limit 1
在node.js中我使用这段代码来获取长度:
const { BigQuery } = require('@google-cloud/bigquery');
// Connection settings; replace every placeholder value with your own.
const params = {
  bq_project_id: "my_project_id", // YOUR PROJECT ID
  bq_dataset_id: "my_dataset_id", // YOUR DATASET ID
  bq_table_id: "my_table_id", // YOUR TABLE ID
  bq_keyFilename: './my_bq_key.json', // YOUR KEY PATH
};

// Client built from the project id and key file path configured above.
const bigquery = new BigQuery({
  projectId: params.bq_project_id,
  keyFilename: params.bq_keyFilename,
});
/**
 * Logs and returns the number of top-level columns of the table identified by
 * `params.bq_dataset_id` / `params.bq_table_id`.
 *
 * NOTE(review): assumes `get()` resolves to an array whose second element is
 * the raw table resource (the object carrying `schema.fields`) — confirm
 * against the client library version in use.
 *
 * @returns {Promise<number>} The column count; 0 when the table resource has
 *   no schema or no fields.
 */
async function colNums() {
  const [, tableResource] = await bigquery
    .dataset(params.bq_dataset_id)
    .table(params.bq_table_id)
    .get();

  // A table resource without `schema`/`schema.fields` would otherwise raise a
  // TypeError here; report zero columns instead.
  const schema = tableResource && tableResource.schema;
  const fields = (schema && schema.fields) || [];

  console.log(fields.length);
  return fields.length;
}
colNums();
我不确定"resp[1]"是否适用于所有人(如果有问题,请尝试查看resp对象)
您现在可以使用 INFORMATION_SCHEMA - 一系列视图,提供对有关数据集、表和视图的元数据的访问
例如
SELECT * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM `bigquery-public-data.hacker_news.INFORMATION_SCHEMA.COLUMNS`
WHERE table_name = 'stories'
当您需要 RECORD(或 STRUCT)列中的所有嵌套字段时,INFORMATION_SCHEMA.COLUMN_FIELD_PATHS 视图也很有用。
使用 SQL 查询和内置 INFORMATION_SCHEMA 表:
SELECT count(distinct column_name)
FROM `project_id`.name_of_dataset.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = "name_of_table"
有没有办法查询 BigQuery table 中的总列数?我浏览了 BigQuery 文档,但没有找到任何相关内容。
提前致谢!
有几种方法可以做到这一点:
A. 使用 bq 命令行工具,并用 Linux 上的 jq 工具解析 JSON。
bq --format=json show publicdata:samples.shakespeare | jq '.schema.fields | length'
这个输出:
4
B. 使用 REST API 进行 Tables:get 调用
GET https://www.googleapis.com/bigquery/v2/projects/projectId/datasets/datasetId/tables/tableId
该调用会返回完整的 JSON,您可以解析它并查询 schema.fields 的长度。
{
"kind":"bigquery#table",
"description":"This dataset is a word index of the works of Shakespeare, giving the number of times each word appears in each corpus.",
"creationTime":"1335916045099",
"tableReference":{
"projectId":"publicdata",
"tableId":"shakespeare",
"datasetId":"samples"
},
"numRows":"164656",
"numBytes":"6432064",
"etag":"\"E7ZNanj79wmDHI9DmeCWoYoUpAE/MTQxMzkyNjgyNzI1Nw\"",
"lastModifiedTime":"1413926827257",
"type":"TABLE",
"id":"publicdata:samples.shakespeare",
"selfLink":"https://www.googleapis.com/bigquery/v2/projects/publicdata/datasets/samples/tables/shakespeare",
"schema":{
"fields":[
{
"description":"A single unique word (where whitespace is the delimiter) extracted from a corpus.",
"type":"STRING",
"name":"word",
"mode":"REQUIRED"
},
{
"description":"The number of times this word appears in this corpus.",
"type":"INTEGER",
"name":"word_count",
"mode":"REQUIRED"
},
{
"description":"The work from which this word was extracted.",
"type":"STRING",
"name":"corpus",
"mode":"REQUIRED"
},
{
"description":"The year in which this corpus was published.",
"type":"INTEGER",
"name":"corpus_date",
"mode":"REQUIRED"
}
]
}
}
这是一个不需要 jq 的替代方案,但成本更高("costly") ;-):
bq --format=csv query "select * FROM publicdata:samples.shakespeare LIMIT 1"|tail -n1|sed 's/[^,]//g' | wc -c
注意:我怀疑这是否适用于包含多个 repeated/nested 列的表。
只需添加一个片段即可获取 python 中的架构:
from gcloud import bigquery
# Looks up one table by dataset name and table name and stores the number of
# entries in its schema in `no_columns`.

# Replace these placeholders with the dataset and table to inspect.
DATASET_NAME = "<<dataset_name>>"
TABLE_NAME = "<<table_name>>"

client = bigquery.Client(project="project_id")
dataset = client.list_datasets()

# Stays None when no matching dataset/table is found, so later code can tell
# "not found" apart from a real column count.
no_columns = None
flag = 0  # set to 1 once the table has been found, to stop the outer loop
# NOTE(review): indexing with [0] assumes list_datasets()/list_tables() return
# a tuple whose first item is the list of results — confirm for your version
# of the client library.
for ds in dataset[0]:
    if flag == 1:
        break
    if ds.name == DATASET_NAME:
        for table in ds.list_tables()[0]:
            if table.name == TABLE_NAME:
                # presumably refreshes the table metadata so that `schema`
                # is populated — verify against the library documentation
                table.reload()
                no_columns = len(table.schema)
                flag = 1
                break
no_columns 变量包含所需表的列数。
这会很有用:
#standardSQL
with table1 as(
select "somename1" as name, "someaddress1" adrs union all
select "somename2" as name, "someaddress2" adrs union all
select "somename3" as name, "someaddress3" adrs
)
select array_length(regexp_extract_all(to_json_string(table1),"\":"))total_columns from table1 limit 1
在node.js中我使用这段代码来获取长度:
const { BigQuery } = require('@google-cloud/bigquery');
// Connection settings; replace every placeholder value with your own.
const params = {
  bq_project_id: "my_project_id", // YOUR PROJECT ID
  bq_dataset_id: "my_dataset_id", // YOUR DATASET ID
  bq_table_id: "my_table_id", // YOUR TABLE ID
  bq_keyFilename: './my_bq_key.json', // YOUR KEY PATH
};

// Client built from the project id and key file path configured above.
const bigquery = new BigQuery({
  projectId: params.bq_project_id,
  keyFilename: params.bq_keyFilename,
});
/**
 * Logs and returns the number of top-level columns of the table identified by
 * `params.bq_dataset_id` / `params.bq_table_id`.
 *
 * NOTE(review): assumes `get()` resolves to an array whose second element is
 * the raw table resource (the object carrying `schema.fields`) — confirm
 * against the client library version in use.
 *
 * @returns {Promise<number>} The column count; 0 when the table resource has
 *   no schema or no fields.
 */
async function colNums() {
  const [, tableResource] = await bigquery
    .dataset(params.bq_dataset_id)
    .table(params.bq_table_id)
    .get();

  // A table resource without `schema`/`schema.fields` would otherwise raise a
  // TypeError here; report zero columns instead.
  const schema = tableResource && tableResource.schema;
  const fields = (schema && schema.fields) || [];

  console.log(fields.length);
  return fields.length;
}
colNums();
我不确定"resp[1]"是否适用于所有人(如果有问题,请尝试查看resp对象)
您现在可以使用 INFORMATION_SCHEMA - 一系列视图,提供对有关数据集、表和视图的元数据的访问
例如
SELECT * EXCEPT(is_generated, generation_expression, is_stored, is_updatable)
FROM `bigquery-public-data.hacker_news.INFORMATION_SCHEMA.COLUMNS`
WHERE table_name = 'stories'
当您需要 RECORD(或 STRUCT)列中的所有嵌套字段时,INFORMATION_SCHEMA.COLUMN_FIELD_PATHS 视图也很有用。
使用 SQL 查询和内置 INFORMATION_SCHEMA 表:
SELECT count(distinct column_name)
FROM `project_id`.name_of_dataset.INFORMATION_SCHEMA.COLUMNS
WHERE table_name = "name_of_table"