标准 sql:将数组中的字符串转换为 float64
standard sql: casting strings in array as float64
我正在尝试将字段 GCAM_value 和 Themes_value 转换为数字,例如 float64。
我试过 CAST(regexp_extract(x, r'^(.*?):') as FLOAT64) 但当代码遇到 NULL 值时收到错误消息。
#standardSQL
-- Splits the delimited GCAM and V2Themes strings into separate "field" and
-- "value" arrays. Nothing is cast here, so every array element is a STRING.
SELECT
  GKGRECORDID,
  DATE,
  -- GCAM is split on ','; keep the text before the first ':' of each element.
  -- Elements without a ':' yield NULL and are filtered out.
  ARRAY(
    SELECT regexp_extract(x, r'^(.*?)\:')
    FROM UNNEST(split(GCAM, ',')) AS x
    WHERE regexp_extract(x, r'^(.*?)\:') IS NOT NULL
  ) AS GCAM_field,
  -- Keep the trailing run of non-':' characters of each element.
  -- This pattern can also match an empty string, so the IS NOT NULL filter
  -- does not remove elements whose value part is empty.
  ARRAY(
    SELECT regexp_extract(x, r'[^:]*$')
    FROM UNNEST(split(GCAM, ',')) AS x
    WHERE regexp_extract(x, r'[^:]*$') IS NOT NULL
  ) AS GCAM_value,
  -- V2Themes is split on ';'; keep the text before the first ',' of each element.
  ARRAY(
    SELECT regexp_extract(x, r'^(.*?)\,')
    FROM UNNEST(split(V2Themes, ';')) AS x
    WHERE regexp_extract(x, r'^(.*?)\,') IS NOT NULL
  ) AS Theme_field,
  -- Keep the trailing run of non-',' characters of each element
  -- (may be an empty string, see the note on GCAM_value).
  ARRAY(
    SELECT regexp_extract(x, r'[^,]*$')
    FROM UNNEST(split(V2Themes, ';')) AS x
    WHERE regexp_extract(x, r'[^,]*$') IS NOT NULL
  ) AS Theme_value,
  TranslationInfo,
  V2Tone,
  SourceCollectionIdentifier,
  SourceCommonName,
  DocumentIdentifier
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are identical, so only rows whose _PARTITIONTIME equals this
-- timestamp are selected. Each quoted literal must stay on a single line.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
我希望 GCAM_value 和 Themes_value 的数组项采用某种数字格式,如 float64。
下面应该是您不错的起点
#standardSQL
-- Splits the delimited GCAM and V2Themes strings into "field" and "value"
-- arrays, returning the value arrays as ARRAY<FLOAT64> instead of
-- ARRAY<STRING>.
--
-- SAFE_CAST yields NULL instead of raising an error when an element's value
-- part is empty or is not a valid number. Those elements are filtered out,
-- because BigQuery rejects query results whose arrays contain NULL elements.
-- WITH OFFSET plus ORDER BY keeps every output array in source element order.
-- NOTE(review): the field and value arrays are filtered independently, so an
-- element dropped from only one of them shifts the positions in that array;
-- confirm before pairing the two arrays by index.
SELECT
  GKGRECORDID,
  DATE,
  -- GCAM is split on ','; keep the text before the first ':' of each element.
  ARRAY(
    SELECT REGEXP_EXTRACT(x, r'^(.*?)\:')
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r'^(.*?)\:') IS NOT NULL
    ORDER BY pos
  ) AS GCAM_field,
  -- Trailing run of non-':' characters of each element, converted to FLOAT64.
  ARRAY(
    SELECT SAFE_CAST(REGEXP_EXTRACT(x, r'[^:]*$') AS FLOAT64)
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE SAFE_CAST(REGEXP_EXTRACT(x, r'[^:]*$') AS FLOAT64) IS NOT NULL
    ORDER BY pos
  ) AS GCAM_value,
  -- V2Themes is split on ';'; keep the text before the first ',' of each element.
  ARRAY(
    SELECT REGEXP_EXTRACT(x, r'^(.*?)\,')
    FROM UNNEST(SPLIT(V2Themes, ';')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r'^(.*?)\,') IS NOT NULL
    ORDER BY pos
  ) AS Theme_field,
  -- Trailing run of non-',' characters of each element, converted to FLOAT64.
  ARRAY(
    SELECT SAFE_CAST(REGEXP_EXTRACT(x, r'[^,]*$') AS FLOAT64)
    FROM UNNEST(SPLIT(V2Themes, ';')) AS x WITH OFFSET AS pos
    WHERE SAFE_CAST(REGEXP_EXTRACT(x, r'[^,]*$') AS FLOAT64) IS NOT NULL
    ORDER BY pos
  ) AS Theme_value,
  TranslationInfo,
  V2Tone,
  SourceCollectionIdentifier,
  SourceCommonName,
  DocumentIdentifier
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are identical, so only rows whose _PARTITIONTIME equals this
-- timestamp are selected.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
它 returns REPEATED FLOAT
而不是 REPEATED STRING
各个字段
我正在尝试将字段 GCAM_value 和 Themes_value 转换为数字,例如 float64。
我试过 CAST(regexp_extract(x, r'^(.*?):') as FLOAT64) 但当代码遇到 NULL 值时收到错误消息。
#standardSQL
-- Splits the delimited GCAM and V2Themes strings into separate "field" and
-- "value" arrays. Nothing is cast here, so every array element is a STRING.
SELECT
  GKGRECORDID,
  DATE,
  -- GCAM is split on ','; keep the text before the first ':' of each element.
  -- Elements without a ':' yield NULL and are filtered out.
  ARRAY(
    SELECT regexp_extract(x, r'^(.*?)\:')
    FROM UNNEST(split(GCAM, ',')) AS x
    WHERE regexp_extract(x, r'^(.*?)\:') IS NOT NULL
  ) AS GCAM_field,
  -- Keep the trailing run of non-':' characters of each element.
  -- This pattern can also match an empty string, so the IS NOT NULL filter
  -- does not remove elements whose value part is empty.
  ARRAY(
    SELECT regexp_extract(x, r'[^:]*$')
    FROM UNNEST(split(GCAM, ',')) AS x
    WHERE regexp_extract(x, r'[^:]*$') IS NOT NULL
  ) AS GCAM_value,
  -- V2Themes is split on ';'; keep the text before the first ',' of each element.
  ARRAY(
    SELECT regexp_extract(x, r'^(.*?)\,')
    FROM UNNEST(split(V2Themes, ';')) AS x
    WHERE regexp_extract(x, r'^(.*?)\,') IS NOT NULL
  ) AS Theme_field,
  -- Keep the trailing run of non-',' characters of each element
  -- (may be an empty string, see the note on GCAM_value).
  ARRAY(
    SELECT regexp_extract(x, r'[^,]*$')
    FROM UNNEST(split(V2Themes, ';')) AS x
    WHERE regexp_extract(x, r'[^,]*$') IS NOT NULL
  ) AS Theme_value,
  TranslationInfo,
  V2Tone,
  SourceCollectionIdentifier,
  SourceCommonName,
  DocumentIdentifier
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are identical, so only rows whose _PARTITIONTIME equals this
-- timestamp are selected. Each quoted literal must stay on a single line.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
我希望 GCAM_value 和 Themes_value 的数组项采用某种数字格式,如 float64。
下面应该是您不错的起点
#standardSQL
-- Splits the delimited GCAM and V2Themes strings into "field" and "value"
-- arrays, returning the value arrays as ARRAY<FLOAT64> instead of
-- ARRAY<STRING>.
--
-- SAFE_CAST yields NULL instead of raising an error when an element's value
-- part is empty or is not a valid number. Those elements are filtered out,
-- because BigQuery rejects query results whose arrays contain NULL elements.
-- WITH OFFSET plus ORDER BY keeps every output array in source element order.
-- NOTE(review): the field and value arrays are filtered independently, so an
-- element dropped from only one of them shifts the positions in that array;
-- confirm before pairing the two arrays by index.
SELECT
  GKGRECORDID,
  DATE,
  -- GCAM is split on ','; keep the text before the first ':' of each element.
  ARRAY(
    SELECT REGEXP_EXTRACT(x, r'^(.*?)\:')
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r'^(.*?)\:') IS NOT NULL
    ORDER BY pos
  ) AS GCAM_field,
  -- Trailing run of non-':' characters of each element, converted to FLOAT64.
  ARRAY(
    SELECT SAFE_CAST(REGEXP_EXTRACT(x, r'[^:]*$') AS FLOAT64)
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE SAFE_CAST(REGEXP_EXTRACT(x, r'[^:]*$') AS FLOAT64) IS NOT NULL
    ORDER BY pos
  ) AS GCAM_value,
  -- V2Themes is split on ';'; keep the text before the first ',' of each element.
  ARRAY(
    SELECT REGEXP_EXTRACT(x, r'^(.*?)\,')
    FROM UNNEST(SPLIT(V2Themes, ';')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r'^(.*?)\,') IS NOT NULL
    ORDER BY pos
  ) AS Theme_field,
  -- Trailing run of non-',' characters of each element, converted to FLOAT64.
  ARRAY(
    SELECT SAFE_CAST(REGEXP_EXTRACT(x, r'[^,]*$') AS FLOAT64)
    FROM UNNEST(SPLIT(V2Themes, ';')) AS x WITH OFFSET AS pos
    WHERE SAFE_CAST(REGEXP_EXTRACT(x, r'[^,]*$') AS FLOAT64) IS NOT NULL
    ORDER BY pos
  ) AS Theme_value,
  TranslationInfo,
  V2Tone,
  SourceCollectionIdentifier,
  SourceCommonName,
  DocumentIdentifier
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are identical, so only rows whose _PARTITIONTIME equals this
-- timestamp are selected.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
它 returns REPEATED FLOAT
而不是 REPEATED STRING
各个字段