具有 AND 条件的 AWS Athena 查询 JSON 数组
AWS Athena query JSON array with AND Condition
我有如下 JSON 数据保存在 S3 中,正在使用 Athena 编写 select 语句。
{
"sample_data":{
"people":[
{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"3"
},
{
"type":"city",
"number":"4"
}
]
}
},
{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}
]
}
}
如何编写 select 语句,选出 home 号码 > 2 且 city 号码 = 4 的记录的 streetAddress 和 city?
我试过 UNNEST 但没有用。
预期输出:
streetAddress city
101 abc
我尝试了下面这个 UNNEST 写法,但它把电话号码展开成了多行,
因此无法同时按 home 和 city 进行查询,因为它们现在位于不同的行中。
-- Flatten the document into one row per (person, phone number) pair.
-- JSON path keys are case-sensitive, so the paths must match the key
-- spelling used in the sample document (streetAddress, phoneNumbers);
-- a path with the wrong case yields NULL instead of the value.
-- Each UNNEST gets its own alias so that person.n and phone.m are unambiguous.
-- idx is the position of the phone entry within that person's phoneNumbers array.
SELECT
    phone.idx,
    JSON_EXTRACT_SCALAR(person.n, '$.address.streetAddress') AS streetaddress,
    JSON_EXTRACT_SCALAR(person.n, '$.address.city') AS city,
    JSON_EXTRACT_SCALAR(phone.m, '$.type') AS type,
    JSON_EXTRACT_SCALAR(phone.m, '$.number') AS value
FROM sample_data1
CROSS JOIN UNNEST(CAST(JSON_EXTRACT(sample_data, '$.people') AS ARRAY<JSON>)) AS person (n)
CROSS JOIN UNNEST(CAST(JSON_EXTRACT(person.n, '$.address.phoneNumbers') AS ARRAY<JSON>))
    WITH ORDINALITY AS phone (m, idx);
unnest 会将数据展平为多行;因此您可以不取消嵌套,而是使用数组函数直接处理数组。
Athena 当前使用的 Presto 版本不支持 any_match,
因此您需要使用 cardinality + filter 的组合(并且它也不支持通过 JSON 路径进行过滤):
-- Inline sample rows: one JSON document per person.
WITH dataset (json_str) AS (
    VALUES
        (
            JSON '{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"11"
},
{
"type":"city",
"number":"4"
}
]
}
}'
        ),
        (
            JSON '{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}'
        )
),

-- Extract the output columns and keep the phone list as an array, so both
-- conditions can be checked on the same row without unnesting.
person AS (
    SELECT
        JSON_EXTRACT_SCALAR(json_str, '$.address.streetAddress') AS street_address,
        JSON_EXTRACT_SCALAR(json_str, '$.address.city') AS city,
        CAST(JSON_EXTRACT(json_str, '$.address.phoneNumbers') AS ARRAY(JSON)) AS phones
    FROM dataset
)

SELECT
    street_address,
    city
FROM person
WHERE
    -- at least one "home" phone whose number, read as an integer, exceeds 2
    CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'home'
                AND TRY_CAST(JSON_EXTRACT_SCALAR(phone, '$.number') AS INTEGER) > 2
        )
    ) > 0
    -- and at least one "city" phone whose number is the string '4'
    AND CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'city'
                AND JSON_EXTRACT_SCALAR(phone, '$.number') = '4'
        )
    ) > 0
输出:
street_address | city |
---|---|
101 | abc |
我有如下 JSON 数据保存在 S3 中,正在使用 Athena 编写 select 语句。
{
"sample_data":{
"people":[
{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"3"
},
{
"type":"city",
"number":"4"
}
]
}
},
{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}
]
}
}
如何编写 select 语句,选出 home 号码 > 2 且 city 号码 = 4 的记录的 streetAddress 和 city?
我试过 UNNEST 但没有用。
预期输出:
streetAddress city
101 abc
我尝试了下面这个 UNNEST 写法,但它把电话号码展开成了多行,因此无法同时按 home 和 city 进行查询,因为它们现在位于不同的行中。
-- Flatten the document into one row per (person, phone number) pair.
-- JSON path keys are case-sensitive, so the paths must match the key
-- spelling used in the sample document (streetAddress, phoneNumbers);
-- a path with the wrong case yields NULL instead of the value.
-- Each UNNEST gets its own alias so that person.n and phone.m are unambiguous.
-- idx is the position of the phone entry within that person's phoneNumbers array.
SELECT
    phone.idx,
    JSON_EXTRACT_SCALAR(person.n, '$.address.streetAddress') AS streetaddress,
    JSON_EXTRACT_SCALAR(person.n, '$.address.city') AS city,
    JSON_EXTRACT_SCALAR(phone.m, '$.type') AS type,
    JSON_EXTRACT_SCALAR(phone.m, '$.number') AS value
FROM sample_data1
CROSS JOIN UNNEST(CAST(JSON_EXTRACT(sample_data, '$.people') AS ARRAY<JSON>)) AS person (n)
CROSS JOIN UNNEST(CAST(JSON_EXTRACT(person.n, '$.address.phoneNumbers') AS ARRAY<JSON>))
    WITH ORDINALITY AS phone (m, idx);
unnest 会将数据展平为多行;因此您可以不取消嵌套,而是使用数组函数直接处理数组。
Athena 当前使用的 Presto 版本不支持 any_match,
因此您需要使用 cardinality + filter 的组合(并且它也不支持通过 JSON 路径进行过滤):
-- Inline sample rows: one JSON document per person.
WITH dataset (json_str) AS (
    VALUES
        (
            JSON '{
"firstName":"Emily",
"address":{
"streetAddress":"101",
"city":"abc",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"11"
},
{
"type":"city",
"number":"4"
}
]
}
}'
        ),
        (
            JSON '{
"firstName":"Smily",
"address":{
"streetAddress":"102",
"city":"def",
"state":"",
"phoneNumbers":[
{
"type":"home",
"number":"1"
},
{
"type":"city",
"number":"1"
}
]
}
}'
        )
),

-- Extract the output columns and keep the phone list as an array, so both
-- conditions can be checked on the same row without unnesting.
person AS (
    SELECT
        JSON_EXTRACT_SCALAR(json_str, '$.address.streetAddress') AS street_address,
        JSON_EXTRACT_SCALAR(json_str, '$.address.city') AS city,
        CAST(JSON_EXTRACT(json_str, '$.address.phoneNumbers') AS ARRAY(JSON)) AS phones
    FROM dataset
)

SELECT
    street_address,
    city
FROM person
WHERE
    -- at least one "home" phone whose number, read as an integer, exceeds 2
    CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'home'
                AND TRY_CAST(JSON_EXTRACT_SCALAR(phone, '$.number') AS INTEGER) > 2
        )
    ) > 0
    -- and at least one "city" phone whose number is the string '4'
    AND CARDINALITY(
        FILTER(
            phones,
            phone -> JSON_EXTRACT_SCALAR(phone, '$.type') = 'city'
                AND JSON_EXTRACT_SCALAR(phone, '$.number') = '4'
        )
    ) > 0
输出:
street_address | city |
---|---|
101 | abc |