将 JSON 转换为 Athena/Presto 中的 ARRAY<MAP>
Transform JSON to to ARRAY<MAP> in Athena/Presto
我在 Athena 中有一个可用的 table,其中有一列 JSON 的结构如下:
{
"455a9410-29a8-48a3-ad22-345afa3cd295":
{
"legacy_id": 1599677886,
"w_ids":
[
"845254682",
"831189092"
]
},
"5e74c911-0b63-4b84-8ad4-77dd9bed7b53":
{
"legacy_id": 1599707069,
"w_ids":
[
"1032024432"
]
},
"7b988890-20ff-4279-94df-198369a58848":
{
"legacy_id": 1601097861,
"w_ids":
[
"1032024432"
]
}
}
我想将其转换为以下格式的 ARRAY:
[
{"new_id"="455a9410-29a8-48a3-ad22-345afa3cd295","legacy_id"=1599677886,"w_ids"=["845254682","831189092"]},
{"new_id"="5e74c911-0b63-4b84-8ad4-77dd9bed7b53","legacy_id"=1599707069,"w_ids"=["1032024432"]},
{"new_id"="7b988890-20ff-4279-94df-198369a58848","legacy_id"=1601097861,"w_ids"=["1032024432"]}
]
我已经能够使用以下语句提取 legacy_id
和 w_ids
,但我很难将原始键添加为值:
with example_data as
(
select * from (
VALUES('{ "455a9410-29a8-48a3-ad22-345afa3cd295": { "legacy_id": 1599677886, "w_ids": [ "845254682", "831189092" ] }, "5e74c911-0b63-4b84-8ad4-77dd9bed7b53": { "legacy_id": 1599707069, "w_ids": [ "1032024432" ] }, "7b988890-20ff-4279-94df-198369a58848": { "legacy_id": 1601097861, "w_ids": [ "1032024432" ] }}')
) as t(col)
)
select *
,transform(map_values(cast(json_parse(col) AS map(varchar, json))),entry -> MAP_FROM_ENTRIES(ARRAY[('legacy_id',json_extract(entry,'$.legacy_id')),('w_ids',json_extract(entry,'$.w_ids'))]))
from example_data;
一种方法是在 transform_values
上使用 map_values
而不是在 map_values
上使用 transform
:
select map_values(
transform_values(
cast(json_parse(col) AS map(varchar, json)),
(key, entry)->MAP_FROM_ENTRIES(
ARRAY [('new_id', cast(key as json)),
('legacy_id', json_extract(entry, '$.legacy_id')),
('w_ids', json_extract(entry, '$.w_ids')) ]
)
)
)
from example_data;
输出:
_col0
[{new_id='455a9410-29a8-48a3-ad22-345afa3cd295', legacy_id=1599677886, w_ids=['845254682','831189092']}, {new_id='5e74c911-0b63-4b84-8ad4-77dd9bed7b53', legacy_id=1599707069, w_ids=['1032024432']}, {new_id='7b988890-20ff-4279-94df-198369a58848', legacy_id=1601097861, w_ids=['1032024432']}]
我在 Athena 中有一个可用的 table,其中有一列 JSON 的结构如下:
{
"455a9410-29a8-48a3-ad22-345afa3cd295":
{
"legacy_id": 1599677886,
"w_ids":
[
"845254682",
"831189092"
]
},
"5e74c911-0b63-4b84-8ad4-77dd9bed7b53":
{
"legacy_id": 1599707069,
"w_ids":
[
"1032024432"
]
},
"7b988890-20ff-4279-94df-198369a58848":
{
"legacy_id": 1601097861,
"w_ids":
[
"1032024432"
]
}
}
我想将其转换为以下格式的 ARRAY:
[
{"new_id"="455a9410-29a8-48a3-ad22-345afa3cd295","legacy_id"=1599677886,"w_ids"=["845254682","831189092"]},
{"new_id"="5e74c911-0b63-4b84-8ad4-77dd9bed7b53","legacy_id"=1599707069,"w_ids"=["1032024432"]},
{"new_id"="7b988890-20ff-4279-94df-198369a58848","legacy_id"=1601097861,"w_ids"=["1032024432"]}
]
我已经能够使用以下语句提取 legacy_id
和 w_ids
,但我很难将原始键添加为值:
with example_data as
(
select * from (
VALUES('{ "455a9410-29a8-48a3-ad22-345afa3cd295": { "legacy_id": 1599677886, "w_ids": [ "845254682", "831189092" ] }, "5e74c911-0b63-4b84-8ad4-77dd9bed7b53": { "legacy_id": 1599707069, "w_ids": [ "1032024432" ] }, "7b988890-20ff-4279-94df-198369a58848": { "legacy_id": 1601097861, "w_ids": [ "1032024432" ] }}')
) as t(col)
)
select *
,transform(map_values(cast(json_parse(col) AS map(varchar, json))),entry -> MAP_FROM_ENTRIES(ARRAY[('legacy_id',json_extract(entry,'$.legacy_id')),('w_ids',json_extract(entry,'$.w_ids'))]))
from example_data;
一种方法是在 transform_values
上使用 map_values
而不是在 map_values
上使用 transform
:
select map_values(
transform_values(
cast(json_parse(col) AS map(varchar, json)),
(key, entry)->MAP_FROM_ENTRIES(
ARRAY [('new_id', cast(key as json)),
('legacy_id', json_extract(entry, '$.legacy_id')),
('w_ids', json_extract(entry, '$.w_ids')) ]
)
)
)
from example_data;
输出:
_col0 |
---|
[{new_id='455a9410-29a8-48a3-ad22-345afa3cd295', legacy_id=1599677886, w_ids=['845254682','831189092']}, {new_id='5e74c911-0b63-4b84-8ad4-77dd9bed7b53', legacy_id=1599707069, w_ids=['1032024432']}, {new_id='7b988890-20ff-4279-94df-198369a58848', legacy_id=1601097861, w_ids=['1032024432']}] |