HIVE json 列解析为键和值
HIVE json column parse as key and value
假设有一张包含 2 列、2 条记录的表:
记录 1:第 1 列 my_col 的值为 {"XXX": ["123","456"],"YYY": ["246","135"]},第 2 列 ID 的值为 A123
记录 2:第 1 列 my_col 的值为 {"ZZZ":["333"]},第 2 列 ID 的值为 B222
期望:
Key | Value | ID |
---|---|---|
XXX | 123 | A123 |
XXX | 456 | A123 |
YYY | 246 | A123 |
YYY | 135 | A123 |
ZZZ | 333 | B222 |
下面的查询只能检索键 XXX 和 YYY,而我的场景中有很多记录。因此我需要一个查询,它能够自行识别键,并在不显式指定键名的情况下展平对应的值:
-- Flattens the JSON arrays stored under the hard-coded keys 'XXX' and 'YYY'
-- of input_df.my_col into one (key, value, id) row per array element.
-- Only these two keys are extracted; any other key in my_col is ignored.
SELECT
    t.key,
    kv.kval AS value,
    t.id
FROM (
    SELECT
        e.key,   -- map key produced by the lateral view
        e.value, -- array of values belonging to that key
        t.id     -- column from the table
    FROM
        input_df t
        -- explode(map(...)) emits one (key, value) row per map entry
        LATERAL VIEW explode(map(
            'XXX',
            -- get_json_object returns the array as text; strip the quotes and
            -- brackets, then split on commas to get an array of values.
            -- Backslashes are doubled because Hive unescapes string literals
            -- before the pattern reaches the regex engine ('\[' would arrive
            -- as '[' and no longer match a literal bracket).
            split(regexp_replace(get_json_object(my_col, '$.XXX'), '"|\\[|\\]', ''), ','),
            'YYY',
            split(regexp_replace(get_json_object(my_col, '$.YYY'), '"|\\[|\\]', ''), ',')
        )) e AS key, value
) t
-- second explode: one output row per element of the value array
LATERAL VIEW explode(t.value) kv AS kval
请参阅代码中的注释:
-- Flattens every key of the JSON object in my_col into (key, value, id) rows
-- without naming the keys in advance. The JSON is parsed with regular
-- expressions, so it relies on the shape shown in the sample data:
-- {"KEY": ["v1","v2"], ...} with values that contain no commas, colons,
-- quotes or brackets.
WITH input_df AS (
    -- sample data
    SELECT 'A123' AS id, '{"XXX": ["123","456"],"YYY": ["246","135"]}' AS my_col
    UNION ALL
    SELECT 'B222' AS id, '{"ZZZ":["333"]}' AS my_col
)
SELECT
    split(s.element, ':')[0] AS key, -- text before the colon is the key
    v.value,
    s.id
FROM (
    SELECT
        -- remove extra chars to get an element like this: XXX:123,456
        -- Backslashes are doubled because Hive unescapes string literals
        -- before the pattern reaches the regex engine.
        regexp_replace(e.element, '^\\{|"| *\\[|\\]|\\}$', '') AS element,
        t.id
    FROM
        input_df t
        -- split by the comma between ] and " (with optional spaces), i.e. the
        -- comma separating two "key": [...] pairs, not the commas inside an array
        LATERAL VIEW explode(split(t.my_col, '(?<=\\]) *, *(?=")')) e AS element
) s
-- text after the colon is the comma-separated value list; one row per value
LATERAL VIEW explode(split(split(s.element, ':')[1], ',')) v AS value
结果:
key value id
XXX 123 A123
XXX 456 A123
YYY 246 A123
YYY 135 A123
ZZZ 333 B222
假设有一张包含 2 列、2 条记录的表:
记录 1:第 1 列 my_col 的值为 {"XXX": ["123","456"],"YYY": ["246","135"]},第 2 列 ID 的值为 A123
记录 2:第 1 列 my_col 的值为 {"ZZZ":["333"]},第 2 列 ID 的值为 B222
期望:
Key | Value | ID |
---|---|---|
XXX | 123 | A123 |
XXX | 456 | A123 |
YYY | 246 | A123 |
YYY | 135 | A123 |
ZZZ | 333 | B222 |
下面的查询只能检索键 XXX 和 YYY,而我的场景中有很多记录。因此我需要一个查询,它能够自行识别键,并在不显式指定键名的情况下展平对应的值:
-- Flattens the JSON arrays stored under the hard-coded keys 'XXX' and 'YYY'
-- of input_df.my_col into one (key, value, id) row per array element.
-- Only these two keys are extracted; any other key in my_col is ignored.
SELECT
    t.key,
    kv.kval AS value,
    t.id
FROM (
    SELECT
        e.key,   -- map key produced by the lateral view
        e.value, -- array of values belonging to that key
        t.id     -- column from the table
    FROM
        input_df t
        -- explode(map(...)) emits one (key, value) row per map entry
        LATERAL VIEW explode(map(
            'XXX',
            -- get_json_object returns the array as text; strip the quotes and
            -- brackets, then split on commas to get an array of values.
            -- Backslashes are doubled because Hive unescapes string literals
            -- before the pattern reaches the regex engine ('\[' would arrive
            -- as '[' and no longer match a literal bracket).
            split(regexp_replace(get_json_object(my_col, '$.XXX'), '"|\\[|\\]', ''), ','),
            'YYY',
            split(regexp_replace(get_json_object(my_col, '$.YYY'), '"|\\[|\\]', ''), ',')
        )) e AS key, value
) t
-- second explode: one output row per element of the value array
LATERAL VIEW explode(t.value) kv AS kval
请参阅代码中的注释:
-- Flattens every key of the JSON object in my_col into (key, value, id) rows
-- without naming the keys in advance. The JSON is parsed with regular
-- expressions, so it relies on the shape shown in the sample data:
-- {"KEY": ["v1","v2"], ...} with values that contain no commas, colons,
-- quotes or brackets.
WITH input_df AS (
    -- sample data
    SELECT 'A123' AS id, '{"XXX": ["123","456"],"YYY": ["246","135"]}' AS my_col
    UNION ALL
    SELECT 'B222' AS id, '{"ZZZ":["333"]}' AS my_col
)
SELECT
    split(s.element, ':')[0] AS key, -- text before the colon is the key
    v.value,
    s.id
FROM (
    SELECT
        -- remove extra chars to get an element like this: XXX:123,456
        -- Backslashes are doubled because Hive unescapes string literals
        -- before the pattern reaches the regex engine.
        regexp_replace(e.element, '^\\{|"| *\\[|\\]|\\}$', '') AS element,
        t.id
    FROM
        input_df t
        -- split by the comma between ] and " (with optional spaces), i.e. the
        -- comma separating two "key": [...] pairs, not the commas inside an array
        LATERAL VIEW explode(split(t.my_col, '(?<=\\]) *, *(?=")')) e AS element
) s
-- text after the colon is the comma-separated value list; one row per value
LATERAL VIEW explode(split(split(s.element, ':')[1], ',')) v AS value
结果:
key value id
XXX 123 A123
XXX 456 A123
YYY 246 A123
YYY 135 A123
ZZZ 333 B222