如何在 Hive 中提取嵌套的 JSON 对象
How to extract nested JSON Object in Hive
我有一个名为“impact”的列,其中包含嵌套 Json 格式的数据
输入:
[{"internalid":"079","impactid":[{"position":"1","typeid":"NOEUD","value":"G1"},{"position":"2","typeid":"ID","value":"001"},{"position":"3","typeid":"CODE_CI","value":"14"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381075","impactid":[{"position":"1","typeid":"NOEUD","value":"G3"},{"position":"2","typeid":"ID","value":"003"},{"position":"3","typeid":"CI","value":"58"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381071","impactid":[{"position":"1","typeid":"NOEUD","value":"G2"},{"position":"2","typeid":"IDT","value":"002"},{"position":"3","typeid":"CI","value":"57"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null}]
我使用下面的代码:
SELECT
get_json_object(single_json_table.identifiant, '$.position') AS impact_position,
get_json_object(single_json_table.identifiant, '$.value') AS impact_value
FROM
(SELECT exp2.identifiant
FROM socle s
lateral view explode(split(regexp_replace(substr(impact, 2, length(impact)-2),
'},\{"', '},,,,{"'), ',,,,')) exp2 as identifiant
)single_json_table
这是结果,它跳过了第一个位置和值,有人知道我该如何解决吗?
impact_position | impact_value
(null) (null)
2 001
3 14
(null) (null)
2 003
3 58
(null) (null)
2 002
3 57
您的输入是带有嵌套数组的 JSON。上层数组是整个输入,包含struct> >,impactid
是一个嵌套数组,包含这样的struct元素:{"position":"1","typeid":"NOEUD","value":"G1"}
您需要分解两个数组。首先分解上层数组:更改分隔符、拆分、分解,然后对嵌套数组执行相同的操作。
演示:
with socle as (
select '[{"internalid":"079","impactid":[{"position":"1","typeid":"NOEUD","value":"G1"},{"position":"2","typeid":"ID","value":"001"},{"position":"3","typeid":"CODE_CI","value":"14"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381075","impactid":[{"position":"1","typeid":"NOEUD","value":"G3"},{"position":"2","typeid":"ID","value":"003"},{"position":"3","typeid":"CI","value":"58"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381071","impactid":[{"position":"1","typeid":"NOEUD","value":"G2"},{"position":"2","typeid":"IDT","value":"002"},{"position":"3","typeid":"CI","value":"57"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null}]'
as impact
)
select internalid,
get_json_object(e.impact, '$.position') as position,
get_json_object(e.impact, '$.value') as value
from
(
select get_json_object(impacts, '$.internalid') internalid,
--extract inner impact array, remove [], convert delimiters
regexp_replace(regexp_replace(get_json_object(impacts,'$.impactid'),'^\[|\]$',''),'\},\{','},,,,{') impact
from
(
SELECT --First we need to explode upper array. Since it is a string,
--we need to prepare delimiters to be able to explode it
--remove first [ and last ], replace delimiters between inner structure with 4 commas
regexp_replace(regexp_replace(s.impact,'^\[|\]$',''),'\},\{"internalid"','},,,,{"internalid"') upper_array_str
FROM socle s
)s lateral view explode (split(upper_array_str, ',,,,')) e as impacts --get upper array element
)s lateral view explode (split(impact, ',,,,') ) e as impact
结果:
internalid position value
079 1 G1
079 2 001
079 3 14
6381075 1 G3
6381075 2 003
6381075 3 58
6381071 1 G2
6381071 2 002
6381071 3 57
我有一个名为“impact”的列,其中包含嵌套 Json 格式的数据
输入:
[{"internalid":"079","impactid":[{"position":"1","typeid":"NOEUD","value":"G1"},{"position":"2","typeid":"ID","value":"001"},{"position":"3","typeid":"CODE_CI","value":"14"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381075","impactid":[{"position":"1","typeid":"NOEUD","value":"G3"},{"position":"2","typeid":"ID","value":"003"},{"position":"3","typeid":"CI","value":"58"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381071","impactid":[{"position":"1","typeid":"NOEUD","value":"G2"},{"position":"2","typeid":"IDT","value":"002"},{"position":"3","typeid":"CI","value":"57"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null}]
我使用下面的代码:
SELECT
get_json_object(single_json_table.identifiant, '$.position') AS impact_position,
get_json_object(single_json_table.identifiant, '$.value') AS impact_value
FROM
(SELECT exp2.identifiant
FROM socle s
lateral view explode(split(regexp_replace(substr(impact, 2, length(impact)-2),
'},\{"', '},,,,{"'), ',,,,')) exp2 as identifiant
)single_json_table
这是结果,它跳过了第一个位置和值,有人知道我该如何解决吗?
impact_position | impact_value
(null) (null)
2 001
3 14
(null) (null)
2 003
3 58
(null) (null)
2 002
3 57
您的输入是带有嵌套数组的 JSON。上层数组是整个输入,包含structimpactid
是一个嵌套数组,包含这样的struct元素:{"position":"1","typeid":"NOEUD","value":"G1"}
您需要分解两个数组。首先分解上层数组:更改分隔符、拆分、分解,然后对嵌套数组执行相同的操作。
演示:
with socle as (
select '[{"internalid":"079","impactid":[{"position":"1","typeid":"NOEUD","value":"G1"},{"position":"2","typeid":"ID","value":"001"},{"position":"3","typeid":"CODE_CI","value":"14"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381075","impactid":[{"position":"1","typeid":"NOEUD","value":"G3"},{"position":"2","typeid":"ID","value":"003"},{"position":"3","typeid":"CI","value":"58"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null},{"internalid":"6381071","impactid":[{"position":"1","typeid":"NOEUD","value":"G2"},{"position":"2","typeid":"IDT","value":"002"},{"position":"3","typeid":"CI","value":"57"}],"typeid":"BTS","cdrs":"X110","belong":"OF","impactclass":"R","count":"0","numberaccessimpacted":"0","impactcalculationrequest":null}]'
as impact
)
select internalid,
get_json_object(e.impact, '$.position') as position,
get_json_object(e.impact, '$.value') as value
from
(
select get_json_object(impacts, '$.internalid') internalid,
--extract inner impact array, remove [], convert delimiters
regexp_replace(regexp_replace(get_json_object(impacts,'$.impactid'),'^\[|\]$',''),'\},\{','},,,,{') impact
from
(
SELECT --First we need to explode upper array. Since it is a string,
--we need to prepare delimiters to be able to explode it
--remove first [ and last ], replace delimiters between inner structure with 4 commas
regexp_replace(regexp_replace(s.impact,'^\[|\]$',''),'\},\{"internalid"','},,,,{"internalid"') upper_array_str
FROM socle s
)s lateral view explode (split(upper_array_str, ',,,,')) e as impacts --get upper array element
)s lateral view explode (split(impact, ',,,,') ) e as impact
结果:
internalid position value
079 1 G1
079 2 001
079 3 14
6381075 1 G3
6381075 2 003
6381075 3 58
6381071 1 G2
6381071 2 002
6381071 3 57