在 JSON 文件上使用 Hive

Hive on JSON File

我在使用 Hive 解析 JSON 文件时需要帮助。该文件包含嵌套数组,当我尝试使用 Hive UDF 解析或查询该文件时,只能向下钻取一层,再下一层的数组在结果中全部显示为 NULL。下面给出了一个示例。文件包含多个部分(数组),下面给出的是其中最复杂的部分。我尝试使用 get_json_object 进行解析,但只能取到第一层的数据,嵌套数组完全取不出来。如果有人能指导我如何解析嵌套的 JSON 数组,将会很有帮助。

"section": {
                        "moodCode": "xxx",
                        "classCode": "xxx",
                        "templateId": {
                            "root": "2.xx.840"
                        },
                        "code": {
                            "codeSystemName": "LOINC",

                        },
                        "title": "problems",
                        "text": {
                            "mediaType": "text/x-hl7-text+xml",
                            "list": [{
                                "caption": "Recorded",
                                "item": {
                                    "ID": "pr101",
                                    "content": [{
                                        "ID": "pr101-desc",
                                        "text": "Salm"
                                    },
                                    "003.1"],
                                    "text": "                                              "
                                },
                                "text": "                                        "
                            },
                            {
                                "caption": "Reported",
                                "item": "None Reported",
                                "text": "                                        "
                            }],
                            "text": "                                  "
                        },
                        "entry": {
                            "typeCode": "DRIV",
                            "act": {
                                "moodCode": "EVN",
                                "classCode": "ACT",
                                "templateId": [{
                                    "root": "2.16.840.1.0.1.27"
                                },
                                {
                                    "root": "1.3.6.1.4..1"
                                },
                                {
                                    "root": "1.3.6.1.4.4.5.2"
                                }],
                                "id": {
                                    "root": "068fd4d4-dfa2-48190768f"
                                },
                                "code": {
                                    "nullFlavor": "NA"
                                },
                                "statusCode": {
                                    "code": "completed"
                                },
                                "effectiveTime": {
                                    "low": {
                                        "value": "20140428144743+0100"
                                    },
                                    "high": {
                                        "value": "20140428144743+0100"
                                    },
                                    "text": "                                              "
                                },
                                "entryRelationship": {
                                    "typeCode": "SUBJ",
                                    "observation": {
                                        "moodCode": "EVN",
                                        "classCode": "OBS",
                                        "templateId": [{
                                            "root": "2.16.840.1.20.1.28"
                                        },
                                        {
                                            "root": "1.3.6.1.4.1.5.3.1.4.5"
                                        }],
                                        "id": {
                                            "root": "fa34b4da4dbb-b090-01bd4d6ef62b"
                                        },
                                        "code": {
                                            "codeSystemName": "SNOMED CT",
                                            "code": "282009",
                                            "displayName": "diagnosis",
                                            "codeSystem": "2.16.840.1.6.96"
                                        },
                                        "text": {
                                            "reference": {
                                                "value": "#pr101"
                                            },
                                            "text": "                                      "
                                        },
                                        "statusCode": {
                                            "code": "completed"
                                        },
                                        "effectiveTime": {
                                            "low": {
                                                "value": "20140428144743+0100"
                                            },
                                            "high": {
                                                "nullFlavor": "UNK"
                                            },
                                            "text": "                                                          "
                                        },
                                        "value": {
                                            "codeSystemName": "ICD-9",
                                            "xsi:type": "CD",
                                            "code": "003.1",
                                            "displayName": "Sla sia",
                                            "codeSystem": "2.16.840.1.103",
                                            "originalText": {
                                                "reference": {
                                                    "value": "#pr101-desc"
                                                },
                                                "text": "                                          "
                                            },
                                            "text": "                                      "
                                        },
                                        "entryRelationship": {
                                            "typeCode": "REFR",
                                            "observation": {
                                                "moodCode": "EVN",
                                                "classCode": "OBS",
                                                "templateId": [{
                                                    "root": "2.16.840..1.50"
                                                },
                                                {
                                                    "root": "2.16.8410.20.1.57"
                                                },
                                                {
                                                    "root": "1.3.6.13.1.4.1.1"
                                                }],
                                                "code": {
                                                    "codeSystemName": "LOINC",
                                                    "code": "33999-4",
                                                    "displayName": "Status",
                                                    "codeSystem": "2.16.840.1.113883.6.1"
                                                },
                                                "statusCode": {
                                                    "code": "completed"
                                                },
                                                "value": {
                                                    "codeSystemName": " CT",
                                                    "xsi:type": "CE",
                                                    "code": "55563",
                                                    "displayName": "active",
                                                    "codeSystem": "2.16.8406.96"
                                                },
                                                "text": "                                                                                                                                                        "
                                            },
                                            "text": "                                      "
                                        },
                                        "text": "                                                                                                                                                                                  "
                                    },
                                    "text": "                              "
                                },
                                "text": "                                                                                                                            "
                            },
                            "text": "                      "
                        },
                        "text": "                                                          "
                    },
                    "text": "              "
                },

当我使用 get_json_object 时,下面这部分数据返回的是 null:

"title": "problems",
                        "text": {
                            "mediaType": "text/x-hl7-text+xml",
                            "list": [{
                                "caption": "Recorded",
                                "item": {
                                    "ID": "pr101",
                                    "content": [{
                                        "ID": "pr101-desc",
                                        "text": "Salm"
                                    },
                                    "003.1"],

更新:您必须将 JSON 文件格式化为每条记录只占一行,例如:

'{"section":{"moodCode":"xxx","classCode":"xxx","templateId":{"root":"2.xx.840"},"code":{"codeSystemName":"LOINC"},"title":"problems","text":{"mediaType":"text/x-hl7-text+xml","list":[{"caption":"Recorded","item":{"ID":"pr101","content":[{"ID":"pr101-desc","text":"Salm"},"003.1"],"text":"                                              "},"text":"                                        "},{"caption":"Reported","item":"None Reported","text":"                                        "}],"text":"                                  "},"entry":{"typeCode":"DRIV","act":{"moodCode":"EVN","classCode":"ACT","templateId":[{"root":"2.16.840.1.0.1.27"},{"root":"1.3.6.1.4..1"},{"root":"1.3.6.1.4.4.5.2"}],"id":{"root":"068fd4d4-dfa2-48190768f"},"code":{"nullFlavor":"NA"},"statusCode":{"code":"completed"},"effectiveTime":{"low":{"value":"20140428144743+0100"},"high":{"value":"20140428144743+0100"},"text":""},"entryRelationship":{"typeCode":"SUBJ","observation":{"moodCode":"EVN","classCode":"OBS","templateId":[{"root":"2.16.840.1.20.1.28"},{"root":"1.3.6.1.4.1.5.3.1.4.5"}],"id":{"root":"fa34b4da4dbb-b090-01bd4d6ef62b"},"code":{"codeSystemName":"SNOMEDCT","code":"282009","displayName":"diagnosis","codeSystem":"2.16.840.1.6.96"},"text":"","statusCode":{"code":"completed"},"effectiveTime":{"low":{"value":"20140428144743+0100"},"high":{"nullFlavor":"UNK"},"text":""},"value":{"codeSystemName":"ICD-9","xsi: type":"CD","code":"003.1","displayName":"Slasia","codeSystem":"2.16.840.1.103","originalText":{"reference":{"value":"#pr101-desc"},"text":""},"text":""},"entryRelationship":{"typeCode":"REFR","observation":{"moodCode":"EVN","classCode":"OBS","templateId":[{"root":"2.16.840..1.50"},{"root":"2.16.8410.20.1.57"},{"root":"1.3.6.13.1.4.1.1"}],"code":{"codeSystemName":"LOINC","code":"33999-4","displayName":"Status","codeSystem":"2.16.840.1.113883.6.1"},"statusCode":{"code":"completed"},"value":{"codeSystemName":"CT","xsi: type":"CE","code":"55563","displayName":"active","codeSystem":"2.16.8406.96"},"text":""},"text":""}},"text":""},"text":""},"text":""}},"text":""}'

现在创建一张表,将整个 JSON 字符串作为单独的一列:

'drop table json_test;
create external table json_test(value string)
LOCATION 'path'; '

现在您可以使用 LATERAL VIEW 配合 json_tuple 来获取所需的字段。

'set hive.cli.print.header=true;
 SELECT c.moodCode,c.classCode,d.root,c.title,e.mediaType FROM json_test a  LATERAL VIEW json_tuple(a.value, 'section') b AS section LATERAL VIEW   json_tuple(b.section,'moodCode','classCode','templateId','title','text')c
 AS moodCode,classCode,templateId,title,text LATERAL VIEW json_tuple(c.templateId,'root')d
 AS root LATERAL VIEW json_tuple(c.text,'mediaType')e AS mediaType;'

结果

'c.moodcode|c.classcode|d.root   |c.title |e.mediatype
   xxx     | xxx       |2.xx.840 |problems|text/x-hl7-text+xml'