存储嵌套 json,其字段在 Hive 外部 table 中由 \ 分隔
store nested json whose fields are separated by \ in hive external table
我嵌套了 JSON,其字段由 \ 分隔,同时将 json 保存到配置单元外部 table 然后我收到错误。
{"value":"{\"DUUID\": 67, \"GUUID\": 514, \"EOT\": 219.0, \"cc\": 3, \"enghr\": 20.0, \"battvolt\": 0.0, \"EOP\": 120.0, \"ts\": \"2020-12-31T14:22:37\", \"ts1\": 1609404757.2771647}"}
以上是我的json消息,保存在hdfs/lambda3/test目录
我在配置单元中编写了如下查询---
> CREATE EXTERNAL TABLE demo1.json11(
> value struct<
>
> DUUID: INTEGER,
> GUUID :INTEGER,
> EOT: Double,
> cc :Double,
> enghr :double,
> battvolt : double,
> EOP : double,
> ts : timestamp,
> ts1 : timestamp >
> )
> ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
> LOCATION 'hdfs://localhost:9000/lambda3/test/';
然后当我发出命令时 select * from json11
然后我收到如下错误消息
Exception in thread "main" java.lang.Error: Data is not JSONObject but java.lang.String with value {"DUUID": 67, "GUUID": 514, "EOT": 219.0, "cc": 3, "enghr": 20.0, "battvolt": 0.0, "EOP": 120.0, "ts": "2020-12-31T14:22:37", "ts1": 1609404757.2771647}
at org.openx.data.jsonserde.objectinspector.JsonStructObjectInspector.getStructFieldData(JsonStructObjectInspector.java:73)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:366)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:202)
at org.apache.hadoop.hive.serde2.DelimitedJSONSerDe.serializeField(DelimitedJSONSerDe.java:61)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)
at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55)
at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:67)
at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:36)
at org.apache.hadoop.hive.ql.exec.ListSinkOperator.process(ListSinkOperator.java:94)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:438)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:430)
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147)
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2208)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:226)
at org.apache.hadoop.util.RunJar.main(RunJar.java:141)
请告诉我如何将那些 JSON 存储到 Hive table。
提前谢谢你
您的 JSON 中的 "value" 是一个包含 JSON 的字符串(即 {"value": string}),而不是嵌套的 JSON 结构。
嵌套的 JSON 结构应该如下所示:
{"value": {"DUUID": 67, "GUUID": 514, "EOT": 219.0, "cc": 3, "enghr": 20.0, "battvolt": 0.0, "EOP": 120.0, "ts": "2020-12-31T14:22:37", "ts1": 1609404757.2771647}}
如果您无法修复 JSON,则使用 value STRING 创建 table,并使用 json_tuple 解析它:
CREATE EXTERNAL TABLE demo1.json11(
value string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 'hdfs://localhost:9000/lambda3/test/';
select DUUID, GUUID,EOT,cc,enghr,battvolt,EOP,ts,ts1
from demo1.json11 j
lateral view json_tuple (j.value, 'DUUID', 'GUUID','EOT','cc','enghr','battvolt','EOP','ts','ts1') e
as DUUID, GUUID,EOT,cc,enghr,battvolt,EOP,ts,ts1
必要时转换类型,如下所示:
CAST(DUUID as int) as DUUID,
...
CAST(ts as timestamp) as ts,
CAST(ts1 as timestamp) as ts1
我嵌套了 JSON,其字段由 \ 分隔,同时将 json 保存到配置单元外部 table 然后我收到错误。
{"value":"{\"DUUID\": 67, \"GUUID\": 514, \"EOT\": 219.0, \"cc\": 3, \"enghr\": 20.0, \"battvolt\": 0.0, \"EOP\": 120.0, \"ts\": \"2020-12-31T14:22:37\", \"ts1\": 1609404757.2771647}"}
以上是我的json消息,保存在hdfs/lambda3/test目录
我在配置单元中编写了如下查询---
> CREATE EXTERNAL TABLE demo1.json11(
> value struct<
>
> DUUID: INTEGER,
> GUUID :INTEGER,
> EOT: Double,
> cc :Double,
> enghr :double,
> battvolt : double,
> EOP : double,
> ts : timestamp,
> ts1 : timestamp >
> )
> ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
> LOCATION 'hdfs://localhost:9000/lambda3/test/';
然后当我发出命令时 select * from json11
然后我收到如下错误消息
Exception in thread "main" java.lang.Error: Data is not JSONObject but java.lang.String with value {"DUUID": 67, "GUUID": 514, "EOT": 219.0, "cc": 3, "enghr": 20.0, "battvolt": 0.0, "EOP": 120.0, "ts": "2020-12-31T14:22:37", "ts1": 1609404757.2771647}
at org.openx.data.jsonserde.objectinspector.JsonStructObjectInspector.getStructFieldData(JsonStructObjectInspector.java:73)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:366)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:202)
at org.apache.hadoop.hive.serde2.DelimitedJSONSerDe.serializeField(DelimitedJSONSerDe.java:61)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)
at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55)
at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:67)
at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:36)
at org.apache.hadoop.hive.ql.exec.ListSinkOperator.process(ListSinkOperator.java:94)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:438)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:430)
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147)
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2208)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:253)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:226)
at org.apache.hadoop.util.RunJar.main(RunJar.java:141)
请告诉我如何将那些 JSON 存储到 Hive table。 提前谢谢你
您的 JSON 中的 "value" 是一个包含 JSON 的字符串(即 {"value": string}),而不是嵌套的 JSON 结构。
嵌套的 JSON 结构应该如下所示:
{"value": {"DUUID": 67, "GUUID": 514, "EOT": 219.0, "cc": 3, "enghr": 20.0, "battvolt": 0.0, "EOP": 120.0, "ts": "2020-12-31T14:22:37", "ts1": 1609404757.2771647}}
如果您无法修复 JSON,则使用 value STRING 创建 table,并使用 json_tuple 解析它:
CREATE EXTERNAL TABLE demo1.json11(
value string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 'hdfs://localhost:9000/lambda3/test/';
select DUUID, GUUID,EOT,cc,enghr,battvolt,EOP,ts,ts1
from demo1.json11 j
lateral view json_tuple (j.value, 'DUUID', 'GUUID','EOT','cc','enghr','battvolt','EOP','ts','ts1') e
as DUUID, GUUID,EOT,cc,enghr,battvolt,EOP,ts,ts1
必要时转换类型,如下所示:
CAST(DUUID as int) as DUUID,
...
CAST(ts as timestamp) as ts,
CAST(ts1 as timestamp) as ts1