如何用 Python 正确解析其中多个键同名的嵌套 json
How to correctly parse nested json with Python where several keys are equal
我对 flatten / json_normalize 函数有疑问。有一个嵌套的 json,里面有 6 个 "receipts",但是把这个 json 压平后只得到 1 行、1 个收据(而且是最后一个),而我需要数据框中包含全部 6 个。
[
{
"_index": "packets-2020-02-03",
"_type": "receipts_file",
"_score": null,
"_source": {
"layers": {
"frame": {
"frame.encap_type": "25",
"frame.time": "Feb 3, 2019 00:17:14.004011000 MSK",
"frame.offset_shift": "0.000000000",
"frame.time_epoch": "2575325034.004011000",
"frame.time_delta": "0.002843000",
"frame.time_delta_displayed": "0.002843000",
"frame.time_relative": "0.002852000",
"frame.number": "4",
"frame.len": "1294",
"frame.cap_len": "1294",
"frame.marked": "0",
"frame.ignored": "0",
"frame.protocols": "several"
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47207",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "29831",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47912"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "98982"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "23080"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "29849"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BB6DE"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "241",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47208",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "98341",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "38220"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "93813"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "98381"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "77371"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "6DED391C"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47209",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "38717"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "37788"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "74818"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "77812"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "39999"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "273A872F"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "242",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47210",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "118",
"receipts.receipt": "69322",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "83881"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "73188"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "78881"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "74388"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949C60DF"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47211",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "12281",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "12727"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "18828"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "38218"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47718"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BD094"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "25",
"receipts.command_id": "0x80000004",
"receipts.command_status": "0x00000000",
"receipts.sequence_number": "35572",
"receipts.receipt_id": "949C23B8"
}
}
}
}
]
我尝试使用此代码:
import json
import pandas as pd
from flatten_json import flatten
i_file_name = 'example.json'
# The body of the `with` statement must be indented; the file handle is only
# needed while the JSON text is being parsed.
with open(i_file_name) as fd:
    # NOTE: when an object repeats a key, json.load keeps only the last
    # value for that key, so repeated "receipts" entries collapse here.
    json_data = json.load(fd)
# Flatten each top-level record into a single-level dict whose keys are
# joined with '.', then build one DataFrame row per record.
json_data = (flatten(d, '.') for d in json_data)
df = pd.DataFrame(json_data)
df.head()
和
import pandas as pd
# Read the file into a DataFrame and expand the nested `_source` column
# into flat columns in a single expression.
i_file_name = 'example.json'
df = pd.json_normalize(pd.read_json(i_file_name)['_source'])
df.head()
它们给了我相同的结果:只有 1 行,而不是 6 行。我试过用 json_normalize 设置 record_path 和 meta,但我不知道该怎么做。我对 json 解析还比较陌生,在这里也没有找到类似的问题。我知道我需要设置正确的键,但我不知道该如何设置。
编辑:
不幸的是,Stack Overflow 对问题中的表格支持有限,因此我将尝试解释我的预期输出。
现在我只得到一行包含这些列:
- _index
- _type
- _score
- _source.layers.frame.*
- _source.layers.receipts.*
其中 * 表示同一级别下有多个列
receipts.* 仅包含 5 列:
- command_length
- command_id
- command_status
- sequence_number
- receipt_id
我得到的那 1 行包含来自最后 "receipts" 级记录的这些列的值:
"receipts": {
"receipts.command_length": "25",
"receipts.command_id": "0x80000004",
"receipts.command_status": "0x00000000",
"receipts.sequence_number": "35572",
"receipts.receipt_id": "949C23B8"
}
但也有其他"receipts"级的记录,例如:
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47207",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "29831",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47912"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "98982"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "23080"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "29849"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BB6DE"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
我还想将其视为 pandas 数据框中的行。所以我得到的当前行应该是第 6 行。
我大致明白我的 json 有点问题,因为它有 6 个同名的键(receipts),但也许我可以用别的方式解析它,以便正确地将它导入 Pandas。
我意识到我没有回答我的问题,但实际上设法解决了它。对于以下代码,我深表歉意,但如果您想解决这样的问题,它可能会有所帮助。我决定宁愿向世界展示我愚蠢的代码,也不愿留下它没有任何解决方案。
首先,我按照我在问题中提到的那样做了:
import pandas as pd
# Read the file into a DataFrame and expand the nested `_source` column
# into flat columns in a single expression.
i_file_name = 'example.json'
df = pd.json_normalize(pd.read_json(i_file_name)['_source'])
然后我将其转换为 json 并再次导入到 Pandas:
from io import StringIO

# Round-trip the frame through JSON text. The text is wrapped in StringIO
# because passing a literal JSON string to read_json is deprecated in
# recent pandas releases.
df_json = df.to_json(orient='records')
df = pd.read_json(StringIO(df_json), orient='columns')
然后我融化了一些图层:
# Keep the frame time/number as identifier columns and unpivot the
# `layers.receipts` column into the `variable` / `value` pair that melt
# produces. (The id_vars list was missing its closing bracket and comma.)
df_melt = pd.melt(df,
                  id_vars=['layers.frame.frame.time',
                           'layers.frame.frame.number'],
                  value_vars=['layers.receipts'])
之后,我用这些融化的值创建了一个新的 DataFrame 并保存了索引,以便稍后加入 2 个数据帧。
# Build a new frame from the melted values and reuse df_melt's row index
# (not the whole DataFrame) so the two frames can be joined afterwards.
df_melt2 = pd.DataFrame(df_melt['value'].values.tolist(), index=df_melt.index)
然后我将 2 个数据框连接在一起并删除了不再需要的列
# Put the expanded columns next to the melted frame, then discard the
# `value` / `variable` helper columns that are no longer needed.
df_melt_full = (
    pd.concat([df_melt, df_melt2], axis=1)
    .drop(columns=['value', 'variable'])
)
之后,我又把它融化了(是的,那是我二月份的代码,我很惭愧)
# Second unpivot: every column except the two frame identifiers is folded
# into `variable` / `value` rows.
df_melt_full_melt = df_melt_full.melt(
    id_vars=['layers.frame.frame.time', 'layers.frame.frame.number'],
)
并再次导入
# Normalize the melted `value` column into a flat table
# (assumes its entries are dicts — TODO confirm).
df_normalized = pd.json_normalize(df_melt_full_melt['value'])
然后,最后,我将 2 个数据帧连接在一起并解决了我的问题
# Place the two frames side by side (column-wise concat).
# NOTE(review): this joins df_melt rather than df_melt_full_melt — confirm
# that is the intended frame.
df_final = pd.concat([df_melt, df_normalized], axis=1)
我对 flatten / json_normalize 函数有疑问。有一个嵌套的 json,里面有 6 个 "receipts",但是把这个 json 压平后只得到 1 行、1 个收据(而且是最后一个),而我需要数据框中包含全部 6 个。
[
{
"_index": "packets-2020-02-03",
"_type": "receipts_file",
"_score": null,
"_source": {
"layers": {
"frame": {
"frame.encap_type": "25",
"frame.time": "Feb 3, 2019 00:17:14.004011000 MSK",
"frame.offset_shift": "0.000000000",
"frame.time_epoch": "2575325034.004011000",
"frame.time_delta": "0.002843000",
"frame.time_delta_displayed": "0.002843000",
"frame.time_relative": "0.002852000",
"frame.number": "4",
"frame.len": "1294",
"frame.cap_len": "1294",
"frame.marked": "0",
"frame.ignored": "0",
"frame.protocols": "several"
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47207",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "29831",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47912"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "98982"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "23080"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "29849"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BB6DE"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "241",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47208",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "98341",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "38220"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "93813"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "98381"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "77371"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "6DED391C"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47209",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "38717"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "37788"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "74818"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "77812"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "39999"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "273A872F"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "242",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47210",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "118",
"receipts.receipt": "69322",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "83881"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "73188"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "78881"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "74388"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949C60DF"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47211",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "12281",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "12727"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "18828"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "38218"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47718"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BD094"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
"receipts": {
"receipts.command_length": "25",
"receipts.command_id": "0x80000004",
"receipts.command_status": "0x00000000",
"receipts.sequence_number": "35572",
"receipts.receipt_id": "949C23B8"
}
}
}
}
]
我尝试使用此代码:
import json
import pandas as pd
from flatten_json import flatten
i_file_name = 'example.json'
# The body of the `with` statement must be indented; the file handle is only
# needed while the JSON text is being parsed.
with open(i_file_name) as fd:
    # NOTE: when an object repeats a key, json.load keeps only the last
    # value for that key, so repeated "receipts" entries collapse here.
    json_data = json.load(fd)
# Flatten each top-level record into a single-level dict whose keys are
# joined with '.', then build one DataFrame row per record.
json_data = (flatten(d, '.') for d in json_data)
df = pd.DataFrame(json_data)
df.head()
和
import pandas as pd
# Read the file into a DataFrame and expand the nested `_source` column
# into flat columns in a single expression.
i_file_name = 'example.json'
df = pd.json_normalize(pd.read_json(i_file_name)['_source'])
df.head()
它们给了我相同的结果:只有 1 行,而不是 6 行。我试过用 json_normalize 设置 record_path 和 meta,但我不知道该怎么做。我对 json 解析还比较陌生,在这里也没有找到类似的问题。我知道我需要设置正确的键,但我不知道该如何设置。
编辑:
不幸的是,Stack Overflow 对问题中的表格支持有限,因此我将尝试解释我的预期输出。
现在我只得到一行包含这些列:
- _index
- _type
- _score
- _source.layers.frame.*
- _source.layers.receipts.*
其中 * 表示同一级别下有多个列
receipts.* 仅包含 5 列:
- command_length
- command_id
- command_status
- sequence_number
- receipt_id
我得到的那 1 行包含来自最后 "receipts" 级记录的这些列的值:
"receipts": {
"receipts.command_length": "25",
"receipts.command_id": "0x80000004",
"receipts.command_status": "0x00000000",
"receipts.sequence_number": "35572",
"receipts.receipt_id": "949C23B8"
}
但也有其他"receipts"级的记录,例如:
"receipts": {
"receipts.command_length": "238",
"receipts.command_id": "0x00000005",
"receipts.sequence_number": "47207",
"receipts.data_coding": "0x00000000",
"receipts.data_coding_tree": {
"receipts.rps": "0x00000000",
"Receipt Type 1 Data Coding": {
"receipts.rps.rc_coding_group": "0x00000000",
"receipts.rps.text_compression": "0",
"receipts.rps.class_present": "0",
"receipts.rps.charset": "0x00000000"
},
"Receipt Type 2 Data Coding": {
"receipts.rps.rpk._coding_group": "0x00000000",
"receipts.rps.rpk._language": "0x00000000"
}
},
"receipts.rc_default_receipt_id": "0",
"receipts.rc_length": "117",
"receipts.receipt": "29831",
"receipts.opt_params": {
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003002",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "47912"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003001",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "98982"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003004",
"receipts.opt_param_len": "1",
"receipts.vendor_op": "00"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003000",
"receipts.opt_param_len": "4",
"receipts.vendor_op": "23080"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00003003",
"receipts.opt_param_len": "10",
"receipts.vendor_op": "29849"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x0000001e",
"receipts.opt_param_len": "9",
"receipts.receipted_receipt_id": "949BB6DE"
},
"receipts.opt_param": {
"receipts.opt_param_tag": "0x00000427",
"receipts.opt_param_len": "1",
"receipts.receipt_state": "2"
}
}
},
我还想将其视为 pandas 数据框中的行。所以我得到的当前行应该是第 6 行。
我大致明白我的 json 有点问题,因为它有 6 个同名的键(receipts),但也许我可以用别的方式解析它,以便正确地将它导入 Pandas。
我意识到我没有回答我的问题,但实际上设法解决了它。对于以下代码,我深表歉意,但如果您想解决这样的问题,它可能会有所帮助。我决定宁愿向世界展示我愚蠢的代码,也不愿留下它没有任何解决方案。
首先,我按照我在问题中提到的那样做了:
import pandas as pd
# Read the file into a DataFrame and expand the nested `_source` column
# into flat columns in a single expression.
i_file_name = 'example.json'
df = pd.json_normalize(pd.read_json(i_file_name)['_source'])
然后我将其转换为 json 并再次导入到 Pandas:
from io import StringIO

# Round-trip the frame through JSON text. The text is wrapped in StringIO
# because passing a literal JSON string to read_json is deprecated in
# recent pandas releases.
df_json = df.to_json(orient='records')
df = pd.read_json(StringIO(df_json), orient='columns')
然后我融化了一些图层:
# Keep the frame time/number as identifier columns and unpivot the
# `layers.receipts` column into the `variable` / `value` pair that melt
# produces. (The id_vars list was missing its closing bracket and comma.)
df_melt = pd.melt(df,
                  id_vars=['layers.frame.frame.time',
                           'layers.frame.frame.number'],
                  value_vars=['layers.receipts'])
之后,我用这些融化的值创建了一个新的 DataFrame 并保存了索引,以便稍后加入 2 个数据帧。
# Build a new frame from the melted values and reuse df_melt's row index
# (not the whole DataFrame) so the two frames can be joined afterwards.
df_melt2 = pd.DataFrame(df_melt['value'].values.tolist(), index=df_melt.index)
然后我将 2 个数据框连接在一起并删除了不再需要的列
# Put the expanded columns next to the melted frame, then discard the
# `value` / `variable` helper columns that are no longer needed.
df_melt_full = (
    pd.concat([df_melt, df_melt2], axis=1)
    .drop(columns=['value', 'variable'])
)
之后,我又把它融化了(是的,那是我二月份的代码,我很惭愧)
# Second unpivot: every column except the two frame identifiers is folded
# into `variable` / `value` rows.
df_melt_full_melt = df_melt_full.melt(
    id_vars=['layers.frame.frame.time', 'layers.frame.frame.number'],
)
并再次导入
# Normalize the melted `value` column into a flat table
# (assumes its entries are dicts — TODO confirm).
df_normalized = pd.json_normalize(df_melt_full_melt['value'])
然后,最后,我将 2 个数据帧连接在一起并解决了我的问题
# Place the two frames side by side (column-wise concat).
# NOTE(review): this joins df_melt rather than df_melt_full_melt — confirm
# that is the intended frame.
df_final = pd.concat([df_melt, df_normalized], axis=1)