如何用 Python 解析嵌套的 Json?
How to Parsing Nested Json with Python?
我正在尝试解析这个嵌套的 JSON 文件,但我 运行 无法获取我需要的每个元素。
这里是 json 例子:
{
"sensor-time" : {
"timezone" : "New_York",
"time" : "2020-07-15T12:45:02-04:00"
},
"status" : {
"code" : "OK"
},
"content" : {
"element" : [ {
"element-id" : 0,
"element-name" : "Line 0",
"sensor-type" : "SINGLE_SENSOR",
"data-type" : "LINE",
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"resolution" : "FIVE_MINUTES",
"measurement" : [ {
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:35:00-04:00",
"value" : [ {
"value" : 1,
"label" : "fw"
}, {
"value" : 2,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:35:00-04:00",
"to" : "2020-07-15T12:40:00-04:00",
"value" : [ {
"value" : 3,
"label" : "fw"
}, {
"value" : 4,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:40:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"value" : [ {
"value" : 5,
"label" : "fw"
}, {
"value" : 6,
"label" : "bw"
} ]
} ]
}, {
"element-id" : 1,
"element-name" : "Test Line",
"sensor-type" : "SINGLE_SENSOR",
"data-type" : "LINE",
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"resolution" : "FIVE_MINUTES",
"measurement" : [ {
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:35:00-04:00",
"value" : [ {
"value" : 7,
"label" : "fw"
}, {
"value" : 8,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:35:00-04:00",
"to" : "2020-07-15T12:40:00-04:00",
"value" : [ {
"value" : 9,
"label" : "fw"
}, {
"value" : 10,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:40:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"value" : [ {
"value" : 11,
"label" : "fw"
}, {
"value" : 12,
"label" : "bw"
} ]
} ]
} ]
},
"sensor-info" : {
"serial-number" : "D7:40:1:7F:4A:72",
"ip-address" : "192.168.130.44",
"name" : "DemoNew",
"group" : "Internal Test Devices",
"device-type" : "PC2"
}
}
我想获取的是每个元素名称的测量数据。请看下面的例子:
这是我尝试过的:
data = {} # element-name ↦ direction ↦ {readings ↦ (timestamp × value) list, meta ↦ name ↦ value}
for element in json_data['content']['element']:
element_name = element['element-name']
element_data = {}
# collect
for measurement in element['measurement']:
dt = datetime.strptime(measurement['to'][:-3]+'00', '%Y-%m-%dT%H:%M:%S%z')
t = mktime(dt.timetuple())
for pair in measurement['value']:
direction = pair['label']
value = pair['value']
if not direction in element_data: element_data[direction] = []
element_data[direction].append( (t, value) )
# insert
metadata = {}
for key in element:
if not key in ['measurement', 'from', 'to']:
metadata[key] = element[key]
data[element_name] = {}
for direction in element_data:
data[element_name][direction] = {'readings': element_data[direction], 'meta': metadata}
camera_metadata = {}
for key in json_data:
if not key in ['content']:
camera_metadata[key] = json_data[key]
她就是我得到的结果:
{'Line 0': {'fw': {'readings': [(1594830900.0, 1),
(1594831200.0, 3),
(1594831500.0, 5)],
'meta': {'element-id': 0,
'element-name': 'Line 0',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}},
'bw': {'readings': [(1594830900.0, 2), (1594831200.0, 4), (1594831500.0, 6)],
'meta': {'element-id': 0,
'element-name': 'Line 0',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}}},
'GP Test CL.01': {'fw': {'readings': [(1594830900.0, 7),
(1594831200.0, 9),
(1594831500.0, 11)],
'meta': {'element-id': 1,
'element-name': 'GP Test CL.01',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}},
'bw': {'readings': [(1594830900.0, 8),
(1594831200.0, 10),
(1594831500.0, 12)],
'meta': {'element-id': 1,
'element-name': 'GP Test CL.01',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}}}}
我需要调整什么才能使结果看起来像上面的屏幕截图示例?
您试图一次获取一部分信息。但是要将 json 解析为数据框,您需要在嵌套循环中完成所有操作。
result = []
for element in json_data['content']['element']:
for m in element['measurement']:
data = {}
for val in m['value']:
data['SERIAL_NUMBER'] = json_data['sensor-info']['serial-number']
data['IP'] = json_data['sensor-info']['ip-address']
data['name'] = json_data['sensor-info']['name']
data['Group'] = json_data['sensor-info']['group']
data['Device Type'] = json_data['sensor-info']['device-type']
data['element-id'] = element['element-id']
data['Line name'] = element['element-name']
data['From time'] = m['from']
data['to time'] = m['to']
data[val['label']] = val['value']
result.append(data)
df = pd.DataFrame(result)
输出:
SERIAL_NUMBER IP name Group \
0 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
1 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
2 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
3 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
4 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
5 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
Device Type element-id Line name From time \
0 PC2 0 Line 0 2020-07-15T12:30:00-04:00
1 PC2 0 Line 0 2020-07-15T12:35:00-04:00
2 PC2 0 Line 0 2020-07-15T12:40:00-04:00
3 PC2 1 Test Line 2020-07-15T12:30:00-04:00
4 PC2 1 Test Line 2020-07-15T12:35:00-04:00
5 PC2 1 Test Line 2020-07-15T12:40:00-04:00
to time fw bw
0 2020-07-15T12:35:00-04:00 1 2
1 2020-07-15T12:40:00-04:00 3 4
2 2020-07-15T12:45:00-04:00 5 6
3 2020-07-15T12:35:00-04:00 7 8
4 2020-07-15T12:40:00-04:00 9 10
5 2020-07-15T12:45:00-04:00 11 12
如您所见,我没有弄清楚您的时间格式。另外,我认为你切换了“组”和“设备类型”。
我正在尝试解析这个嵌套的 JSON 文件,但我 运行 无法获取我需要的每个元素。
这里是 json 例子:
{
"sensor-time" : {
"timezone" : "New_York",
"time" : "2020-07-15T12:45:02-04:00"
},
"status" : {
"code" : "OK"
},
"content" : {
"element" : [ {
"element-id" : 0,
"element-name" : "Line 0",
"sensor-type" : "SINGLE_SENSOR",
"data-type" : "LINE",
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"resolution" : "FIVE_MINUTES",
"measurement" : [ {
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:35:00-04:00",
"value" : [ {
"value" : 1,
"label" : "fw"
}, {
"value" : 2,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:35:00-04:00",
"to" : "2020-07-15T12:40:00-04:00",
"value" : [ {
"value" : 3,
"label" : "fw"
}, {
"value" : 4,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:40:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"value" : [ {
"value" : 5,
"label" : "fw"
}, {
"value" : 6,
"label" : "bw"
} ]
} ]
}, {
"element-id" : 1,
"element-name" : "Test Line",
"sensor-type" : "SINGLE_SENSOR",
"data-type" : "LINE",
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"resolution" : "FIVE_MINUTES",
"measurement" : [ {
"from" : "2020-07-15T12:30:00-04:00",
"to" : "2020-07-15T12:35:00-04:00",
"value" : [ {
"value" : 7,
"label" : "fw"
}, {
"value" : 8,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:35:00-04:00",
"to" : "2020-07-15T12:40:00-04:00",
"value" : [ {
"value" : 9,
"label" : "fw"
}, {
"value" : 10,
"label" : "bw"
} ]
}, {
"from" : "2020-07-15T12:40:00-04:00",
"to" : "2020-07-15T12:45:00-04:00",
"value" : [ {
"value" : 11,
"label" : "fw"
}, {
"value" : 12,
"label" : "bw"
} ]
} ]
} ]
},
"sensor-info" : {
"serial-number" : "D7:40:1:7F:4A:72",
"ip-address" : "192.168.130.44",
"name" : "DemoNew",
"group" : "Internal Test Devices",
"device-type" : "PC2"
}
}
我想获取的是每个元素名称的测量数据。请看下面的例子:
这是我尝试过的:
data = {} # element-name ↦ direction ↦ {readings ↦ (timestamp × value) list, meta ↦ name ↦ value}
for element in json_data['content']['element']:
element_name = element['element-name']
element_data = {}
# collect
for measurement in element['measurement']:
dt = datetime.strptime(measurement['to'][:-3]+'00', '%Y-%m-%dT%H:%M:%S%z')
t = mktime(dt.timetuple())
for pair in measurement['value']:
direction = pair['label']
value = pair['value']
if not direction in element_data: element_data[direction] = []
element_data[direction].append( (t, value) )
# insert
metadata = {}
for key in element:
if not key in ['measurement', 'from', 'to']:
metadata[key] = element[key]
data[element_name] = {}
for direction in element_data:
data[element_name][direction] = {'readings': element_data[direction], 'meta': metadata}
camera_metadata = {}
for key in json_data:
if not key in ['content']:
camera_metadata[key] = json_data[key]
她就是我得到的结果:
{'Line 0': {'fw': {'readings': [(1594830900.0, 1),
(1594831200.0, 3),
(1594831500.0, 5)],
'meta': {'element-id': 0,
'element-name': 'Line 0',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}},
'bw': {'readings': [(1594830900.0, 2), (1594831200.0, 4), (1594831500.0, 6)],
'meta': {'element-id': 0,
'element-name': 'Line 0',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}}},
'GP Test CL.01': {'fw': {'readings': [(1594830900.0, 7),
(1594831200.0, 9),
(1594831500.0, 11)],
'meta': {'element-id': 1,
'element-name': 'GP Test CL.01',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}},
'bw': {'readings': [(1594830900.0, 8),
(1594831200.0, 10),
(1594831500.0, 12)],
'meta': {'element-id': 1,
'element-name': 'GP Test CL.01',
'sensor-type': 'SINGLE_SENSOR',
'data-type': 'LINE',
'resolution': 'FIVE_MINUTES'}}}}
我需要调整什么才能使结果看起来像上面的屏幕截图示例?
您试图一次获取一部分信息。但是要将 json 解析为数据框,您需要在嵌套循环中完成所有操作。
result = []
for element in json_data['content']['element']:
for m in element['measurement']:
data = {}
for val in m['value']:
data['SERIAL_NUMBER'] = json_data['sensor-info']['serial-number']
data['IP'] = json_data['sensor-info']['ip-address']
data['name'] = json_data['sensor-info']['name']
data['Group'] = json_data['sensor-info']['group']
data['Device Type'] = json_data['sensor-info']['device-type']
data['element-id'] = element['element-id']
data['Line name'] = element['element-name']
data['From time'] = m['from']
data['to time'] = m['to']
data[val['label']] = val['value']
result.append(data)
df = pd.DataFrame(result)
输出:
SERIAL_NUMBER IP name Group \
0 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
1 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
2 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
3 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
4 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
5 D7:40:1:7F:4A:72 192.168.130.44 DemoNew Internal Test Devices
Device Type element-id Line name From time \
0 PC2 0 Line 0 2020-07-15T12:30:00-04:00
1 PC2 0 Line 0 2020-07-15T12:35:00-04:00
2 PC2 0 Line 0 2020-07-15T12:40:00-04:00
3 PC2 1 Test Line 2020-07-15T12:30:00-04:00
4 PC2 1 Test Line 2020-07-15T12:35:00-04:00
5 PC2 1 Test Line 2020-07-15T12:40:00-04:00
to time fw bw
0 2020-07-15T12:35:00-04:00 1 2
1 2020-07-15T12:40:00-04:00 3 4
2 2020-07-15T12:45:00-04:00 5 6
3 2020-07-15T12:35:00-04:00 7 8
4 2020-07-15T12:40:00-04:00 9 10
5 2020-07-15T12:45:00-04:00 11 12
如您所见,我没有弄清楚您的时间格式。另外,我认为你切换了“组”和“设备类型”。