如何使用 Python 将自定义文件解析为 JSON 格式
How to use Python to parse custom file into JSON format
我正在尝试将 machine/software 生成的文件类型解析为 JSON 文件类型,以便于使用其他软件和其他 Python 脚本进行分析。该文件的结构类似于 JSON 文件,但据我所知不能自动转换。
该文件类似于此(.bpf 文件类型):
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
我希望最终产品看起来像这样:
{
"name": "fileName.bpf",
"style": "502",
"last_modified": "",
"BuildInfo": {
"Version": "1128",
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": "3:22:53",
... # parameters continue
},
"PartInfo-001": [
"id": "1" #incremented for each part
"BandOffset": {
"Category_mask": "65537",
"GUIName": "Stripe Offset",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_z": "0.000000",
"profile_value": "0.243307",
}
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000",
}
... # parameters continue
... # PartInfo repeats
]
}
文件超过 55,000 行,参数太多,无法手动创建字典。我开始编写一个脚本,先将文件中某个 PartInfo 的一小部分解析为 Python 字典,然后保存为 JSON 文件,但脚本实际上没有处理到文档中的任何内容。
# Python program to convert text
# file to JSON
import json
def _parse_parameters(lines):
    """Collect every ``PARAM <name>`` ... ``ENDPARAM`` block into a nested dict.

    Args:
        lines: Iterable of text lines (an open file object works).

    Returns:
        ``{param_name: {property_name: property_value}}``. All values are kept
        as strings; surrounding double quotes are removed. Lines outside a
        PARAM block are ignored.
    """
    import re  # function-scope import: the file only imports ``json`` at the top

    # One token is either a trailing "# comment" or a single "key = value"
    # pair. A value is a double-quoted string (may contain spaces or '#') or a
    # run of characters up to the next whitespace / '#'.
    token = re.compile(r'#.*|(\w+)\s*=\s*("[^"]*"|[^\s#]+)')

    parameters = {}
    current = None  # property dict of the PARAM block being read, if any
    for raw_line in lines:
        words = raw_line.strip().split(None, 1)
        keyword = words[0] if words else ''
        if keyword == 'ENDPARAM':
            current = None
        elif keyword == 'PARAM' and len(words) > 1:
            name = words[1].split('#', 1)[0].strip()
            current = parameters.setdefault(name, {})
        elif current is not None:
            # A single line may hold several pairs, e.g.
            # "profile_z= 0.000000 profile_value = 0.243307".
            for match in token.finditer(raw_line):
                key, value = match.groups()
                if key is None:
                    break  # reached a "# ..." comment; rest of line is ignored
                if len(value) >= 2 and value[0] == value[-1] == '"':
                    value = value[1:-1]
                current[key] = value
    return parameters


def main(filename=r'samplePartParameters.txt', out_filename="test1.json"):
    """Convert the PARAM blocks of a text file into a JSON file.

    Args:
        filename: Path of the text file to read.
        out_filename: Path of the JSON file to write (overwritten if present).

    The input is read in a single pass, so no line-number bookkeeping is
    needed, and both files are closed even if parsing or dumping fails.
    """
    with open(filename, 'r') as file:
        part_parameters = _parse_parameters(file)

    with open(out_filename, "w") as out_file:
        json.dump(part_parameters, out_file, indent=4, sort_keys=False)
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported;
    # the two prints bracket the run so the user can see it started and ended.
    print("Running.")
    main()
    print("Done.")
如果您在我的代码中看到错误,或者您知道更简单的方法,请告诉我。
谢谢!
以下 python 模块应该有所帮助。请看例子:
!pip install ttp
from ttp import ttp
import json
# Sample .bpf-style text fed to the parser: one BuildInfo structure followed
# by two PartInfo structures (PartInfo-001 and PartInfo-002). Unlike the
# question's sample, the structures carry an explicit "-NNN" suffix and GUIName
# is written without a space ("Stripe_Offset").
data_to_parse = """
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo-001
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe_Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
STRUCTURE PartInfo-002
PARAM BandOffset
Category_mask = 65538
GUIName = "Stripe_Offset"
Order = 39
Type = 4
Units = 2 # Inches
ZUnits = 1
profile_z= 0.100000 profile_value = 0.253307
ENDPARAM
PARAM Color
B = 0.390000
G = 0.390000
R = 0.390000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
"""
# TTP template describing which lines of the sample text to capture.
# Each {{name}} marks a value to extract; <group> tags nest the captures:
# "MyData" at the top, then one group per STRUCTURE ({{INFO}}), then one group
# per PARAM ({{INFO_TYPE}}). The second STRUCTURE group additionally captures
# the "-NNN" suffix as {{id}}.
# NOTE(review): {{ignore}} presumably discards the matched token (used here for
# the trailing "# Inches" comment) - confirm against the TTP documentation.
ttp_template = """
<group name="MyData">
PACKET {{name}}
STYLE {{style}}
<group name="{{INFO}}">
STRUCTURE {{INFO}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
</group>
</group>
<group name="{{INFO}}">
STRUCTURE {{INFO}}-{{id}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
Category_mask = {{Category_mask}}
GUIName = {{GUIName}}
Order = {{Order}}
Type = {{Type}}
ZUnits = {{ZUnits}}
profile_z= {{profile_z}} profile_value = {{profile_value}}
B = {{B}}
G = {{G}}
R = {{R}}
</group>
</group>
</group>
"""
# Build a TTP parser from the sample text and the template, then run it.
parser = ttp(data=data_to_parse, template=ttp_template)
parser.parse()
# Request the results in JSON format and keep the first entry; it is passed to
# json.loads() below, so it is JSON text rather than a Python object.
results = parser.result(format='json')[0]
# Decode the JSON text into Python objects.
# NOTE(review): `result` is not used afterwards in this snippet.
result = json.loads(results)
# Print the JSON text (`results`), not the decoded object (`result`).
print(results)
输出如下:
[
{
"MyData": {
"BuildInfo": {
"Version": {
"Value": "1128"
},
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": {
"Value": "\"3:22:53\""
}
},
"PartInfo": [
{
"BandOffset": {
"Category_mask": "65537",
"GUIName": "\"Stripe_Offset\"",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_value": "0.243307",
"profile_z": "0.000000"
},
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000"
},
"id": "001"
},
{
"BandOffset": {
"Category_mask": "65538",
"GUIName": "\"Stripe_Offset\"",
"Order": "39",
"Type": "4",
"Units": "2",
"ZUnits": "1",
"profile_value": "0.253307",
"profile_z": "0.100000"
},
"Color": {
"B": "0.390000",
"G": "0.390000",
"R": "0.390000"
},
"id": "002"
}
],
"name": "fileName.bpf",
"style": "502"
}
}
]
我正在尝试将 machine/software 生成的文件类型解析为 JSON 文件类型,以便于使用其他软件和其他 Python 脚本进行分析。该文件的结构类似于 JSON 文件,但据我所知不能自动转换。
该文件类似于此(.bpf 文件类型):
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
我希望最终产品看起来像这样:
{
"name": "fileName.bpf",
"style": "502",
"last_modified": "",
"BuildInfo": {
"Version": "1128",
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": "3:22:53",
... # parameters continue
},
"PartInfo-001": [
"id": "1" #incremented for each part
"BandOffset": {
"Category_mask": "65537",
"GUIName": "Stripe Offset",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_z": "0.000000",
"profile_value": "0.243307",
}
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000",
}
... # parameters continue
... # PartInfo repeats
]
}
文件超过 55,000 行,参数太多,无法手动创建字典。我开始编写一个脚本,先将文件中某个 PartInfo 的一小部分解析为 Python 字典,然后保存为 JSON 文件,但脚本实际上没有处理到文档中的任何内容。
# Python program to convert text
# file to JSON
import json
def _parse_parameters(lines):
    """Collect every ``PARAM <name>`` ... ``ENDPARAM`` block into a nested dict.

    Args:
        lines: Iterable of text lines (an open file object works).

    Returns:
        ``{param_name: {property_name: property_value}}``. All values are kept
        as strings; surrounding double quotes are removed. Lines outside a
        PARAM block are ignored.
    """
    import re  # function-scope import: the file only imports ``json`` at the top

    # One token is either a trailing "# comment" or a single "key = value"
    # pair. A value is a double-quoted string (may contain spaces or '#') or a
    # run of characters up to the next whitespace / '#'.
    token = re.compile(r'#.*|(\w+)\s*=\s*("[^"]*"|[^\s#]+)')

    parameters = {}
    current = None  # property dict of the PARAM block being read, if any
    for raw_line in lines:
        words = raw_line.strip().split(None, 1)
        keyword = words[0] if words else ''
        if keyword == 'ENDPARAM':
            current = None
        elif keyword == 'PARAM' and len(words) > 1:
            name = words[1].split('#', 1)[0].strip()
            current = parameters.setdefault(name, {})
        elif current is not None:
            # A single line may hold several pairs, e.g.
            # "profile_z= 0.000000 profile_value = 0.243307".
            for match in token.finditer(raw_line):
                key, value = match.groups()
                if key is None:
                    break  # reached a "# ..." comment; rest of line is ignored
                if len(value) >= 2 and value[0] == value[-1] == '"':
                    value = value[1:-1]
                current[key] = value
    return parameters


def main(filename=r'samplePartParameters.txt', out_filename="test1.json"):
    """Convert the PARAM blocks of a text file into a JSON file.

    Args:
        filename: Path of the text file to read.
        out_filename: Path of the JSON file to write (overwritten if present).

    The input is read in a single pass, so no line-number bookkeeping is
    needed, and both files are closed even if parsing or dumping fails.
    """
    with open(filename, 'r') as file:
        part_parameters = _parse_parameters(file)

    with open(out_filename, "w") as out_file:
        json.dump(part_parameters, out_file, indent=4, sort_keys=False)
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported;
    # the two prints bracket the run so the user can see it started and ended.
    print("Running.")
    main()
    print("Done.")
如果您在我的代码中看到错误,或者您知道更简单的方法,请告诉我。
谢谢!
以下 python 模块应该有所帮助。请看例子:
!pip install ttp
from ttp import ttp
import json
# Sample .bpf-style text fed to the parser: one BuildInfo structure followed
# by two PartInfo structures (PartInfo-001 and PartInfo-002). Unlike the
# question's sample, the structures carry an explicit "-NNN" suffix and GUIName
# is written without a space ("Stripe_Offset").
data_to_parse = """
PACKET fileName.bpf
STYLE 502
last_modified 1651620170 # Tue May 03 19:22:50 2022
STRUCTURE BuildInfo
PARAM Version
Value = 1128
ENDPARAM
PARAM build_height
Units = 1 # Inches
Value = 0.905512
ENDPARAM
PARAM build_time_s
Value = "3:22:53"
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #BuildInfo only called once
STRUCTURE PartInfo-001
PARAM BandOffset
Category_mask = 65537
GUIName = "Stripe_Offset"
Order = 38
Type = 3
Units = 1 # Inches
ZUnits = 1
profile_z= 0.000000 profile_value = 0.243307
ENDPARAM
PARAM Color
B = 0.380000
G = 0.380000
R = 0.380000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
STRUCTURE PartInfo-002
PARAM BandOffset
Category_mask = 65538
GUIName = "Stripe_Offset"
Order = 39
Type = 4
Units = 2 # Inches
ZUnits = 1
profile_z= 0.100000 profile_value = 0.253307
ENDPARAM
PARAM Color
B = 0.390000
G = 0.390000
R = 0.390000
UseDefault = 0
ENDPARAM
... # Parameters continue
ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d
"""
# TTP template describing which lines of the sample text to capture.
# Each {{name}} marks a value to extract; <group> tags nest the captures:
# "MyData" at the top, then one group per STRUCTURE ({{INFO}}), then one group
# per PARAM ({{INFO_TYPE}}). The second STRUCTURE group additionally captures
# the "-NNN" suffix as {{id}}.
# NOTE(review): {{ignore}} presumably discards the matched token (used here for
# the trailing "# Inches" comment) - confirm against the TTP documentation.
ttp_template = """
<group name="MyData">
PACKET {{name}}
STYLE {{style}}
<group name="{{INFO}}">
STRUCTURE {{INFO}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
</group>
</group>
<group name="{{INFO}}">
STRUCTURE {{INFO}}-{{id}}
<group name="{{INFO_TYPE}}">
PARAM {{INFO_TYPE}}
Value = {{Value}}
Units = {{Units}} {{ignore}} {{ignore}}
Category_mask = {{Category_mask}}
GUIName = {{GUIName}}
Order = {{Order}}
Type = {{Type}}
ZUnits = {{ZUnits}}
profile_z= {{profile_z}} profile_value = {{profile_value}}
B = {{B}}
G = {{G}}
R = {{R}}
</group>
</group>
</group>
"""
# Build a TTP parser from the sample text and the template, then run it.
parser = ttp(data=data_to_parse, template=ttp_template)
parser.parse()
# Request the results in JSON format and keep the first entry; it is passed to
# json.loads() below, so it is JSON text rather than a Python object.
results = parser.result(format='json')[0]
# Decode the JSON text into Python objects.
# NOTE(review): `result` is not used afterwards in this snippet.
result = json.loads(results)
# Print the JSON text (`results`), not the decoded object (`result`).
print(results)
输出如下:
[
{
"MyData": {
"BuildInfo": {
"Version": {
"Value": "1128"
},
"build_height": {
"Units": "1",
"Value": "0.905512"
},
"build_time_s": {
"Value": "\"3:22:53\""
}
},
"PartInfo": [
{
"BandOffset": {
"Category_mask": "65537",
"GUIName": "\"Stripe_Offset\"",
"Order": "38",
"Type": "3",
"Units": "1",
"ZUnits": "1",
"profile_value": "0.243307",
"profile_z": "0.000000"
},
"Color": {
"B": "0.380000",
"G": "0.380000",
"R": "0.380000"
},
"id": "001"
},
{
"BandOffset": {
"Category_mask": "65538",
"GUIName": "\"Stripe_Offset\"",
"Order": "39",
"Type": "4",
"Units": "2",
"ZUnits": "1",
"profile_value": "0.253307",
"profile_z": "0.100000"
},
"Color": {
"B": "0.390000",
"G": "0.390000",
"R": "0.390000"
},
"id": "002"
}
],
"name": "fileName.bpf",
"style": "502"
}
}
]