如何使用 Python 将自定义文件解析为 JSON 格式

How to use Python to parse custom file into JSON format

我正在尝试将一种由机器/软件生成的文件解析为 JSON 格式,以便使用其他软件和其他 Python 脚本进行分析。该文件的结构与 JSON 类似,但据我所知无法自动转换。

该文件类似于此(.bpf 文件类型):

PACKET fileName.bpf
    STYLE 502
    last_modified 1651620170 # Tue May 03 19:22:50 2022

    STRUCTURE BuildInfo
        PARAM Version
            Value = 1128
        ENDPARAM
        PARAM build_height
            Units = 1 # Inches
            Value = 0.905512
        ENDPARAM
        PARAM build_time_s
            Value = "3:22:53"
        ENDPARAM
        ... # Parameters continue
    ENDSTRUCTURE #BuildInfo only called once

    STRUCTURE PartInfo
        PARAM BandOffset
            Category_mask = 65537
            GUIName = "Stripe Offset"
            Order = 38
            Type = 3
            Units = 1 # Inches
            ZUnits = 1
            profile_z= 0.000000 profile_value = 0.243307
        ENDPARAM
        PARAM Color
            B = 0.380000
            G = 0.380000
            R = 0.380000
            UseDefault = 0
        ENDPARAM
        ... # Parameters continue
    ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d

我希望最终产品看起来像这样:

{
    "name": "fileName.bpf",
    "style": "502",
    "last_modified": "",
    "BuildInfo": {
        "Version": "1128",
        "build_height": {
            "Units": "1",
            "Value": "0.905512"
        },
        "build_time_s": "3:22:53",
        ... # parameters continue
    },
    "PartInfo-001": [
        "id": "1" #incremented for each part         
        "BandOffset": {
            "Category_mask": "65537",
            "GUIName": "Stripe Offset",
            "Order": "38",
            "Type": "3",
            "Units": "1",
            "ZUnits": "1",
            "profile_z": "0.000000",
            "profile_value": "0.243307",
        }
        "Color": {
            "B": "0.380000",
            "G": "0.380000",
            "R": "0.380000",
        }
        ... # parameters continue
    ... # PartInfo repeats
    ]
}

文件超过 55,000 行,参数太多,无法手动创建字典。我开始编写一个脚本,先把只含一个 PartInfo 的一小段文件解析为 Python 字典,再保存为 JSON 文件,但脚本运行后没有解析出文档中的任何内容。

# Python program to convert text
# file to JSON

import json
import re

# Matches one `name = value` pair. The value is either a double-quoted string
# (which may contain spaces) or a single whitespace-free token, so a line that
# carries several pairs (e.g. `profile_z= 0.0 profile_value = 0.24`) yields
# one match per pair.
_PROPERTY_RE = re.compile(r'([^\s=#"]+)\s*=\s*("[^"]*"|\S+)')


def _strip_comment(line):
    """Return *line* with a trailing ``# comment`` removed.

    A ``#`` that appears inside a double-quoted value is kept.
    """
    in_quotes = False
    for index, char in enumerate(line):
        if char == '"':
            in_quotes = not in_quotes
        elif char == '#' and not in_quotes:
            return line[:index]
    return line


def _parse_params(lines):
    """Collect every ``PARAM <name>`` ... ``ENDPARAM`` block from *lines*.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A dict mapping each parameter name to a dict of its
        ``property -> value`` pairs. All values are kept as strings, with
        surrounding double quotes removed. Lines outside a PARAM block
        (STRUCTURE, PACKET, blank lines, ``...``) are ignored. If a parameter
        name occurs more than once, the last occurrence wins.
    """
    params = {}
    current = None  # property dict of the PARAM block being read, if any

    for raw_line in lines:
        # Leading indentation and trailing comments carry no data.
        line = _strip_comment(raw_line).strip()
        if not line:
            continue

        tokens = line.split(None, 1)
        keyword = tokens[0]

        if keyword == 'PARAM':
            name = tokens[1].strip() if len(tokens) > 1 else ''
            current = {}
            params[name] = current
        elif keyword == 'ENDPARAM':
            current = None
        elif current is not None:
            for match in _PROPERTY_RE.finditer(line):
                value = match.group(2)
                if len(value) >= 2 and value[0] == value[-1] == '"':
                    value = value[1:-1]
                current[match.group(1)] = value

    return params


def main(filename=r'samplePartParameters.txt', out_filename="test1.json"):
    """Convert the PARAM blocks of a text file into a JSON file.

    Args:
        filename: Path of the text file to read.
        out_filename: Path of the JSON file to write.

    Returns:
        The dict that was written to *out_filename*.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Iterate the file lazily in a single pass; a second readlines() on the
    # same handle would return nothing.
    with open(filename, 'r') as file:
        partParameters = _parse_params(file)

    # The context manager closes the output even if json.dump raises.
    with open(out_filename, "w") as out_file:
        json.dump(partParameters, out_file, indent=4, sort_keys=False)

    return partParameters

# Script entry point: runs only when the file is executed directly, not when
# it is imported. Prints a start marker, calls main(), then prints a
# completion marker.
if __name__ == "__main__":
    print("Running.")
    main()
    print("Done.")

如果您在我的代码中看到错误,或者您知道更简单的方法,请告诉我。

谢谢!

以下 python 模块应该有所帮助。请看例子:

!pip install ttp
from ttp import ttp
import json

# Sample input text handed to the parser below. It contains one BuildInfo
# structure and two part structures named PartInfo-001 and PartInfo-002.
# NOTE(review): GUIName is written here as "Stripe_Offset" (no space) and the
# STRUCTURE names carry a -NNN suffix; presumably this keeps each value a
# single token for the template -- confirm against the real files.
data_to_parse = """
PACKET fileName.bpf
    STYLE 502
    last_modified 1651620170 # Tue May 03 19:22:50 2022

    STRUCTURE BuildInfo
        PARAM Version
            Value = 1128
        ENDPARAM
        PARAM build_height
            Units = 1 # Inches
            Value = 0.905512
        ENDPARAM
        PARAM build_time_s
            Value = "3:22:53"
        ENDPARAM
        ... # Parameters continue
    ENDSTRUCTURE #BuildInfo only called once

    STRUCTURE PartInfo-001
        PARAM BandOffset
            Category_mask = 65537
            GUIName = "Stripe_Offset"
            Order = 38
            Type = 3
            Units = 1 # Inches
            ZUnits = 1
            profile_z= 0.000000 profile_value = 0.243307
        ENDPARAM
        PARAM Color
            B = 0.380000
            G = 0.380000
            R = 0.380000
            UseDefault = 0
        ENDPARAM
        ... # Parameters continue
    ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d

    STRUCTURE PartInfo-002
        PARAM BandOffset
            Category_mask = 65538
            GUIName = "Stripe_Offset"
            Order = 39
            Type = 4
            Units = 2 # Inches
            ZUnits = 1
            profile_z= 0.100000 profile_value = 0.253307
        ENDPARAM
        PARAM Color
            B = 0.390000
            G = 0.390000
            R = 0.390000
            UseDefault = 0
        ENDPARAM
        ... # Parameters continue
    ENDSTRUCTURE #PartInfo ranges from 1 to however many parts are needed, max ~100
ENDPACKET
checksum 0xa61d

"""

# TTP template describing the input layout. Each {{name}} marks where a value
# is captured under that name, and <group> elements nest the captured values.
# The first inner group matches plain `STRUCTURE <name>` lines; the second
# matches `STRUCTURE <name>-<id>` lines and lists every property seen in the
# part parameters.
# NOTE(review): the two {{ignore}} tokens after Units appear to skip the
# trailing `# Inches` comment -- confirm against the TTP documentation.
# No comments are placed inside the string because its text is the template.
ttp_template = """
<group name="MyData">
PACKET {{name}}
    STYLE {{style}}
  <group name="{{INFO}}">
    STRUCTURE {{INFO}}
    <group name="{{INFO_TYPE}}">
        PARAM {{INFO_TYPE}}
            Value = {{Value}}
            Units = {{Units}} {{ignore}} {{ignore}}
    </group>
  </group>
  <group name="{{INFO}}">
    STRUCTURE {{INFO}}-{{id}}
    <group name="{{INFO_TYPE}}">
        PARAM {{INFO_TYPE}}
            Value = {{Value}}
            Units = {{Units}} {{ignore}} {{ignore}}
            Category_mask = {{Category_mask}}
            GUIName = {{GUIName}}
            Order = {{Order}}
            Type = {{Type}}
            ZUnits = {{ZUnits}}
            profile_z= {{profile_z}} profile_value = {{profile_value}}
            B = {{B}}
            G = {{G}}
            R = {{R}}
    </group>
  </group>
</group>
"""

# Hand the sample text and the template to TTP, then run the parse.
parser = ttp(template=ttp_template, data=data_to_parse)
parser.parse()

# Request JSON-formatted output and keep the first entry of what comes back.
json_outputs = parser.result(format="json")
results = json_outputs[0]

# Decode the JSON text into Python objects for any further processing.
result = json.loads(results)

# Display the JSON text itself.
print(results)

输出如下:

[
    {
        "MyData": {
            "BuildInfo": {
                "Version": {
                    "Value": "1128"
                },
                "build_height": {
                    "Units": "1",
                    "Value": "0.905512"
                },
                "build_time_s": {
                    "Value": "\"3:22:53\""
                }
            },
            "PartInfo": [
                {
                    "BandOffset": {
                        "Category_mask": "65537",
                        "GUIName": "\"Stripe_Offset\"",
                        "Order": "38",
                        "Type": "3",
                        "Units": "1",
                        "ZUnits": "1",
                        "profile_value": "0.243307",
                        "profile_z": "0.000000"
                    },
                    "Color": {
                        "B": "0.380000",
                        "G": "0.380000",
                        "R": "0.380000"
                    },
                    "id": "001"
                },
                {
                    "BandOffset": {
                        "Category_mask": "65538",
                        "GUIName": "\"Stripe_Offset\"",
                        "Order": "39",
                        "Type": "4",
                        "Units": "2",
                        "ZUnits": "1",
                        "profile_value": "0.253307",
                        "profile_z": "0.100000"
                    },
                    "Color": {
                        "B": "0.390000",
                        "G": "0.390000",
                        "R": "0.390000"
                    },
                    "id": "002"
                }
            ],
            "name": "fileName.bpf",
            "style": "502"
        }
    }
]