将 Erlang 数据解析为 Python 字典
Parsing Erlang data to Python dictionary
我有一个 erlang 脚本,我想从中获取一些数据并将其存储在 python 字典中。
解析脚本很容易得到这样的字符串:
{userdata,
[{tags,
[#dt{number=111},
#mp{id='X23.W'}]},
{log,
'LG22'},
{instruction,
"String that can contain characters like -, _ or numbers"}
]
}.
期望的结果:
userdata = {"tags": {"dt": {"number": 111}, "mp": {"id": "X23.W"}},
"log": "LG22",
"instruction": "String that can contain characters like -, _ or numbers"}
# "#" mark for data in "tags" is not required in this structure.
# Also value for "tags" can be any iterable structure: tuple, list or dictionary.
但我不确定如何将这些数据转换成 python 字典。我的第一个想法是使用 json.loads,但这需要做很多修改(给单词加上引号、将“,”替换为“:”等等)。
此外,userdata 中的键并不限于某个固定的集合。在这个例子中有 'tags'、'log' 和 'instruction',但还可能有更多,例如 'slogan'、'ids' 等。
另外,我不确定键的顺序。我假设键可以以任意顺序出现。
我的代码(它不适用于 id='X23.W',所以我从输入中删除了“.”):
import re
import json
# Sample Erlang term flattened onto one line. The '.' was removed from the id
# because the word-quoting step below would split 'X23.W' into two quoted words.
in_ = """{userdata, [{tags, [#dt{number=111}, #mp{id='X23W'}]}, {log, 'LG22'}, {instruction, "String that can contain characters like -, _ or numbers"}]}"""

# Strip the outer "{userdata, [" wrapper and the trailing "]}".
buff = in_.replace("{userdata, [", "")[:-2]

# Append ':' to every record tag: "#dt{" -> "#dt:{".
re_helper = re.compile(r"(#\w+)")
buff = re_helper.sub(r'\1:', buff)

# Quote every bare word, but only in the text before "instruction", so the
# words inside the instruction string itself are left untouched.
partition = buff.partition("instruction")
section_to_replace = partition[0]
replacer = re.compile(r"(\w+)")
match = replacer.sub(r'"\1"', section_to_replace)
buff = ''.join([match, '"instruction"', partition[2]])

# Turn the remaining Erlang punctuation into JSON punctuation.
buff = buff.replace("#", "")
buff = buff.replace('",', '":')  # '{"tags",' -> '{"tags":'
buff = buff.replace("}, {", "}, \n{")  # one top-level tuple per line
buff = buff.replace("=", ":")
buff = buff.replace("'", "")
temp = buff.split("\n")

userdata = {}

# The first line holds the "tags" tuple: drop its trailing ", " and turn the
# record list into a JSON object.
buff = temp[0][:-2]
buff = buff.replace("[", "{")
buff = buff.replace("]", "}")
userdata.update(json.loads(buff))

# Every remaining line is a single {"key": "value"} object.
for v in temp[1:]:
    v = v.strip()
    if v.endswith(","):
        v = v[:-1]
    userdata.update(json.loads(v))

print(userdata)
输出:
{'tags': {'dt': {'number': '111'}, 'mp': {'id': 'X23W'}}, 'instruction': 'String that can contain characters like -, _ or numbers', 'log': 'LG22'}
import json
import re
# Sample Erlang term flattened onto one line.
in_ = """{userdata, [{tags, [#dt{number=111}, #mp{id='X23.W'}]}, {log, 'LG22'}, {instruction, "String that can contain characters like -, _ or numbers"}]}"""

# Step 1: turn each tuple header into a JSON key: '{tags,' -> '{"tags":'.
qouted_headers = re.sub(r"\{(\w+),", r'{"\1":', in_)
# Step 2: a list whose first element is a record ('[#dt{...}, #mp{...}]')
# becomes an object body ('{#dt{...}, #mp{...}}'); non-greedy so it stops at
# the first closing bracket.
changed_hashed_list_to_dict = re.sub(r"\[(#.*?)\]", r'{\1}', qouted_headers)
# Step 3: record tags become keys: '#dt' -> '"dt":'.
hashed_variables = re.sub(r'#(\w+)', r'"\1":', changed_hashed_list_to_dict)
# Step 4: record fields become keys: '{number=' -> '{"number":'.
equality_signes_replaced_and_quoted = re.sub(r'{(\w+)=', r'{"\1":', hashed_variables)
# Step 5: JSON only accepts double-quoted strings.
replace_single_qoutes = equality_signes_replaced_and_quoted.replace('\'', '"')

result = json.loads(replace_single_qoutes)
print(result)
输出:
{'userdata': [{'tags': {'dt': {'number': 111}, 'mp': {'id': 'X23.W'}}}, {'log': 'LG22'}, {'instruction': 'String that can contain characters like -, _ or numbers'}]}
我有一个 erlang 脚本,我想从中获取一些数据并将其存储在 python 字典中。 解析脚本很容易得到这样的字符串:
{userdata,
[{tags,
[#dt{number=111},
#mp{id='X23.W'}]},
{log,
'LG22'},
{instruction,
"String that can contain characters like -, _ or numbers"}
]
}.
期望的结果:
userdata = {"tags": {"dt": {"number": 111}, "mp": {"id": "X23.W"}},
"log": "LG22",
"instruction": "String that can contain characters like -, _ or numbers"}
# "#" mark for data in "tags" is not required in this structure.
# Also value for "tags" can be any iterable structure: tuple, list or dictionary.
但我不确定如何将这些数据转换成 python 字典。我的第一个想法是使用 json.loads,但这需要做很多修改(给单词加上引号、将“,”替换为“:”等等)。
此外,userdata 中的键并不限于某个固定的集合。在这个例子中有 'tags'、'log' 和 'instruction',但还可能有更多,例如 'slogan'、'ids' 等。另外,我不确定键的顺序。我假设键可以以任意顺序出现。
我的代码(它不适用于 id='X23.W',所以我从输入中删除了“.”):
import re
import json
# Sample Erlang term flattened onto one line. The '.' was removed from the id
# because the word-quoting step below would split 'X23.W' into two quoted words.
in_ = """{userdata, [{tags, [#dt{number=111}, #mp{id='X23W'}]}, {log, 'LG22'}, {instruction, "String that can contain characters like -, _ or numbers"}]}"""

# Strip the outer "{userdata, [" wrapper and the trailing "]}".
buff = in_.replace("{userdata, [", "")[:-2]

# Append ':' to every record tag: "#dt{" -> "#dt:{".
re_helper = re.compile(r"(#\w+)")
buff = re_helper.sub(r'\1:', buff)

# Quote every bare word, but only in the text before "instruction", so the
# words inside the instruction string itself are left untouched.
partition = buff.partition("instruction")
section_to_replace = partition[0]
replacer = re.compile(r"(\w+)")
match = replacer.sub(r'"\1"', section_to_replace)
buff = ''.join([match, '"instruction"', partition[2]])

# Turn the remaining Erlang punctuation into JSON punctuation.
buff = buff.replace("#", "")
buff = buff.replace('",', '":')  # '{"tags",' -> '{"tags":'
buff = buff.replace("}, {", "}, \n{")  # one top-level tuple per line
buff = buff.replace("=", ":")
buff = buff.replace("'", "")
temp = buff.split("\n")

userdata = {}

# The first line holds the "tags" tuple: drop its trailing ", " and turn the
# record list into a JSON object.
buff = temp[0][:-2]
buff = buff.replace("[", "{")
buff = buff.replace("]", "}")
userdata.update(json.loads(buff))

# Every remaining line is a single {"key": "value"} object.
for v in temp[1:]:
    v = v.strip()
    if v.endswith(","):
        v = v[:-1]
    userdata.update(json.loads(v))

print(userdata)
输出:
{'tags': {'dt': {'number': '111'}, 'mp': {'id': 'X23W'}}, 'instruction': 'String that can contain characters like -, _ or numbers', 'log': 'LG22'}
import json
import re
# Sample Erlang term flattened onto one line.
in_ = """{userdata, [{tags, [#dt{number=111}, #mp{id='X23.W'}]}, {log, 'LG22'}, {instruction, "String that can contain characters like -, _ or numbers"}]}"""

# Step 1: turn each tuple header into a JSON key: '{tags,' -> '{"tags":'.
qouted_headers = re.sub(r"\{(\w+),", r'{"\1":', in_)
# Step 2: a list whose first element is a record ('[#dt{...}, #mp{...}]')
# becomes an object body ('{#dt{...}, #mp{...}}'); non-greedy so it stops at
# the first closing bracket.
changed_hashed_list_to_dict = re.sub(r"\[(#.*?)\]", r'{\1}', qouted_headers)
# Step 3: record tags become keys: '#dt' -> '"dt":'.
hashed_variables = re.sub(r'#(\w+)', r'"\1":', changed_hashed_list_to_dict)
# Step 4: record fields become keys: '{number=' -> '{"number":'.
equality_signes_replaced_and_quoted = re.sub(r'{(\w+)=', r'{"\1":', hashed_variables)
# Step 5: JSON only accepts double-quoted strings.
replace_single_qoutes = equality_signes_replaced_and_quoted.replace('\'', '"')

result = json.loads(replace_single_qoutes)
print(result)
输出:
{'userdata': [{'tags': {'dt': {'number': 111}, 'mp': {'id': 'X23.W'}}}, {'log': 'LG22'}, {'instruction': 'String that can contain characters like -, _ or numbers'}]}