如何解析文件并用其内容填充 python 字典
How to parse a file and populate a python dictionary with its content
所以我有以下文件 summary1:
---
Project: pgm1
Last-Status: success
summary: 102 passed, 88 warnings in 26.11s
---
Project: pgm2
Last-Status: failed
summary: 1 failed, 316 passed, 204 warnings in 42.94s
---
Project: pgm3
Last-Status: success
summary: 400 passed, 40 skipped, 1 xfailed in 3.17s
---
我需要解析它的内容,然后在一个循环中,创建一个具有预定义值的字典:
# Template entry: every field starts at 0 and is meant to be overwritten with
# the values parsed from the summary file.
entry = {
    "{#STATUS}": 0,
    "{#PASSED}": 0,
    "{#FAILED}": 0,
    "{#WARNING}": 0,
    "{#SKIPPED}": 0,
    "{#XFAILED}": 0,
}
然后用文件中的解析值填充相应的字典键,结果如下:
# Desired shape of one populated entry (illustrative values).
entry = {
    "{#STATUS}": 1,
    "{#DESCRIPTION}": "kytos/mef_eline",
    "{#PASSED}": 316,
    "{#FAILED}": 1,
    "{#WARNING}": 0,
    "{#SKIPPED}": 0,
    "{#XFAILED}": 0,
}
# ... And so on for all 3 Project-Desc data sections in the file
但是我一直没弄清楚该如何解析文件并给变量赋值。通过搜索,我发现正则表达式是一个很好的工具,但我以前从未用过。
def break_text(lst_text):
    """Extract the raw fields from one record of the summary file.

    ``lst_text`` is indexed at positions 1, 2 and 3 (position 0 is not read):
    the text after ``": "`` is taken from items 1 and 2, and every
    ``"<digits> <word>"`` pair is taken from item 3.

    Returns a 3-tuple of lists ``(desc, status, summa)`` as produced by
    ``re.findall``.
    """
    import re

    after_colon = re.compile(r": (.*)")
    count_and_word = re.compile(r"\d+ \w+")
    return (
        after_colon.findall(lst_text[1]),
        after_colon.findall(lst_text[2]),
        count_and_word.findall(lst_text[3]),
    )
def create_dict(lst):
    """Build one result entry from the tuple returned by ``break_text``.

    ``lst[0]`` and ``lst[1]`` are stored unchanged under ``{#DESCRIPTION}``
    and ``{#Status}``. ``lst[2]`` is an iterable of ``"<count> <category>"``
    strings; each recognised category overwrites the matching counter
    (default ``0``) with the count, kept as a string.

    Categories that have no counter in the entry (for example ``errors`` or
    ``deselected``) are skipped instead of raising ``KeyError``.
    """
    entry = {
        "{#Status}": lst[1],
        "{#DESCRIPTION}": lst[0],
        "{#PASSED}": 0,
        "{#FAILED}": 0,
        "{#WARNING}": 0,
        "{#SKIPPED}": 0,
        "{#XFAILED}": 0,
    }
    dict_temp = {
        "passed": "{#PASSED}",
        "failed": "{#FAILED}",
        # pytest prints the singular form when there is exactly one warning.
        "warning": "{#WARNING}",
        "warnings": "{#WARNING}",
        "skipped": "{#SKIPPED}",
        "xfailed": "{#XFAILED}",
    }
    for item in lst[2]:
        count, category = item.split()
        target = dict_temp.get(category)
        if target is not None:
            entry[target] = count
    return entry
# Read the whole summary file; `file` ends up holding the list of lines.
with open("t.txt", "r") as handle:
    file = handle.read().splitlines()

final_dict = {}
c = 0
for i in range(0, len(file), 4):  # read 4 lines of the file each time
    text = file[i : i + 4]
    # break_text indexes items 1..3, so anything shorter than a full record
    # (the closing '---', a stray trailing line) must be skipped.
    if len(text) < 4:
        continue
    res_tmp = break_text(text)
    res = create_dict(res_tmp)
    final_dict[c] = res
    c += 1
print(final_dict)
- 输出
{0: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/mef_eline'], '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 1: {'{#Status}': ['failed'], '{#DESCRIPTION}': ['kytos/kytos'], '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 2: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/python-openflow'], '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}}
您可以从文件中的每个条目中解析出各种报告值,并用结果形成单独的字典:
import itertools as it
import re

# Maps the category word used in the summary line to the output key.
hds = {'passed': '{#PASSED}', 'failed': '{#FAILED}', 'warnings': '{#WARNING}', 'skipped': '{#SKIPPED}', 'xfailed': '{#XFAILED}'}

with open('your_file.txt') as f:
    # Blank lines are dropped so they can never form a bogus record.
    contents = [i.strip('\n') for i in f if i.strip()]

# Consecutive lines between '---' separators form one record.
d = [list(b) for a, b in it.groupby(contents, key=lambda x: x == '---') if not a]


def get_dict(entry):
    """Convert one record (three ``"key: value"`` lines) into a result dict.

    The lines are read positionally as description, status and summary.
    Counts found in the summary are kept as strings; categories that do not
    appear keep the default ``0``.

    Raises ``ValueError`` if ``entry`` does not contain exactly three lines.
    """
    # Split on the first ': ' only, so a value may itself contain ': '.
    desc, status, summary = (line.partition(': ')[2] for line in entry)
    counts = {}
    for count, category in re.findall(r'(\d+)\s([a-z]+)', summary):
        # pytest prints the singular "1 warning"; file it with "warnings".
        if category == 'warning':
            category = 'warnings'
        counts[category] = count
    return {
        "{#STATUS}": status,
        "{#DESCRIPTION}": desc,
        **{key: counts.get(word, 0) for word, key in hds.items()},
    }


result = [get_dict(i) for i in d]
输出
[{'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/mef_eline', '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'failed', '{#DESCRIPTION}': 'kytos/kytos', '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/python-openflow', '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}]
这可能有点小题大做,但您的文件可以看作某种 DSL——领域特定语言(domain specific language)。既然如此,为什么不自己写一个小解析器呢?例如借助 parsimonious:
from parsimonious.grammar import Grammar
from parsimonious.grammar import NodeVisitor
import re
# Sample input inlined as a string; this is the text handed to the parser
# further down. Records are separated by '---' lines.
data = """
---
Project-Desc: kytos/mef_eline
Last-Status: success
pytest summary: 102 passed, 88 warnings in 26.11s
---
Project-Desc: kytos/kytos
Last-Status: failed
pytest summary: 1 failed, 316 passed, 204 warnings in 42.94s
---
Project-Desc: kytos/python-openflow
Last-Status: success
pytest summary: 400 passed, 40 skipped, 1 xfailed in 3.17s
---
"""
class DSL(NodeVisitor):
    """Visitor that turns '---'-separated ``key: value`` records into dicts.

    The ``visit_*`` methods below are applied to the tree produced by
    ``grammar``; the overall result is the generator returned by
    ``visit_content``, yielding one merged dict per non-empty block.
    """

    # Leading "<count> <word>" of a summary item, e.g. "88 warnings in 26.11s".
    rx = re.compile(r'(\d+)\s+(\w+).*')

    # NOTE: the grammar text is kept flush-left so the string literal stays
    # exactly as originally written.
    grammar = Grammar(r"""
content = (block / ws)+
block = sep line*
line = key colon ws value nl?
key = ~"^[^:\n]+"m
value = ~".+"
colon = ":"
nl = ~"[\n\r]+"
sep = "---" nl
ws = ~"\s*"
""")

    def generic_visit(self, node, visited_children):
        """Fallback for rules without a visitor: pass children up, or None."""
        return visited_children or None

    def visit_line(self, node, visited_children):
        """Return the dict contributed by one ``key: value`` line.

        ``value`` arrives as the comma-split list built by ``visit_value``.
        The branch is chosen by the key, not by how many comma-separated
        parts the value has, so a one-item summary ("5 passed in 1.0s") and a
        description containing a comma are both handled.
        """
        key, _, _, value, _ = visited_children
        if key.endswith("Status"):
            return {"#STATUS": ",".join(value)}
        if key.endswith("Desc"):
            return {"#DESCRIPTION": ",".join(value)}

        # Any other line is treated as a summary of "<count> <category>" items.
        values = {}
        for item in value:
            match = self.rx.search(item.strip())
            if match is None:
                # Not a "<count> <word>" item; nothing to record.
                continue
            count, category = match.groups()
            values["#" + category.upper()] = count
        return values

    def visit_key(self, node, visited_children):
        """Return the key text as matched."""
        return node.text

    def visit_value(self, node, visited_children):
        """Return the value text split on commas."""
        return node.text.split(",")

    def visit_block(self, node, visited_children):
        """Drop the separator and return the visited lines of the block."""
        _, values = visited_children
        return values

    def visit_content(self, node, visited_children):
        """Yield one dict per block, merging the dicts of its lines."""
        for child in visited_children:
            # Whitespace alternatives and blocks without lines come through
            # as a falsy first element and are skipped.
            if child[0]:
                yield {key: value for dct in child[0] for key, value in dct.items()}
# Parse the sample text and print each resulting record on its own line.
dsl = DSL()
for record in dsl.parse(data):
    print(record)
这会产生如下输出:
{'#DESCRIPTION': 'kytos/mef_eline', '#STATUS': 'success', '#PASSED': '102', '#WARNINGS': '88'}
{'#DESCRIPTION': 'kytos/kytos', '#STATUS': 'failed', '#FAILED': '1', '#PASSED': '316', '#WARNINGS': '204'}
{'#DESCRIPTION': 'kytos/python-openflow', '#STATUS': 'success', '#PASSED': '400', '#SKIPPED': '40', '#XFAILED': '1'}
是的,这种写法更长,学习起来也更复杂,但容错性很好(可以试着在任意位置添加空行)。
所以我有以下文件 summary1:
---
Project: pgm1
Last-Status: success
summary: 102 passed, 88 warnings in 26.11s
---
Project: pgm2
Last-Status: failed
summary: 1 failed, 316 passed, 204 warnings in 42.94s
---
Project: pgm3
Last-Status: success
summary: 400 passed, 40 skipped, 1 xfailed in 3.17s
---
我需要解析它的内容,然后在一个循环中,创建一个具有预定义值的字典:
# Template entry: every field starts at 0 and is meant to be overwritten with
# the values parsed from the summary file.
entry = {
    "{#STATUS}": 0,
    "{#PASSED}": 0,
    "{#FAILED}": 0,
    "{#WARNING}": 0,
    "{#SKIPPED}": 0,
    "{#XFAILED}": 0,
}
然后用文件中的解析值填充相应的字典键,结果如下:
# Desired shape of one populated entry (illustrative values).
entry = {
    "{#STATUS}": 1,
    "{#DESCRIPTION}": "kytos/mef_eline",
    "{#PASSED}": 316,
    "{#FAILED}": 1,
    "{#WARNING}": 0,
    "{#SKIPPED}": 0,
    "{#XFAILED}": 0,
}
# ... And so on for all 3 Project-Desc data sections in the file
但是我一直没弄清楚该如何解析文件并给变量赋值。通过搜索,我发现正则表达式是一个很好的工具,但我以前从未用过。
def break_text(lst_text):
    """Extract the raw fields from one record of the summary file.

    ``lst_text`` is indexed at positions 1, 2 and 3 (position 0 is not read):
    the text after ``": "`` is taken from items 1 and 2, and every
    ``"<digits> <word>"`` pair is taken from item 3.

    Returns a 3-tuple of lists ``(desc, status, summa)`` as produced by
    ``re.findall``.
    """
    import re

    after_colon = re.compile(r": (.*)")
    count_and_word = re.compile(r"\d+ \w+")
    return (
        after_colon.findall(lst_text[1]),
        after_colon.findall(lst_text[2]),
        count_and_word.findall(lst_text[3]),
    )
def create_dict(lst):
    """Build one result entry from the tuple returned by ``break_text``.

    ``lst[0]`` and ``lst[1]`` are stored unchanged under ``{#DESCRIPTION}``
    and ``{#Status}``. ``lst[2]`` is an iterable of ``"<count> <category>"``
    strings; each recognised category overwrites the matching counter
    (default ``0``) with the count, kept as a string.

    Categories that have no counter in the entry (for example ``errors`` or
    ``deselected``) are skipped instead of raising ``KeyError``.
    """
    entry = {
        "{#Status}": lst[1],
        "{#DESCRIPTION}": lst[0],
        "{#PASSED}": 0,
        "{#FAILED}": 0,
        "{#WARNING}": 0,
        "{#SKIPPED}": 0,
        "{#XFAILED}": 0,
    }
    dict_temp = {
        "passed": "{#PASSED}",
        "failed": "{#FAILED}",
        # pytest prints the singular form when there is exactly one warning.
        "warning": "{#WARNING}",
        "warnings": "{#WARNING}",
        "skipped": "{#SKIPPED}",
        "xfailed": "{#XFAILED}",
    }
    for item in lst[2]:
        count, category = item.split()
        target = dict_temp.get(category)
        if target is not None:
            entry[target] = count
    return entry
# Read the whole summary file; `file` ends up holding the list of lines.
with open("t.txt", "r") as handle:
    file = handle.read().splitlines()

final_dict = {}
c = 0
for i in range(0, len(file), 4):  # read 4 lines of the file each time
    text = file[i : i + 4]
    # break_text indexes items 1..3, so anything shorter than a full record
    # (the closing '---', a stray trailing line) must be skipped.
    if len(text) < 4:
        continue
    res_tmp = break_text(text)
    res = create_dict(res_tmp)
    final_dict[c] = res
    c += 1
print(final_dict)
- 输出
{0: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/mef_eline'], '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 1: {'{#Status}': ['failed'], '{#DESCRIPTION}': ['kytos/kytos'], '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, 2: {'{#Status}': ['success'], '{#DESCRIPTION}': ['kytos/python-openflow'], '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}}
您可以从文件中的每个条目中解析出各种报告值,并用结果形成单独的字典:
import itertools as it
import re

# Maps the category word used in the summary line to the output key.
hds = {'passed': '{#PASSED}', 'failed': '{#FAILED}', 'warnings': '{#WARNING}', 'skipped': '{#SKIPPED}', 'xfailed': '{#XFAILED}'}

with open('your_file.txt') as f:
    # Blank lines are dropped so they can never form a bogus record.
    contents = [i.strip('\n') for i in f if i.strip()]

# Consecutive lines between '---' separators form one record.
d = [list(b) for a, b in it.groupby(contents, key=lambda x: x == '---') if not a]


def get_dict(entry):
    """Convert one record (three ``"key: value"`` lines) into a result dict.

    The lines are read positionally as description, status and summary.
    Counts found in the summary are kept as strings; categories that do not
    appear keep the default ``0``.

    Raises ``ValueError`` if ``entry`` does not contain exactly three lines.
    """
    # Split on the first ': ' only, so a value may itself contain ': '.
    desc, status, summary = (line.partition(': ')[2] for line in entry)
    counts = {}
    for count, category in re.findall(r'(\d+)\s([a-z]+)', summary):
        # pytest prints the singular "1 warning"; file it with "warnings".
        if category == 'warning':
            category = 'warnings'
        counts[category] = count
    return {
        "{#STATUS}": status,
        "{#DESCRIPTION}": desc,
        **{key: counts.get(word, 0) for word, key in hds.items()},
    }


result = [get_dict(i) for i in d]
输出
[{'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/mef_eline', '{#PASSED}': '102', '{#FAILED}': 0, '{#WARNING}': '88', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'failed', '{#DESCRIPTION}': 'kytos/kytos', '{#PASSED}': '316', '{#FAILED}': '1', '{#WARNING}': '204', '{#SKIPPED}': 0, '{#XFAILED}': 0}, {'{#STATUS}': 'success', '{#DESCRIPTION}': 'kytos/python-openflow', '{#PASSED}': '400', '{#FAILED}': 0, '{#WARNING}': 0, '{#SKIPPED}': '40', '{#XFAILED}': '1'}]
这可能有点小题大做,但您的文件可以看作某种 DSL——领域特定语言(domain specific language)。既然如此,为什么不自己写一个小解析器呢?例如借助 parsimonious:
from parsimonious.grammar import Grammar
from parsimonious.grammar import NodeVisitor
import re
# Sample input inlined as a string; this is the text handed to the parser
# further down. Records are separated by '---' lines.
data = """
---
Project-Desc: kytos/mef_eline
Last-Status: success
pytest summary: 102 passed, 88 warnings in 26.11s
---
Project-Desc: kytos/kytos
Last-Status: failed
pytest summary: 1 failed, 316 passed, 204 warnings in 42.94s
---
Project-Desc: kytos/python-openflow
Last-Status: success
pytest summary: 400 passed, 40 skipped, 1 xfailed in 3.17s
---
"""
class DSL(NodeVisitor):
    """Visitor that turns '---'-separated ``key: value`` records into dicts.

    The ``visit_*`` methods below are applied to the tree produced by
    ``grammar``; the overall result is the generator returned by
    ``visit_content``, yielding one merged dict per non-empty block.
    """

    # Leading "<count> <word>" of a summary item, e.g. "88 warnings in 26.11s".
    rx = re.compile(r'(\d+)\s+(\w+).*')

    # NOTE: the grammar text is kept flush-left so the string literal stays
    # exactly as originally written.
    grammar = Grammar(r"""
content = (block / ws)+
block = sep line*
line = key colon ws value nl?
key = ~"^[^:\n]+"m
value = ~".+"
colon = ":"
nl = ~"[\n\r]+"
sep = "---" nl
ws = ~"\s*"
""")

    def generic_visit(self, node, visited_children):
        """Fallback for rules without a visitor: pass children up, or None."""
        return visited_children or None

    def visit_line(self, node, visited_children):
        """Return the dict contributed by one ``key: value`` line.

        ``value`` arrives as the comma-split list built by ``visit_value``.
        The branch is chosen by the key, not by how many comma-separated
        parts the value has, so a one-item summary ("5 passed in 1.0s") and a
        description containing a comma are both handled.
        """
        key, _, _, value, _ = visited_children
        if key.endswith("Status"):
            return {"#STATUS": ",".join(value)}
        if key.endswith("Desc"):
            return {"#DESCRIPTION": ",".join(value)}

        # Any other line is treated as a summary of "<count> <category>" items.
        values = {}
        for item in value:
            match = self.rx.search(item.strip())
            if match is None:
                # Not a "<count> <word>" item; nothing to record.
                continue
            count, category = match.groups()
            values["#" + category.upper()] = count
        return values

    def visit_key(self, node, visited_children):
        """Return the key text as matched."""
        return node.text

    def visit_value(self, node, visited_children):
        """Return the value text split on commas."""
        return node.text.split(",")

    def visit_block(self, node, visited_children):
        """Drop the separator and return the visited lines of the block."""
        _, values = visited_children
        return values

    def visit_content(self, node, visited_children):
        """Yield one dict per block, merging the dicts of its lines."""
        for child in visited_children:
            # Whitespace alternatives and blocks without lines come through
            # as a falsy first element and are skipped.
            if child[0]:
                yield {key: value for dct in child[0] for key, value in dct.items()}
# Parse the sample text and print each resulting record on its own line.
dsl = DSL()
for record in dsl.parse(data):
    print(record)
这会产生如下输出:
{'#DESCRIPTION': 'kytos/mef_eline', '#STATUS': 'success', '#PASSED': '102', '#WARNINGS': '88'}
{'#DESCRIPTION': 'kytos/kytos', '#STATUS': 'failed', '#FAILED': '1', '#PASSED': '316', '#WARNINGS': '204'}
{'#DESCRIPTION': 'kytos/python-openflow', '#STATUS': 'success', '#PASSED': '400', '#SKIPPED': '40', '#XFAILED': '1'}
是的,这种写法更长,学习起来也更复杂,但容错性很好(可以试着在任意位置添加空行)。