解析 YAML,即使在有序映射中也能获取行号
Parsing YAML, get line numbers even in ordered maps
我需要获取 YAML 文件中某些键的行号。
请注意,这个答案并不能解决我的问题:我确实在使用 ruamel.yaml,但该答案不适用于有序映射(ordered map)。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Load a document whose values are nested ordered maps (!!omap).
# The nesting of the YAML text is significant: key5/key6 belong to key4.
# NOTE(review): round_trip_load is the function-style API; newer ruamel.yaml
# releases may require ruamel.yaml.YAML().load instead - confirm.
data = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
print(data)
结果我得到这个:
CommentedMap([('key1', CommentedOrderedMap([('key2', 'item2'), ('key3', 'item3'), ('key4', CommentedOrderedMap([('key5', 'item5'), ('key6', 'item6')]))]))])
这个结果不允许访问行号,只有值为 !!omap 的键除外:
# Values that are themselves !!omap collections expose .lc.line.
print(data['key1'].lc.line) # output: 1
print(data['key1']['key4'].lc.line) # output: 4
但是:
# A plain scalar value is loaded as a built-in str, which has no lc attribute.
print(data['key1']['key2'].lc.line) # output: AttributeError: 'str' object has no attribute 'lc'
的确,data['key1']['key2'] 是一个 str。
我找到了解决方法:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Same nested !!omap document as in the question; the indentation of the YAML
# text is what makes key5/key6 children of key4.
# NOTE(review): round_trip_load is the function-style API; newer ruamel.yaml
# releases may require ruamel.yaml.YAML().load instead - confirm.
DATA = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
def get_line_nb(data):
    """Print an estimated line number for every non-mapping value in ``data``.

    ``data`` is expected to be a dict-like object with an ``lc.line``
    attribute (the line on which the mapping starts). Nested mappings are
    visited recursively; anything that is not a dict is ignored.

    The line of an entry is only *estimated* as "start line of the mapping
    + position of the key + 1", i.e. it assumes exactly one entry per line.
    Multi-line values, comments, blank lines or nested lists make the
    estimate wrong.
    """
    if isinstance(data, dict):
        offset = data.lc.line
        for i, key in enumerate(data):
            if isinstance(data[key], dict):
                # Nested mapping: it carries its own start line, so recurse.
                get_line_nb(data[key])
            else:
                print('{}|{} found in line {}\n'
                      .format(key, data[key], offset + i + 1))
# Walk the whole loaded document and print one line per non-mapping value.
get_line_nb(DATA)
输出:
key2|item2 found in line 2
key3|item3 found in line 3
key5|item5 found in line 5
key6|item6 found in line 6
但这看起来有点"dirty"。有没有更合适的做法?
编辑:此解决方法不仅脏,而且仅适用于上述简单情况,并且一旦出现嵌套列表就会给出错误结果
问题并不在于您使用了 !omap、而它不像“普通”映射那样提供行号。这一点从执行 print(data['key1']['key4'].lc.line) 得到 4(其中 key4 是外层 !omap 中的键)这一事实就可以看得很清楚。
如 this 答案所示,you can access the property lc on collection items(可以在集合项上访问 lc 属性)。data['key1']['key4'] 的值是一个集合项(另一个 !omap),但 data['key1']['key2'] 的值不是集合项,而是 Python 内置的字符串,它没有可用于存储 lc 属性的槽(slot)。
要在字符串这样的非集合(non-collection)对象上获得 .lc 属性,您必须子类化 RoundTripConstructor,使用类似 scalarstring.py 中的那些类(把 __slots__ 调整为可接受 lc 属性),然后把节点中可用的行信息传递给该属性,并设置行、列信息:
import sys
import ruamel.yaml
# Sample document: nested !!omap values, one quoted scalar and one literal
# block scalar. The indentation is part of the YAML structure (key5/key6 are
# children of key4, and "item6" is the body of the "|" block scalar).
yaml_str = """
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: 'item5'
    - key6: |
      item6
"""
class Str(ruamel.yaml.scalarstring.ScalarString):
    """Plain scalar string with a slot for an ``lc`` (line/column) attribute."""

    # One extra slot so a position object can be stored on the instance.
    __slots__ = ('lc',)

    style = ""

    def __new__(cls, value):
        return ruamel.yaml.scalarstring.ScalarString.__new__(cls, value)
class MyPreservedScalarString(ruamel.yaml.scalarstring.PreservedScalarString):
    """Preserved (``|``) scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose scalar strings carry an ``lc`` attribute."""

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a string for ``node`` and record where the node starts.

        The returned object is one of the ``lc``-capable string classes;
        its ``lc.line`` / ``lc.col`` are copied from ``node.start_mark``.

        Raises ConstructorError when ``node`` is not a ScalarNode.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None, None,
                "expected a scalar node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): ruamel.yaml.compat.text_type may not exist in newer
        # ruamel.yaml releases (a later snippet in this file uses str) - confirm.
        if node.style == '|' and isinstance(node.value, ruamel.yaml.compat.text_type):
            ret_val = MyPreservedScalarString(node.value)
        elif bool(self._preserve_quotes) and isinstance(node.value, ruamel.yaml.compat.text_type):
            if node.style == "'":
                ret_val = MySingleQuotedScalarString(node.value)
            elif node.style == '"':
                ret_val = MyDoubleQuotedScalarString(node.value)
            else:
                ret_val = Str(node.value)
        else:
            ret_val = Str(node.value)
        # Copy the position of the node onto the constructed value.
        ret_val.lc = ruamel.yaml.comments.LineCol()
        ret_val.lc.line = node.start_mark.line
        ret_val.lc.col = node.start_mark.column
        return ret_val
# Use the position-recording constructor when loading the sample document.
yaml = ruamel.yaml.YAML()
yaml.Constructor = MyConstructor
data = yaml.load(yaml_str)
# Print the line stored on three values: a nested !!omap, a plain scalar and
# the literal block scalar (the accompanying text reports 6 for the last one).
print(data['key1']['key4'].lc.line)
print(data['key1']['key2'].lc.line)
print(data['key1']['key4']['key6'].lc.line)
请注意,最后一次调用 print 的输出是 6,因为字面量(literal)标量字符串是从 | 所在的那一行开始的。
如果您还想转储 data
,您需要让 Representer
知道那些 My....
类型。
我修改了基于ruamel.yaml版本0.17.17的@Anthon解决方案,处理标量、int和bool位置。
class MyLiteralScalarString(ruamel.yaml.scalarstring.LiteralScalarString):
    """Literal (``|``) scalar string with slots for ``comment`` and ``lc``."""

    __slots__ = ('comment', 'lc')
class MyFoldedScalarString(ruamel.yaml.scalarstring.FoldedScalarString):
    """Folded (``>``) scalar string with slots for ``fold_pos``, ``comment`` and ``lc``."""

    __slots__ = ('fold_pos', 'comment', 'lc')
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyPlainScalarString(ruamel.yaml.scalarstring.PlainScalarString):
    """Plain scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyScalarInt(ruamel.yaml.scalarint.ScalarInt):
    """Scalar int that can carry an ``lc`` attribute."""

    # Class-level default; the constructor assigns a per-instance value.
    lc = None
class MyScalarBoolean(ruamel.yaml.scalarbool.ScalarBoolean):
    """Scalar boolean that can carry an ``lc`` attribute."""

    # Class-level default; the constructor assigns a per-instance value.
    lc = None
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose str, int and bool results carry ``lc``.

    Every value built by the overridden methods gets ``lc.line`` and
    ``lc.col`` copied from the start mark of the node it was built from.
    """

    def __init__(self, preserve_quotes=None, loader=None):
        super(MyConstructor, self).__init__(preserve_quotes=preserve_quotes, loader=loader)
        # construct_scalar reads loader.comment_handling, so make sure the
        # attribute exists. A missing loader (None) is left untouched rather
        # than raising AttributeError on the assignment.
        if self.loader is not None and not hasattr(self.loader, 'comment_handling'):
            self.loader.comment_handling = None

    @staticmethod
    def _mark_position(value, node):
        # type: (Any, Any) -> Any
        """Store the start line/column of ``node`` on ``value.lc``; return ``value``."""
        value.lc = ruamel.yaml.comments.LineCol()
        value.lc.line = node.start_mark.line
        value.lc.col = node.start_mark.column
        return value

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a position-aware string for ``node``.

        Literal (``|``) and folded (``>``) scalars keep their end-of-line
        comment, folded scalars also keep their fold positions, and quoted
        scalars keep their quoting style when quote preservation is on.
        Everything else becomes a MyPlainScalarString.

        Raises ConstructorError when ``node`` is not a ScalarNode.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None, None,
                'expected a scalar node, but found {!s}'.format(node.id),
                node.start_mark,
            )
        ret_val = None
        if node.style == '|' and isinstance(node.value, str):
            lss = MyLiteralScalarString(node.value, anchor=node.anchor)
            if self.loader and self.loader.comment_handling is None:
                if node.comment and node.comment[1]:
                    lss.comment = node.comment[1][0]  # type: ignore
            else:
                # NEWCMNT
                if node.comment is not None and node.comment[1]:
                    # EOL comment after |
                    lss.comment = self.comment(node.comment[1][0])  # type: ignore
            ret_val = lss
        elif node.style == '>' and isinstance(node.value, str):
            # '\a' characters in the node value mark fold positions; record
            # their indices as they will be once the markers are removed.
            fold_positions = []  # type: List[int]
            idx = -1
            while True:
                idx = node.value.find('\a', idx + 1)
                if idx < 0:
                    break
                fold_positions.append(idx - len(fold_positions))
            fss = MyFoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor)
            if self.loader and self.loader.comment_handling is None:
                if node.comment and node.comment[1]:
                    fss.comment = node.comment[1][0]  # type: ignore
            else:
                # NEWCMNT
                if node.comment is not None and node.comment[1]:
                    # EOL comment after >
                    fss.comment = self.comment(node.comment[1][0])  # type: ignore
            if fold_positions:
                fss.fold_pos = fold_positions  # type: ignore
            ret_val = fss
        elif bool(self._preserve_quotes) and isinstance(node.value, str):
            if node.style == "'":
                ret_val = MySingleQuotedScalarString(node.value, anchor=node.anchor)
            if node.style == '"':
                ret_val = MyDoubleQuotedScalarString(node.value, anchor=node.anchor)
        # Compare with None, not truthiness: an empty quoted or block string
        # is falsy and must not be replaced by a plain string.
        if ret_val is None:
            if node.anchor:
                ret_val = MyPlainScalarString(node.value, anchor=node.anchor)
            else:
                ret_val = MyPlainScalarString(node.value)
        return self._mark_position(ret_val, node)

    def construct_yaml_int(self, node):
        # type: (Any) -> Any
        """Build a position-aware int for ``node``.

        Parsing is delegated to the parent constructor, so every integer
        notation it accepts is accepted here too. Only the numeric value
        (and the anchor, if any) is carried over into the MyScalarInt;
        width/underscore formatting is not retained.
        """
        parsed = super(MyConstructor, self).construct_yaml_int(node)
        if node.anchor:
            ret_val = MyScalarInt(parsed, anchor=node.anchor)
        else:
            ret_val = MyScalarInt(parsed)
        return self._mark_position(ret_val, node)

    def construct_yaml_bool(self, node):
        # type: (Any) -> Any
        """Build a position-aware boolean for ``node``."""
        b = super(MyConstructor, self).construct_yaml_bool(node)
        if node.anchor:
            ret_val = MyScalarBoolean(b, anchor=node.anchor)
        else:
            ret_val = MyScalarBoolean(b)
        return self._mark_position(ret_val, node)
# Route the standard YAML int and bool tags to the overridden methods.
MyConstructor.add_constructor('tag:yaml.org,2002:int', MyConstructor.construct_yaml_int)
MyConstructor.add_constructor('tag:yaml.org,2002:bool', MyConstructor.construct_yaml_bool)
我需要获取 YAML 文件中某些键的行号。
请注意,这个答案并不能解决我的问题:我确实在使用 ruamel.yaml,但该答案不适用于有序映射(ordered map)。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Load a document whose values are nested ordered maps (!!omap).
# The nesting of the YAML text is significant: key5/key6 belong to key4.
# NOTE(review): round_trip_load is the function-style API; newer ruamel.yaml
# releases may require ruamel.yaml.YAML().load instead - confirm.
data = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
print(data)
结果我得到这个:
CommentedMap([('key1', CommentedOrderedMap([('key2', 'item2'), ('key3', 'item3'), ('key4', CommentedOrderedMap([('key5', 'item5'), ('key6', 'item6')]))]))])
这个结果不允许访问行号,只有值为 !!omap 的键除外:
# Values that are themselves !!omap collections expose .lc.line.
print(data['key1'].lc.line) # output: 1
print(data['key1']['key4'].lc.line) # output: 4
但是:
# A plain scalar value is loaded as a built-in str, which has no lc attribute.
print(data['key1']['key2'].lc.line) # output: AttributeError: 'str' object has no attribute 'lc'
的确,data['key1']['key2'] 是一个 str。
我找到了解决方法:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ruamel import yaml
# Same nested !!omap document as in the question; the indentation of the YAML
# text is what makes key5/key6 children of key4.
# NOTE(review): round_trip_load is the function-style API; newer ruamel.yaml
# releases may require ruamel.yaml.YAML().load instead - confirm.
DATA = yaml.round_trip_load("""
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: item5
    - key6: item6
""")
def get_line_nb(data):
    """Print an estimated line number for every non-mapping value in ``data``.

    ``data`` is expected to be a dict-like object with an ``lc.line``
    attribute (the line on which the mapping starts). Nested mappings are
    visited recursively; anything that is not a dict is ignored.

    The line of an entry is only *estimated* as "start line of the mapping
    + position of the key + 1", i.e. it assumes exactly one entry per line.
    Multi-line values, comments, blank lines or nested lists make the
    estimate wrong.
    """
    if isinstance(data, dict):
        offset = data.lc.line
        for i, key in enumerate(data):
            if isinstance(data[key], dict):
                # Nested mapping: it carries its own start line, so recurse.
                get_line_nb(data[key])
            else:
                print('{}|{} found in line {}\n'
                      .format(key, data[key], offset + i + 1))
# Walk the whole loaded document and print one line per non-mapping value.
get_line_nb(DATA)
输出:
key2|item2 found in line 2
key3|item3 found in line 3
key5|item5 found in line 5
key6|item6 found in line 6
但这看起来有点"dirty"。有没有更合适的做法?
编辑:此解决方法不仅脏,而且仅适用于上述简单情况,并且一旦出现嵌套列表就会给出错误结果
问题并不在于您使用了 !omap、而它不像“普通”映射那样提供行号。这一点从执行 print(data['key1']['key4'].lc.line) 得到 4(其中 key4 是外层 !omap 中的键)这一事实就可以看得很清楚。
如 this 答案所示,you can access the property lc on collection items(可以在集合项上访问 lc 属性)。data['key1']['key4'] 的值是一个集合项(另一个 !omap),但 data['key1']['key2'] 的值不是集合项,而是 Python 内置的字符串,它没有可用于存储 lc 属性的槽(slot)。
要在字符串这样的非集合(non-collection)对象上获得 .lc 属性,您必须子类化 RoundTripConstructor,使用类似 scalarstring.py 中的那些类(把 __slots__ 调整为可接受 lc 属性),然后把节点中可用的行信息传递给该属性,并设置行、列信息:
import sys
import ruamel.yaml
# Sample document: nested !!omap values, one quoted scalar and one literal
# block scalar. The indentation is part of the YAML structure (key5/key6 are
# children of key4, and "item6" is the body of the "|" block scalar).
yaml_str = """
key1: !!omap
  - key2: item2
  - key3: item3
  - key4: !!omap
    - key5: 'item5'
    - key6: |
      item6
"""
class Str(ruamel.yaml.scalarstring.ScalarString):
    """Plain scalar string with a slot for an ``lc`` (line/column) attribute."""

    # One extra slot so a position object can be stored on the instance.
    __slots__ = ('lc',)

    style = ""

    def __new__(cls, value):
        return ruamel.yaml.scalarstring.ScalarString.__new__(cls, value)
class MyPreservedScalarString(ruamel.yaml.scalarstring.PreservedScalarString):
    """Preserved (``|``) scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose scalar strings carry an ``lc`` attribute."""

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a string for ``node`` and record where the node starts.

        The returned object is one of the ``lc``-capable string classes;
        its ``lc.line`` / ``lc.col`` are copied from ``node.start_mark``.

        Raises ConstructorError when ``node`` is not a ScalarNode.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None, None,
                "expected a scalar node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): ruamel.yaml.compat.text_type may not exist in newer
        # ruamel.yaml releases (a later snippet in this file uses str) - confirm.
        if node.style == '|' and isinstance(node.value, ruamel.yaml.compat.text_type):
            ret_val = MyPreservedScalarString(node.value)
        elif bool(self._preserve_quotes) and isinstance(node.value, ruamel.yaml.compat.text_type):
            if node.style == "'":
                ret_val = MySingleQuotedScalarString(node.value)
            elif node.style == '"':
                ret_val = MyDoubleQuotedScalarString(node.value)
            else:
                ret_val = Str(node.value)
        else:
            ret_val = Str(node.value)
        # Copy the position of the node onto the constructed value.
        ret_val.lc = ruamel.yaml.comments.LineCol()
        ret_val.lc.line = node.start_mark.line
        ret_val.lc.col = node.start_mark.column
        return ret_val
# Use the position-recording constructor when loading the sample document.
yaml = ruamel.yaml.YAML()
yaml.Constructor = MyConstructor
data = yaml.load(yaml_str)
# Print the line stored on three values: a nested !!omap, a plain scalar and
# the literal block scalar (the accompanying text reports 6 for the last one).
print(data['key1']['key4'].lc.line)
print(data['key1']['key2'].lc.line)
print(data['key1']['key4']['key6'].lc.line)
请注意,最后一次调用 print 的输出是 6,因为字面量(literal)标量字符串是从 | 所在的那一行开始的。
如果您还想转储 data
,您需要让 Representer
知道那些 My....
类型。
我修改了基于ruamel.yaml版本0.17.17的@Anthon解决方案,处理标量、int和bool位置。
class MyLiteralScalarString(ruamel.yaml.scalarstring.LiteralScalarString):
    """Literal (``|``) scalar string with slots for ``comment`` and ``lc``."""

    __slots__ = ('comment', 'lc')
class MyFoldedScalarString(ruamel.yaml.scalarstring.FoldedScalarString):
    """Folded (``>``) scalar string with slots for ``fold_pos``, ``comment`` and ``lc``."""

    __slots__ = ('fold_pos', 'comment', 'lc')
class MyDoubleQuotedScalarString(ruamel.yaml.scalarstring.DoubleQuotedScalarString):
    """Double-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MySingleQuotedScalarString(ruamel.yaml.scalarstring.SingleQuotedScalarString):
    """Single-quoted scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyPlainScalarString(ruamel.yaml.scalarstring.PlainScalarString):
    """Plain scalar string with a slot for an ``lc`` attribute."""

    __slots__ = ('lc',)
class MyScalarInt(ruamel.yaml.scalarint.ScalarInt):
    """Scalar int that can carry an ``lc`` attribute."""

    # Class-level default; the constructor assigns a per-instance value.
    lc = None
class MyScalarBoolean(ruamel.yaml.scalarbool.ScalarBoolean):
    """Scalar boolean that can carry an ``lc`` attribute."""

    # Class-level default; the constructor assigns a per-instance value.
    lc = None
class MyConstructor(ruamel.yaml.constructor.RoundTripConstructor):
    """Round-trip constructor whose str, int and bool results carry ``lc``.

    Every value built by the overridden methods gets ``lc.line`` and
    ``lc.col`` copied from the start mark of the node it was built from.
    """

    def __init__(self, preserve_quotes=None, loader=None):
        super(MyConstructor, self).__init__(preserve_quotes=preserve_quotes, loader=loader)
        # construct_scalar reads loader.comment_handling, so make sure the
        # attribute exists. A missing loader (None) is left untouched rather
        # than raising AttributeError on the assignment.
        if self.loader is not None and not hasattr(self.loader, 'comment_handling'):
            self.loader.comment_handling = None

    @staticmethod
    def _mark_position(value, node):
        # type: (Any, Any) -> Any
        """Store the start line/column of ``node`` on ``value.lc``; return ``value``."""
        value.lc = ruamel.yaml.comments.LineCol()
        value.lc.line = node.start_mark.line
        value.lc.col = node.start_mark.column
        return value

    def construct_scalar(self, node):
        # type: (Any) -> Any
        """Build a position-aware string for ``node``.

        Literal (``|``) and folded (``>``) scalars keep their end-of-line
        comment, folded scalars also keep their fold positions, and quoted
        scalars keep their quoting style when quote preservation is on.
        Everything else becomes a MyPlainScalarString.

        Raises ConstructorError when ``node`` is not a ScalarNode.
        """
        if not isinstance(node, ruamel.yaml.nodes.ScalarNode):
            raise ruamel.yaml.constructor.ConstructorError(
                None, None,
                'expected a scalar node, but found {!s}'.format(node.id),
                node.start_mark,
            )
        ret_val = None
        if node.style == '|' and isinstance(node.value, str):
            lss = MyLiteralScalarString(node.value, anchor=node.anchor)
            if self.loader and self.loader.comment_handling is None:
                if node.comment and node.comment[1]:
                    lss.comment = node.comment[1][0]  # type: ignore
            else:
                # NEWCMNT
                if node.comment is not None and node.comment[1]:
                    # EOL comment after |
                    lss.comment = self.comment(node.comment[1][0])  # type: ignore
            ret_val = lss
        elif node.style == '>' and isinstance(node.value, str):
            # '\a' characters in the node value mark fold positions; record
            # their indices as they will be once the markers are removed.
            fold_positions = []  # type: List[int]
            idx = -1
            while True:
                idx = node.value.find('\a', idx + 1)
                if idx < 0:
                    break
                fold_positions.append(idx - len(fold_positions))
            fss = MyFoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor)
            if self.loader and self.loader.comment_handling is None:
                if node.comment and node.comment[1]:
                    fss.comment = node.comment[1][0]  # type: ignore
            else:
                # NEWCMNT
                if node.comment is not None and node.comment[1]:
                    # EOL comment after >
                    fss.comment = self.comment(node.comment[1][0])  # type: ignore
            if fold_positions:
                fss.fold_pos = fold_positions  # type: ignore
            ret_val = fss
        elif bool(self._preserve_quotes) and isinstance(node.value, str):
            if node.style == "'":
                ret_val = MySingleQuotedScalarString(node.value, anchor=node.anchor)
            if node.style == '"':
                ret_val = MyDoubleQuotedScalarString(node.value, anchor=node.anchor)
        # Compare with None, not truthiness: an empty quoted or block string
        # is falsy and must not be replaced by a plain string.
        if ret_val is None:
            if node.anchor:
                ret_val = MyPlainScalarString(node.value, anchor=node.anchor)
            else:
                ret_val = MyPlainScalarString(node.value)
        return self._mark_position(ret_val, node)

    def construct_yaml_int(self, node):
        # type: (Any) -> Any
        """Build a position-aware int for ``node``.

        Parsing is delegated to the parent constructor, so every integer
        notation it accepts is accepted here too. Only the numeric value
        (and the anchor, if any) is carried over into the MyScalarInt;
        width/underscore formatting is not retained.
        """
        parsed = super(MyConstructor, self).construct_yaml_int(node)
        if node.anchor:
            ret_val = MyScalarInt(parsed, anchor=node.anchor)
        else:
            ret_val = MyScalarInt(parsed)
        return self._mark_position(ret_val, node)

    def construct_yaml_bool(self, node):
        # type: (Any) -> Any
        """Build a position-aware boolean for ``node``."""
        b = super(MyConstructor, self).construct_yaml_bool(node)
        if node.anchor:
            ret_val = MyScalarBoolean(b, anchor=node.anchor)
        else:
            ret_val = MyScalarBoolean(b)
        return self._mark_position(ret_val, node)
# Route the standard YAML int and bool tags to the overridden methods.
MyConstructor.add_constructor('tag:yaml.org,2002:int', MyConstructor.construct_yaml_int)
MyConstructor.add_constructor('tag:yaml.org,2002:bool', MyConstructor.construct_yaml_bool)