如何计算字符串中的字段?
How to count fields in a string?
我想知道是否有办法知道带参数的字符串是否有效并计算其中有多少个字段。我更喜欢原生的 Python 函数,但我没有找到任何关于它的信息。
假设这个函数叫做 count_variables
我会:
count_variables("Test") # -> 0
count_variables("Test {0} {1}") # -> 2
count_variables("Test {0} {2}") # -> raise error {1} is missing
count_variables("Test {} {}") # -> 2
count_variables("Test{ {} {}") # -> raise error { is not escaped
count_variables("Test {} {0}") # -> raise error cannot switch from automatic field numbering to manual field
我正在使用 python 2.7
正如@dot.Py 所提到的,更轻量级的函数 is_valid 可能更容易。只有字符串验证没有必需的参数
is_valid("Test") # -> True
is_valid("Test {0} {2}") # -> False
...
感谢您的帮助。
我不知道是否有 built-in 方法,但我自己实现了一个解决方案。我已经在 Python 3.5 和 Python 2.7 下对其进行了测试。它是 "correct",因为它通过了您提供的测试用例:
实施
import re
import unittest
class Numbering:
    """Numbering mode observed so far while scanning a format string."""

    NONE = 0  # no replacement field seen yet
    MANUAL = 1  # explicit indices, e.g. "{0} {1}"
    AUTOMATIC = 2  # empty fields, e.g. "{} {}"


def consecutive_variables(variables):
    """Return True when the distinct values in *variables* have no gaps.

    Repeated indices are ignored, so ``[0, 1, 0]`` is consecutive while
    ``[0, 2]`` is not.  Empty and single-element inputs are consecutive.
    """
    distinct = sorted(set(variables))
    return all(a == b - 1 for a, b in zip(distinct[:-1], distinct[1:]))


# Body of a positional replacement field: optional digits, then the closing brace.
_FIELD_BODY = re.compile(r'(\d*)\}')


def count_variables(data):
    """Count the positional replacement fields in the format string *data*.

    Only plain positional fields (``{}`` or ``{<digits>}``) are supported;
    ``{{`` and ``}}`` are treated as escaped literal braces.

    Returns the number of distinct fields, so ``"{0} {1} {0}"`` yields 2.

    Raises:
        ValueError: on an unescaped or unterminated brace, an unsupported
            field body, a switch between automatic and manual numbering,
            or manual indices that skip a value or do not start at 0.
    """
    numbering = Numbering.NONE
    next_automatic = 0
    variables = []
    i = 0
    while i < len(data):
        c = data[i]
        # A doubled brace is an escaped literal brace, not a field delimiter.
        if c in '{}' and data[i + 1:i + 2] == c:
            i += 2
            continue
        if c == '}':
            # A closing brace that neither ends a field nor is escaped.
            raise ValueError('Invalid variable formatting')
        if c != '{':
            i += 1
            continue
        match = _FIELD_BODY.match(data, i + 1)
        if not match:
            raise ValueError('Invalid variable formatting')
        variable_body = match.group(1)
        if variable_body == '':
            if numbering == Numbering.MANUAL:
                raise ValueError('Cannot switch from manual to automatic numbering')
            numbering = Numbering.AUTOMATIC
            variables.append(next_automatic)
            next_automatic += 1
        else:
            if numbering == Numbering.AUTOMATIC:
                raise ValueError('Cannot switch from automatic to manual numbering')
            numbering = Numbering.MANUAL
            variables.append(int(variable_body))
        # Resume scanning right after the field's closing brace.
        i = match.end()
    if not consecutive_variables(variables):
        raise ValueError('Variables are not consecutive')
    if variables and min(variables) != 0:
        # "{1} {2}" has no gap but still leaves {0} unused.
        raise ValueError('Variables must start at 0')
    return len(set(variables))
测试
class TestCountVariables(unittest.TestCase):
    """Checks count_variables against the examples given in the question."""

    def test_1(self):
        # No replacement fields at all.
        self.assertEqual(count_variables("Test"), 0)

    def test_2(self):
        # Manual numbering without gaps.
        self.assertEqual(count_variables("Test {0} {1}"), 2)

    def test_3(self):
        # {1} is missing.
        with self.assertRaises(ValueError):
            count_variables("Test {0} {2}")

    def test_4(self):
        # Automatic numbering.
        self.assertEqual(count_variables("Test {} {}"), 2)

    def test_5(self):
        # Unescaped opening brace.
        with self.assertRaises(ValueError):
            count_variables("Test{ {} {}")

    def test_6(self):
        # Automatic numbering followed by manual numbering.
        with self.assertRaises(ValueError):
            count_variables("Test {} {0}")


if __name__ == '__main__':
    unittest.main()
输出
......
----------------------------------------------------------------------
Ran 6 tests in 0.000s
OK
您可以创建一个 string.Formatter 对象,并使用其 parse 方法将字符串分解为 (literal_text, field_name, format_spec, conversion) 元组。这样可以捕获一些错误,例如未转义的 {,但无法捕获其他错误,例如字段编号不正确。
凭直觉,我认为您可以创建 string.Formatter 的子类,让它的各个方法在被调用时返回模拟数据,并在执行过程中记录细节。这样就能发现所有的错误,而且应该比自己从头解析要容易。
就获取计数和捕捉一些格式错误而言,这会做到:
import string
def count_variables(fmtstr):
    """Count the replacement fields in *fmtstr* using string.Formatter.parse.

    Prints ``"<fmtstr>: <result>"`` and returns the result.  The result is
    the number of replacement fields, or the ValueError instance raised by
    the parser when the string is malformed (for example an unescaped brace).
    Field numbering is not validated here.
    """
    try:
        # parse() also yields tuples for trailing literal text, whose
        # field_name is None; those are not replacement fields.
        retval = sum(
            1
            for _, field_name, _, _ in string.Formatter().parse(fmtstr)
            if field_name is not None
        )
    except ValueError as e:
        retval = e
    # Formatted so the output is the same under Python 2 and Python 3.
    print('{0}: {1}'.format(fmtstr, retval))
    return retval
我的想法是使用 string.Formatter.parse
来计算变量,然后实际尝试使用恰好那么多变量进行格式化。
它适用于问题中列出的示例,但在其他方面没有得到很好的测试。
import string
def vcount(fmt):
    """Return the number of positional arguments *fmt* needs, or None.

    The fields are listed with string.Formatter.parse and the string is then
    formatted with exactly that many arguments; any failure marks it invalid.
    Repeated references to one argument ("{0} {1} {0}") count once.

    Prints the count on success, or ``"error: <message>"`` on failure.
    """
    try:
        # "{0.attr}" and "{0[key]}" both refer to argument "0".
        bases = [
            name.partition(".")[0].partition("[")[0]
            for _, name, _, _ in string.Formatter().parse(fmt)
            if name is not None
        ]
        automatic = bases.count("")
        explicit = set(bases) - {""}
        cnt = automatic + len(explicit)
        fmt.format(*range(cnt))
    except Exception as err:
        # Deliberately broad: format() reports invalid input with several
        # exception types (ValueError, IndexError, KeyError, ...).
        print("error: {}".format(err))
        return None  # or raise ValueError(err)
    print(cnt)
    return cnt
# Example calls. vcount does not raise on invalid input: it prints the
# error message and returns None.
vcount("Test") # -> 0
vcount("Test {0} {1}") # -> 2
vcount("Test {0} {2}") # -> prints the error, returns None
vcount("Test {} {}") # -> 2
vcount("Test{ {} {}") # -> prints the error, returns None
vcount("Test {} {0}") # -> prints the error, returns None
更新:一种不同的方法,不等同于原始答案。看评论。无效输入的错误消息可能令人困惑。
def vcount(fmt):
    """Return the number of positional arguments *fmt* needs, or None.

    Automatic fields ("{} {}") are counted directly.  For numbered fields the
    count is ``1 + highest index`` and every index below it must be used, so
    "{0} {1} {0}" gives 2 while "{0} {2}" is invalid.  The string is finally
    formatted with that many arguments as a validity check.

    Prints the count on success, or ``"error: <message>"`` on failure.
    """
    try:
        names = [
            name
            for _, name, _, _ in string.Formatter().parse(fmt)
            if name is not None
        ]
        if all(name == "" for name in names):
            # unnumbered fields "{} {}"
            cnt = len(names)
        else:
            # numbered "{0} {1} {2} {0}"
            if "" in names:
                # Fail with a clear message instead of int('') blowing up.
                raise ValueError(
                    "cannot mix automatic and manual field numbering")
            indices = set(int(name) for name in names)
            cnt = 1 + max(indices)
            missing = sorted(set(range(cnt)) - indices)
            if missing:
                # format() alone would accept the gap, since enough
                # arguments are supplied.
                raise ValueError(
                    "missing field index {0}".format(missing[0]))
        fmt.format(*range(cnt))
    except Exception as err:
        # Deliberately broad: any failure means the string is invalid.
        print("error: {}".format(err))
        return None  # or raise ValueError(err)
    print(cnt)
    return cnt
此外,还需要处理以下情况:
vcount("Test {0} {1} {0} ") # -> 3 (Should be 2)
我根据@VPfB 的回答建议这个解决方案
def count_and_check_fields(string_format):
    """Count the distinct arguments used by *string_format* and validate it.

    Returns ``(count, None)`` when the string formats cleanly with exactly
    ``count`` positional arguments, otherwise ``(None, message)`` where
    ``message`` is the text of the error that was raised.
    """
    try:
        unnamed_fields_count = 0
        named_fields = set()
        for _, field_name, _, _ in string.Formatter().parse(string_format):
            if field_name is None:
                # Literal text only; no replacement field in this tuple.
                continue
            # "{0.attr}" and "{0[key]}" both refer to argument "0".
            base_name = field_name.partition(".")[0].partition("[")[0]
            if base_name:
                named_fields.add(base_name)
            else:
                unnamed_fields_count += 1
        fields_count = len(named_fields) + unnamed_fields_count
        string_format.format(*range(fields_count))
        return fields_count, None
    except Exception as err:
        # str(err) works on Python 2 and 3; err.message is gone in Python 3.
        return None, str(err)
# Example calls; on success the function returns a (count, None) tuple.
count_and_check_fields("Test {0} {1} {0} ") # -> (2, None)
count_and_check_fields("Test {} {} {} ") # -> (3, None)
我想知道是否有办法知道带参数的字符串是否有效并计算其中有多少个字段。我更喜欢原生的 Python 函数,但我没有找到任何关于它的信息。 假设这个函数叫做 count_variables
我会:
count_variables("Test") # -> 0
count_variables("Test {0} {1}") # -> 2
count_variables("Test {0} {2}") # -> raise error {1} is missing
count_variables("Test {} {}") # -> 2
count_variables("Test{ {} {}") # -> raise error { is not escaped
count_variables("Test {} {0}") # -> raise error cannot switch from automatic field numbering to manual field
我正在使用 python 2.7
正如@dot.Py 所提到的,更轻量级的函数 is_valid 可能更容易。只有字符串验证没有必需的参数
is_valid("Test") # -> True
is_valid("Test {0} {2}") # -> False
...
感谢您的帮助。
我不知道是否有 built-in 方法,但我自己实现了一个解决方案。我已经在 Python 3.5 和 Python 2.7 下对其进行了测试。它是 "correct",因为它通过了您提供的测试用例:
实施
import re
import unittest
class Numbering:
    """Numbering mode observed so far while scanning a format string."""

    NONE = 0  # no replacement field seen yet
    MANUAL = 1  # explicit indices, e.g. "{0} {1}"
    AUTOMATIC = 2  # empty fields, e.g. "{} {}"


def consecutive_variables(variables):
    """Return True when the distinct values in *variables* have no gaps.

    Repeated indices are ignored, so ``[0, 1, 0]`` is consecutive while
    ``[0, 2]`` is not.  Empty and single-element inputs are consecutive.
    """
    distinct = sorted(set(variables))
    return all(a == b - 1 for a, b in zip(distinct[:-1], distinct[1:]))


# Body of a positional replacement field: optional digits, then the closing brace.
_FIELD_BODY = re.compile(r'(\d*)\}')


def count_variables(data):
    """Count the positional replacement fields in the format string *data*.

    Only plain positional fields (``{}`` or ``{<digits>}``) are supported;
    ``{{`` and ``}}`` are treated as escaped literal braces.

    Returns the number of distinct fields, so ``"{0} {1} {0}"`` yields 2.

    Raises:
        ValueError: on an unescaped or unterminated brace, an unsupported
            field body, a switch between automatic and manual numbering,
            or manual indices that skip a value or do not start at 0.
    """
    numbering = Numbering.NONE
    next_automatic = 0
    variables = []
    i = 0
    while i < len(data):
        c = data[i]
        # A doubled brace is an escaped literal brace, not a field delimiter.
        if c in '{}' and data[i + 1:i + 2] == c:
            i += 2
            continue
        if c == '}':
            # A closing brace that neither ends a field nor is escaped.
            raise ValueError('Invalid variable formatting')
        if c != '{':
            i += 1
            continue
        match = _FIELD_BODY.match(data, i + 1)
        if not match:
            raise ValueError('Invalid variable formatting')
        variable_body = match.group(1)
        if variable_body == '':
            if numbering == Numbering.MANUAL:
                raise ValueError('Cannot switch from manual to automatic numbering')
            numbering = Numbering.AUTOMATIC
            variables.append(next_automatic)
            next_automatic += 1
        else:
            if numbering == Numbering.AUTOMATIC:
                raise ValueError('Cannot switch from automatic to manual numbering')
            numbering = Numbering.MANUAL
            variables.append(int(variable_body))
        # Resume scanning right after the field's closing brace.
        i = match.end()
    if not consecutive_variables(variables):
        raise ValueError('Variables are not consecutive')
    if variables and min(variables) != 0:
        # "{1} {2}" has no gap but still leaves {0} unused.
        raise ValueError('Variables must start at 0')
    return len(set(variables))
测试
class TestCountVariables(unittest.TestCase):
    """Checks count_variables against the examples given in the question."""

    def test_1(self):
        # No replacement fields at all.
        self.assertEqual(count_variables("Test"), 0)

    def test_2(self):
        # Manual numbering without gaps.
        self.assertEqual(count_variables("Test {0} {1}"), 2)

    def test_3(self):
        # {1} is missing.
        with self.assertRaises(ValueError):
            count_variables("Test {0} {2}")

    def test_4(self):
        # Automatic numbering.
        self.assertEqual(count_variables("Test {} {}"), 2)

    def test_5(self):
        # Unescaped opening brace.
        with self.assertRaises(ValueError):
            count_variables("Test{ {} {}")

    def test_6(self):
        # Automatic numbering followed by manual numbering.
        with self.assertRaises(ValueError):
            count_variables("Test {} {0}")


if __name__ == '__main__':
    unittest.main()
输出
......
----------------------------------------------------------------------
Ran 6 tests in 0.000s
OK
您可以创建一个 string.Formatter 对象,并使用其 parse 方法将字符串分解为 (literal_text, field_name, format_spec, conversion) 元组。这样可以捕获一些错误,例如未转义的 {,但无法捕获其他错误,例如字段编号不正确。
凭直觉,我认为您可以创建 string.Formatter 的子类,让它的各个方法在被调用时返回模拟数据,并在执行过程中记录细节。这样就能发现所有的错误,而且应该比自己从头解析要容易。
就获取计数和捕捉一些格式错误而言,这会做到:
import string
def count_variables(fmtstr):
    """Count the replacement fields in *fmtstr* using string.Formatter.parse.

    Prints ``"<fmtstr>: <result>"`` and returns the result.  The result is
    the number of replacement fields, or the ValueError instance raised by
    the parser when the string is malformed (for example an unescaped brace).
    Field numbering is not validated here.
    """
    try:
        # parse() also yields tuples for trailing literal text, whose
        # field_name is None; those are not replacement fields.
        retval = sum(
            1
            for _, field_name, _, _ in string.Formatter().parse(fmtstr)
            if field_name is not None
        )
    except ValueError as e:
        retval = e
    # Formatted so the output is the same under Python 2 and Python 3.
    print('{0}: {1}'.format(fmtstr, retval))
    return retval
我的想法是使用 string.Formatter.parse
来计算变量,然后实际尝试使用恰好那么多变量进行格式化。
它适用于问题中列出的示例,但在其他方面没有得到很好的测试。
import string
def vcount(fmt):
    """Return the number of positional arguments *fmt* needs, or None.

    The fields are listed with string.Formatter.parse and the string is then
    formatted with exactly that many arguments; any failure marks it invalid.
    Repeated references to one argument ("{0} {1} {0}") count once.

    Prints the count on success, or ``"error: <message>"`` on failure.
    """
    try:
        # "{0.attr}" and "{0[key]}" both refer to argument "0".
        bases = [
            name.partition(".")[0].partition("[")[0]
            for _, name, _, _ in string.Formatter().parse(fmt)
            if name is not None
        ]
        automatic = bases.count("")
        explicit = set(bases) - {""}
        cnt = automatic + len(explicit)
        fmt.format(*range(cnt))
    except Exception as err:
        # Deliberately broad: format() reports invalid input with several
        # exception types (ValueError, IndexError, KeyError, ...).
        print("error: {}".format(err))
        return None  # or raise ValueError(err)
    print(cnt)
    return cnt
# Example calls. vcount does not raise on invalid input: it prints the
# error message and returns None.
vcount("Test") # -> 0
vcount("Test {0} {1}") # -> 2
vcount("Test {0} {2}") # -> prints the error, returns None
vcount("Test {} {}") # -> 2
vcount("Test{ {} {}") # -> prints the error, returns None
vcount("Test {} {0}") # -> prints the error, returns None
更新:一种不同的方法,不等同于原始答案。看评论。无效输入的错误消息可能令人困惑。
def vcount(fmt):
    """Return the number of positional arguments *fmt* needs, or None.

    Automatic fields ("{} {}") are counted directly.  For numbered fields the
    count is ``1 + highest index`` and every index below it must be used, so
    "{0} {1} {0}" gives 2 while "{0} {2}" is invalid.  The string is finally
    formatted with that many arguments as a validity check.

    Prints the count on success, or ``"error: <message>"`` on failure.
    """
    try:
        names = [
            name
            for _, name, _, _ in string.Formatter().parse(fmt)
            if name is not None
        ]
        if all(name == "" for name in names):
            # unnumbered fields "{} {}"
            cnt = len(names)
        else:
            # numbered "{0} {1} {2} {0}"
            if "" in names:
                # Fail with a clear message instead of int('') blowing up.
                raise ValueError(
                    "cannot mix automatic and manual field numbering")
            indices = set(int(name) for name in names)
            cnt = 1 + max(indices)
            missing = sorted(set(range(cnt)) - indices)
            if missing:
                # format() alone would accept the gap, since enough
                # arguments are supplied.
                raise ValueError(
                    "missing field index {0}".format(missing[0]))
        fmt.format(*range(cnt))
    except Exception as err:
        # Deliberately broad: any failure means the string is invalid.
        print("error: {}".format(err))
        return None  # or raise ValueError(err)
    print(cnt)
    return cnt
此外,还需要处理以下情况:
vcount("Test {0} {1} {0} ") # -> 3 (Should be 2)
我根据@VPfB 的回答建议这个解决方案
def count_and_check_fields(string_format):
    """Count the distinct arguments used by *string_format* and validate it.

    Returns ``(count, None)`` when the string formats cleanly with exactly
    ``count`` positional arguments, otherwise ``(None, message)`` where
    ``message`` is the text of the error that was raised.
    """
    try:
        unnamed_fields_count = 0
        named_fields = set()
        for _, field_name, _, _ in string.Formatter().parse(string_format):
            if field_name is None:
                # Literal text only; no replacement field in this tuple.
                continue
            # "{0.attr}" and "{0[key]}" both refer to argument "0".
            base_name = field_name.partition(".")[0].partition("[")[0]
            if base_name:
                named_fields.add(base_name)
            else:
                unnamed_fields_count += 1
        fields_count = len(named_fields) + unnamed_fields_count
        string_format.format(*range(fields_count))
        return fields_count, None
    except Exception as err:
        # str(err) works on Python 2 and 3; err.message is gone in Python 3.
        return None, str(err)
# Example calls; on success the function returns a (count, None) tuple.
count_and_check_fields("Test {0} {1} {0} ") # -> (2, None)
count_and_check_fields("Test {} {} {} ") # -> (3, None)