文本操纵器:字符串位置移动
Text Manipulator: String position movement
任务是构建一个文本操纵器:一个模拟一组文本操作命令的程序。给定一段输入文本和一串命令,输出修改后的文本和光标位置。
从简单开始:
命令
h: move cursor one character to the left
l: move cursor one character to the right
r<c>: replace character under cursor with <c>
重复命令
# All commands can be repeated N times by prefixing them with a number.
#
# [N]h: move cursor N characters to the left
# [N]l: move cursor N characters to the right
# [N]r<c>: replace N characters, starting from the cursor, with <c> and move the cursor
例子
# We'll use Hello World as our input text for all cases:
#
# Input: hhlhllhlhhll
# Output: Hello World
# _
# 2
#
# Input: rhllllllrw
# Output: hello world
# _
# 6
#
# Input: rh6l9l4hrw
# Output: hello world
# _
# 6
#
# Input: 9lrL7h2rL
# Output: HeLLo WorLd
# _
# 3
#
# Input: 999999999999999999999999999lr0
# Output: Hello Worl0
# _
# 10
#
# Input: 999rsom
# Output: sssssssssss
# _
# 10
我写了下面一段代码,但是报错:
class Editor():
    """Minimal text editor from the question: a text plus a cursor index.

    Commands are one operation letter (F, B or R, case-insensitive) followed
    by exactly one argument character; a repeat count in front of the
    operation is not supported.
    """

    def __init__(self, text):
        self.text = text
        self.pos = 0  # cursor index into self.text

    def f(self, step):
        """Move the cursor forward by int(step); no bounds check is done."""
        self.pos += int(step)

    def b(self, step):
        """Move the cursor backward by int(step); no bounds check is done."""
        self.pos -= int(step)

    def r(self, char):
        """Replace the character under the cursor with char."""
        # str is immutable, so edit a list copy and join it back.
        s = list(self.text)
        s[self.pos] = char
        self.text = ''.join(s)

    def run(self, command):
        """Execute command, consuming it two characters at a time.

        Raises ValueError when the character in operation position is not
        f, b or r (after lower-casing).
        """
        command = list(command)
        # if command not in ('F','B', 'R'):
        #
        while command:
            operation = command.pop(0).lower()
            if operation not in ('f','b','r'):
                raise ValueError('command not recognized.')
            method = getattr(self, operation)
            # The argument is always the next single character, so it is
            # mandatory and multi-digit step counts cannot be expressed.
            arg = command.pop(0)
            method(arg)

    def __str__(self):
        return self.text
# Normal run: every operation letter is followed by exactly one argument.
text = 'abcdefghijklmn'
command = 'F2B1F5Rw'
ed = Editor(text)
ed.run(command)
# The cursor ends on index 6 (2 - 1 + 5), whose character is replaced by 'w'.
print(ed)
我在我的代码中使用了 'F' 和 'B' 而不是 'h' 和 'l',但问题是我还没有实现让 'N' 成为可选项的那部分逻辑。我的代码只有在操作之后紧跟一个数字时才有效。
我怎样才能修复上面的代码以满足所有要求?
@paddy 给了你一个很好的建议,但是看看你需要解析的字符串,在我看来,正则表达式可以很容易地完成这项工作。对于解析后的部分,Command pattern 非常合适。毕竟你有一个必须在初始字符串上执行的操作列表(命令)。
在你的例子中,我认为使用这种模式主要有 3 个优点:
每个Command
表示应用于初始字符串的操作。这也意味着,例如,如果您想为一系列操作添加快捷方式,则最终 Command
的数量保持不变,您只需调整解析步骤。另一个好处是您可以拥有命令的历史记录,并且通常设计更加灵活。
所有 Command
共享一个公共接口:一个方法 execute()
,如果需要,还有一个方法 unexecute()
来撤消 execute() 方法所做的更改。
Command
s 将操作执行与解析问题分离。
至于实现,首先定义 Command
s,除了调用接收方方法外不包含任何业务逻辑。
from __future__ import annotations
import functools
import re
import abc
from typing import Iterable
class ICommand(abc.ABC):
    """Abstract base of every editing command.

    A command remembers the receiver it acts on and exposes a single
    ``execute()`` entry point. Both methods are abstract, so a concrete
    command has to define its own ``__init__`` and ``execute``.
    """

    @abc.abstractmethod
    def __init__(self, target: TextManipulator):
        # Subclasses reach this through super().__init__(target).
        self._target = target

    @abc.abstractmethod
    def execute(self):
        """Apply the command to the stored receiver."""
class MoveCursorLeftCommand(ICommand):
    """Command that moves the receiver's cursor towards the start."""

    def __init__(self, target: TextManipulator, counter):
        # The step count is independent of the receiver stored by the base.
        self._counter = counter
        super().__init__(target)

    def execute(self):
        """Ask the receiver to move its cursor left by the stored count."""
        receiver, steps = self._target, self._counter
        receiver.move_cursor_left(steps)
class MoveCursorRightCommand(ICommand):
    """Command that moves the receiver's cursor towards the end."""

    def __init__(self, target: TextManipulator, counter):
        # The step count is independent of the receiver stored by the base.
        self._counter = counter
        super().__init__(target)

    def execute(self):
        """Ask the receiver to move its cursor right by the stored count."""
        receiver, steps = self._target, self._counter
        receiver.move_cursor_right(steps)
class ReplaceCommand(ICommand):
    """Command that overwrites characters starting at the cursor."""

    def __init__(self, target: TextManipulator, counter, replacement):
        # Store the command's own arguments, then let the base keep the receiver.
        self._counter = counter
        self._replacement = replacement
        super().__init__(target)

    def execute(self):
        """Ask the receiver to write the replacement character `counter` times."""
        receiver = self._target
        receiver.replace_char(self._counter, self._replacement)
然后你有命令的接收者,它是 TextManipulator
并且包含改变文本和光标位置的方法。
class TextManipulator:
    """Receiver of the editing commands: a text plus a cursor position.

    The cursor is always clamped to a valid index of the text; for an empty
    text it stays at 0.

    >>> def apply_commands(s, commands_str):
    ...     return TextManipulator(s).run_commands(CommandParser.parse(commands_str))
    >>> apply_commands('Hello World', 'hhlhllhlhhll')
    ('Hello World', 2)
    >>> apply_commands('Hello World', 'rhllllllrw')
    ('hello world', 6)
    >>> apply_commands('Hello World', 'rh6l9l4hrw')
    ('hello world', 6)
    >>> apply_commands('Hello World', '9lrL7h2rL')
    ('HeLLo WorLd', 3)
    >>> apply_commands('Hello World', '999999999999999999999999999lr0')
    ('Hello Worl0', 10)
    >>> apply_commands('Hello World', '999rsom')
    Traceback (most recent call last):
    ValueError: command 'o' not recognized.
    >>> apply_commands('Hello World', '7l5r1')
    ('Hello W1111', 10)
    >>> apply_commands('Hello World', '7l4r1')
    ('Hello W1111', 10)
    >>> apply_commands('Hello World', '7l3r1')
    ('Hello W111d', 9)
    >>> apply_commands('Hello World', '0rx')
    ('Hello World', 0)
    >>> apply_commands('', 'l')
    ('', 0)
    """

    def __init__(self, text):
        self._text = text
        self._cursor_pos = 0

    def replace_char(self, counter, replacement):
        """Overwrite up to `counter` characters, starting at the cursor.

        The write is truncated at the end of the text, and the cursor ends
        on the last character written. A count of zero, or an empty text,
        changes nothing.

        Raises:
            ValueError: if `replacement` is not exactly one character or
                `counter` is negative.
        """
        if len(replacement) != 1:
            raise ValueError("replacement must be exactly one character.")
        if counter < 0:
            raise ValueError("counter must not be negative.")
        start = self._cursor_pos
        # Never write past the end of the text.
        span = min(counter, len(self._text) - start)
        if span <= 0:
            # Nothing to replace: leave the cursor where it is instead of
            # stepping it back by one.
            return
        self._text = (
            self._text[:start] + replacement * span + self._text[start + span:]
        )
        self.move_cursor_right(span - 1)

    def move_cursor_left(self, counter):
        """Move the cursor `counter` characters left, stopping at index 0.

        Raises:
            ValueError: if `counter` is negative.
        """
        if counter < 0:
            raise ValueError("counter must not be negative.")
        self._cursor_pos = max(0, self._cursor_pos - counter)

    def move_cursor_right(self, counter):
        """Move the cursor `counter` characters right, stopping at the last character.

        Raises:
            ValueError: if `counter` is negative.
        """
        if counter < 0:
            raise ValueError("counter must not be negative.")
        last_index = len(self._text) - 1
        # max(0, ...) keeps the cursor at 0 when the text is empty.
        self._cursor_pos = max(0, min(last_index, self._cursor_pos + counter))

    def run_commands(self, commands: Iterable[ICommand]):
        """Bind each deferred command to this receiver and execute it.

        Each item of `commands` is called as ``item(target=self)`` and must
        return an object with an ``execute()`` method. Items are consumed
        lazily, one at a time, in order.

        Returns:
            The tuple ``(text, cursor_position)`` after all commands ran.
        """
        for make_command in commands:
            make_command(target=self).execute()
        return (self._text, self._cursor_pos)
除了 run_commands
方法之外,这段代码没有什么难以解释的地方;该方法接受一个由“部分命令”组成的可迭代对象。这些部分命令是尚未绑定接收者对象的命令,而接收者的类型应该是 TextManipulator
。你为什么要那样做?这是一种将解析与命令执行分离的可能方法。我决定用 functools.partial
来做,但你还有其他有效的选择。
最终,解析部分:
class CommandParser:
    """Turns a command string into deferred, receiver-less commands."""

    @staticmethod
    def parse(commands_str: str):
        """Parse `commands_str` into a lazy iterator of partial commands.

        Each command is an optional decimal repeat count followed by ``h``,
        ``l`` or ``r<c>``, where ``<c>`` is any single character except a
        newline. Every yielded item is a ``functools.partial`` that still
        needs the ``target`` keyword argument to build the command object.

        Parsing is lazy, so the errors below surface while the result is
        being iterated, not when ``parse`` is called.

        Raises:
            ValueError: for a character that is not a known command, or for
                an ``r`` that is not followed by a replacement character.
        """
        def invalid_command(match: re.Match):
            raise ValueError(f"command '{match.group(2)}' not recognized.")

        def counter_of(match):
            # A missing repeat count means "once".
            return int(match.group(1) or 1)

        def build_replace(match):
            replacement = match.group(3)
            if replacement is None:
                # A bare 'r' was picked up by the catch-all '.' alternative.
                raise ValueError("command 'r' requires a replacement character.")
            return functools.partial(
                ReplaceCommand,
                counter=counter_of(match),
                replacement=replacement,
            )

        builders = {
            'h': lambda match: functools.partial(
                MoveCursorLeftCommand, counter=counter_of(match)),
            'l': lambda match: functools.partial(
                MoveCursorRightCommand, counter=counter_of(match)),
            'r': build_replace,
        }
        # Group 2 is the command itself; the trailing '.' alternative catches
        # every unknown character so that it can be reported.
        tokens = re.finditer(r'(\d*)(h|l|r(.)|.)', commands_str)
        return map(
            lambda match: builders.get(match.group(2)[0], invalid_command)(match),
            tokens,
        )
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module's
    # docstrings.
    import doctest
    doctest.testmod()
正如我在开头所说,在您的情况下可以使用正则表达式进行解析,并且命令创建基于每个匹配项的第二个捕获组的第一个字母。原因是对于 char 替换,第二个捕获组也包含要替换的 char。 commands_map
以 match.group(2)[0]
作为键进行查找,并返回对应的部分 Command
。如果在映射中未找到该操作,则会抛出 ValueError
异常。每个 Command
的参数都是从 re.Match
对象中推断出来的。
只需将所有这些代码片段放在一起,您就有了一个可行的解决方案(以及 doctest
执行的文档字符串提供的一些测试)。
在某些情况下这可能是一个过于复杂的设计,所以我并不是说这是正确的方法(例如,如果您正在编写一个简单的工具,则可能不是)。您可以避开 Command
s 部分,只采用解析解决方案,但我发现这是该模式的一个有趣(替代)应用。
这个问题的关键是弄清楚如何解析命令字符串。根据您的描述,命令字符串包含一个可选数字,后跟以下三种可能性之一:
h
l
r
,后跟一个字符
解析这个的正则表达式是 (try online):
(\d*)(h|l|r.)
Explanation:
(\d*) Capture zero or more digits,
(h|l|r.) Capture either an h, or an l, or an r followed by any character
将 re.findall()
与此正则表达式结合使用,您可以获得匹配项列表,其中每个匹配项都是包含捕获组的 tuple
。例如,"rh6l9l4hrw"
给出结果
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
所以元组的第一个元素是表示N
的字符串(如果没有给出数字则为空字符串),元组的第二个元素是命令。如果命令是 r
,它后面将包含替换字符。现在我们需要做的就是迭代这个列表,并应用正确的命令。
我做了一些修改:
- 通过 属性 和处理正确边界检查的 setter 访问
self.pos
- 在创建对象时将输入文本分解为列表,因为无法像使用列表那样修改字符串 in-place。
__str__()
将列表连接回一个字符串。
- 通过只读 属性 访问
self.text
,它将 self.__text
列表连接成一个字符串。
class Editor():
    """Text buffer with a cursor, driven by vim-like command strings.

    Supported commands are ``h`` (left), ``l`` (right) and ``r<c>`` (replace
    with ``<c>``), each optionally prefixed by a repeat count.
    """

    def __init__(self, text):
        # Keep the characters in a list so they can be replaced in place.
        self.__text = list(text)
        self.__pos = 0

    @property
    def text(self):
        """The current text as a string (read-only)."""
        return "".join(self.__text)

    @property
    def pos(self):
        """The cursor index; assignments are clamped to the text bounds."""
        return self.__pos

    @pos.setter
    def pos(self, value):
        # Clamp to [0, len - 1]; for an empty text the cursor stays at 0.
        self.__pos = max(0, min(len(self.__text) - 1, value))

    def l(self, step):
        """Move the cursor `step` characters to the right."""
        self.pos = self.pos + step

    def h(self, step):
        """Move the cursor `step` characters to the left."""
        self.pos = self.pos - step

    def r(self, char, count=1):
        """Replace `count` characters, starting at the cursor, with `char`.

        The count is cut off at the end of the text, and the cursor ends on
        the last replaced character. If nothing can be replaced (a count of
        zero or an empty text), the text and the cursor are left untouched.
        """
        # If count causes the cursor to overshoot the text, shrink it.
        count = min(count, len(self.__text) - self.pos)
        if count <= 0:
            # Without this guard the cursor would step one position left.
            return
        self.__text[self.pos:self.pos + count] = char * count
        self.pos = self.pos + count - 1  # Last replaced character

    def run(self, command):
        """Execute every command found in `command`.

        Characters that do not belong to a command are skipped by the
        regular expression.

        Raises:
            ValueError: if a parsed command fails `validate`.
        """
        commands = re.findall(r"(\d*)(h|l|r.)", command)
        for cmd in commands:
            self.validate(cmd)
            count = int(cmd[0] or "1")  # If cmd[0] is blank, use count = 1
            if cmd[1] == "h":
                self.h(count)
            elif cmd[1] == "l":
                self.l(count)
            elif cmd[1][0] == "r":
                self.r(cmd[1][1], count)

    def validate(self, cmd):
        """Check one ``(count, command)`` tuple.

        Raises:
            ValueError: if the count is not a decimal number, the command is
                empty or is not h, l or r, or an r has no replacement
                character.
        """
        cmd_s = ''.join(cmd)
        if cmd[0] and not cmd[0].isdecimal():
            raise ValueError(f"Invalid numeric input {cmd[0]} for command {cmd_s}")
        elif not cmd[1] or cmd[1][0] not in "hlr":
            raise ValueError(f"Invalid command {cmd_s}: Must be either h or l or r")
        elif cmd[1] == 'r':
            # A bare 'r' has no character to write.
            raise ValueError(f"Invalid command {cmd_s}: r command needs an argument")

    def __str__(self):
        return self.text
用您给定的输入运行它:
# Run every sample command string from the task against a fresh editor.
commands = ["hhlhllhlhhll", "rhllllllrw", "rh6l9l4hrw", "9lrL7h2rL", "999999999999999999999999999lr0", "999rsom"]
for cmd in commands:
    e = Editor("Hello World")
    e.run(cmd)
    # Both marker lines are padded with e.pos spaces.
    uline = " " + " " * e.pos + "^"
    cline = "Cursor: " + " " * e.pos + str(e.pos)
    print(f"Input: {cmd}\nOutput: {str(e)}\n{uline}\n{cline}\n")
Input: hhlhllhlhhll
Output: Hello World
^
Cursor: 2
Input: rhllllllrw
Output: hello world
^
Cursor: 6
Input: rh6l9l4hrw
Output: hello world
^
Cursor: 6
Input: 9lrL7h2rL
Output: HeLLo WorLd
^
Cursor: 3
Input: 999999999999999999999999999lr0
Output: Hello Worl0
^
Cursor: 10
Input: 999rsom
Output: sssssssssss
^
Cursor: 10
现在,如果你想在没有正则表达式的情况下做同样的事情,你只需要想出一种方法将输入命令字符串解析成那种元组列表,你可以使用与以前相同的逻辑来做实际更换。
在这里,我将通过编写一个函数来完成此操作:该函数接受一个字符串,并返回一个遍历其中所有命令的迭代器。产生的每个元素都是一个元组,看起来像 re.findall()
返回的列表中的一个元素。这将允许我们用我们的自定义解析器简单地替换对 re.findall()
的调用:
def iter_command(self, command: str):
    """Yield ``(count, command)`` string tuples found in `command`.

    Regex-free replacement for
    ``re.findall(r"(\\d*)(h|l|r.)", command)``: ``count`` is the run of
    ASCII digits directly in front of the command (possibly empty), and
    ``command`` is ``"h"``, ``"l"`` or ``"r"`` plus the character that
    follows it. As with the regex, any other character is skipped and
    discards the digits collected before it, and an ``r`` at the very end
    or directly before a newline is not a command.
    """
    pos = 0
    end = len(command)
    while pos < end:
        # 1. The number part: a maximal run of digits.
        num_start = pos
        while pos < end and "0" <= command[pos] <= "9":
            pos += 1
        if pos == end:
            break  # Trailing digits without a command are dropped
        count = command[num_start:pos]
        char = command[pos]
        # 2. The "h|l|r." part of the regex.
        if char == "h" or char == "l":
            yield (count, char)
            pos += 1
        elif char == "r" and pos + 1 < end and command[pos + 1] != "\n":
            yield (count, command[pos:pos + 2])
            pos += 2
        else:
            # A character we don't care about: skip it. The digits before
            # it are not carried over to the next command.
            pos += 1
现在,让我们针对之前的正则表达式进行测试:
# Compare the hand-written parser against the regex on every sample input.
commands = ["hhlhllhlhhll", "rhllllllrw", "rh6l9l4hrw", "9lrL7h2rL", "999999999999999999999999999lr0", "999rsom"]
for cmd in commands:
    e = Editor("Hello World")
    commands_custom = list(e.iter_command(cmd))
    commands_regex = re.findall(r"(\d*)(h|l|r.)", cmd)
    print(commands_custom)
    print(commands_regex)
    print(cmd)
    # Compare the whole lists: zip() stops at the shorter one, so a parser
    # that yields too few or too many commands would still look equal.
    print(commands_custom == commands_regex)
    print("")
[('', 'h'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'h'), ('', 'l'), ('', 'l')]
[('', 'h'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'h'), ('', 'l'), ('', 'l')]
hhlhllhlhhll
True
[('', 'rh'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'rw')]
[('', 'rh'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'rw')]
rhllllllrw
True
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
rh6l9l4hrw
True
[('9', 'l'), ('', 'rL'), ('7', 'h'), ('2', 'rL')]
[('9', 'l'), ('', 'rL'), ('7', 'h'), ('2', 'rL')]
9lrL7h2rL
True
[('999999999999999999999999999', 'l'), ('', 'r0')]
[('999999999999999999999999999', 'l'), ('', 'r0')]
999999999999999999999999999lr0
True
[('999', 'rs')]
[('999', 'rs')]
999rsom
True
而且,由于这些给出了相同的结果,我们只需要替换对 re.findall()
的调用:
def run(self, command):
- commands = re.findall(r"(\d*)(h|l|r.)", command)
+ commands = self.iter_command(command)
for cmd in commands:
任务是构建一个文本操纵器:一个模拟一组文本操作命令的程序。给定一段输入文本和一串命令,输出修改后的文本和光标位置。
从简单开始:
命令
h: move cursor one character to the left
l: move cursor one character to the right
r<c>: replace character under cursor with <c>
重复命令
# All commands can be repeated N times by prefixing them with a number.
#
# [N]h: move cursor N characters to the left
# [N]l: move cursor N characters to the right
# [N]r<c>: replace N characters, starting from the cursor, with <c> and move the cursor
例子
# We'll use Hello World as our input text for all cases:
#
# Input: hhlhllhlhhll
# Output: Hello World
# _
# 2
#
# Input: rhllllllrw
# Output: hello world
# _
# 6
#
# Input: rh6l9l4hrw
# Output: hello world
# _
# 6
#
# Input: 9lrL7h2rL
# Output: HeLLo WorLd
# _
# 3
#
# Input: 999999999999999999999999999lr0
# Output: Hello Worl0
# _
# 10
#
# Input: 999rsom
# Output: sssssssssss
# _
# 10
我写了下面一段代码,但是报错:
class Editor():
    """Minimal text editor from the question: a text plus a cursor index.

    Commands are one operation letter (F, B or R, case-insensitive) followed
    by exactly one argument character; a repeat count in front of the
    operation is not supported.
    """

    def __init__(self, text):
        self.text = text
        self.pos = 0  # cursor index into self.text

    def f(self, step):
        """Move the cursor forward by int(step); no bounds check is done."""
        self.pos += int(step)

    def b(self, step):
        """Move the cursor backward by int(step); no bounds check is done."""
        self.pos -= int(step)

    def r(self, char):
        """Replace the character under the cursor with char."""
        # str is immutable, so edit a list copy and join it back.
        s = list(self.text)
        s[self.pos] = char
        self.text = ''.join(s)

    def run(self, command):
        """Execute command, consuming it two characters at a time.

        Raises ValueError when the character in operation position is not
        f, b or r (after lower-casing).
        """
        command = list(command)
        # if command not in ('F','B', 'R'):
        #
        while command:
            operation = command.pop(0).lower()
            if operation not in ('f','b','r'):
                raise ValueError('command not recognized.')
            method = getattr(self, operation)
            # The argument is always the next single character, so it is
            # mandatory and multi-digit step counts cannot be expressed.
            arg = command.pop(0)
            method(arg)

    def __str__(self):
        return self.text
# Normal run: every operation letter is followed by exactly one argument.
text = 'abcdefghijklmn'
command = 'F2B1F5Rw'
ed = Editor(text)
ed.run(command)
# The cursor ends on index 6 (2 - 1 + 5), whose character is replaced by 'w'.
print(ed)
我在我的代码中使用了 'F' 和 'B' 而不是 'h' 和 'l',但问题是我还没有实现让 'N' 成为可选项的那部分逻辑。我的代码只有在操作之后紧跟一个数字时才有效。我怎样才能修复上面的代码以满足所有要求?
@paddy 给了你一个很好的建议,但是看看你需要解析的字符串,在我看来,正则表达式可以很容易地完成这项工作。对于解析后的部分,Command pattern 非常合适。毕竟你有一个必须在初始字符串上执行的操作列表(命令)。
在你的例子中,我认为使用这种模式主要有 3 个优点:
每个
Command
表示应用于初始字符串的操作。这也意味着,例如,如果您想为一系列操作添加快捷方式,则最终Command
的数量保持不变,您只需调整解析步骤。另一个好处是您可以拥有命令的历史记录,并且通常设计更加灵活。所有
Command
共享一个公共接口:一个方法execute()
,如果需要,还有一个方法unexecute()
来撤消 execute() 方法所做的更改。Command
s 将操作执行与解析问题分离。
至于实现,首先定义 Command
s,除了调用接收方方法外不包含任何业务逻辑。
from __future__ import annotations
import functools
import re
import abc
from typing import Iterable
class ICommand(abc.ABC):
    """Abstract base of every editing command.

    A command remembers the receiver it acts on and exposes a single
    ``execute()`` entry point. Both methods are abstract, so a concrete
    command has to define its own ``__init__`` and ``execute``.
    """

    @abc.abstractmethod
    def __init__(self, target: TextManipulator):
        # Subclasses reach this through super().__init__(target).
        self._target = target

    @abc.abstractmethod
    def execute(self):
        """Apply the command to the stored receiver."""
class MoveCursorLeftCommand(ICommand):
    """Command that moves the receiver's cursor towards the start."""

    def __init__(self, target: TextManipulator, counter):
        # The step count is independent of the receiver stored by the base.
        self._counter = counter
        super().__init__(target)

    def execute(self):
        """Ask the receiver to move its cursor left by the stored count."""
        receiver, steps = self._target, self._counter
        receiver.move_cursor_left(steps)
class MoveCursorRightCommand(ICommand):
    """Command that moves the receiver's cursor towards the end."""

    def __init__(self, target: TextManipulator, counter):
        # The step count is independent of the receiver stored by the base.
        self._counter = counter
        super().__init__(target)

    def execute(self):
        """Ask the receiver to move its cursor right by the stored count."""
        receiver, steps = self._target, self._counter
        receiver.move_cursor_right(steps)
class ReplaceCommand(ICommand):
    """Command that overwrites characters starting at the cursor."""

    def __init__(self, target: TextManipulator, counter, replacement):
        # Store the command's own arguments, then let the base keep the receiver.
        self._counter = counter
        self._replacement = replacement
        super().__init__(target)

    def execute(self):
        """Ask the receiver to write the replacement character `counter` times."""
        receiver = self._target
        receiver.replace_char(self._counter, self._replacement)
然后你有命令的接收者,它是 TextManipulator
并且包含改变文本和光标位置的方法。
class TextManipulator:
    """Receiver of the editing commands: a text plus a cursor position.

    The cursor is always clamped to a valid index of the text; for an empty
    text it stays at 0.

    >>> def apply_commands(s, commands_str):
    ...     return TextManipulator(s).run_commands(CommandParser.parse(commands_str))
    >>> apply_commands('Hello World', 'hhlhllhlhhll')
    ('Hello World', 2)
    >>> apply_commands('Hello World', 'rhllllllrw')
    ('hello world', 6)
    >>> apply_commands('Hello World', 'rh6l9l4hrw')
    ('hello world', 6)
    >>> apply_commands('Hello World', '9lrL7h2rL')
    ('HeLLo WorLd', 3)
    >>> apply_commands('Hello World', '999999999999999999999999999lr0')
    ('Hello Worl0', 10)
    >>> apply_commands('Hello World', '999rsom')
    Traceback (most recent call last):
    ValueError: command 'o' not recognized.
    >>> apply_commands('Hello World', '7l5r1')
    ('Hello W1111', 10)
    >>> apply_commands('Hello World', '7l4r1')
    ('Hello W1111', 10)
    >>> apply_commands('Hello World', '7l3r1')
    ('Hello W111d', 9)
    >>> apply_commands('Hello World', '0rx')
    ('Hello World', 0)
    >>> apply_commands('', 'l')
    ('', 0)
    """

    def __init__(self, text):
        self._text = text
        self._cursor_pos = 0

    def replace_char(self, counter, replacement):
        """Overwrite up to `counter` characters, starting at the cursor.

        The write is truncated at the end of the text, and the cursor ends
        on the last character written. A count of zero, or an empty text,
        changes nothing.

        Raises:
            ValueError: if `replacement` is not exactly one character or
                `counter` is negative.
        """
        if len(replacement) != 1:
            raise ValueError("replacement must be exactly one character.")
        if counter < 0:
            raise ValueError("counter must not be negative.")
        start = self._cursor_pos
        # Never write past the end of the text.
        span = min(counter, len(self._text) - start)
        if span <= 0:
            # Nothing to replace: leave the cursor where it is instead of
            # stepping it back by one.
            return
        self._text = (
            self._text[:start] + replacement * span + self._text[start + span:]
        )
        self.move_cursor_right(span - 1)

    def move_cursor_left(self, counter):
        """Move the cursor `counter` characters left, stopping at index 0.

        Raises:
            ValueError: if `counter` is negative.
        """
        if counter < 0:
            raise ValueError("counter must not be negative.")
        self._cursor_pos = max(0, self._cursor_pos - counter)

    def move_cursor_right(self, counter):
        """Move the cursor `counter` characters right, stopping at the last character.

        Raises:
            ValueError: if `counter` is negative.
        """
        if counter < 0:
            raise ValueError("counter must not be negative.")
        last_index = len(self._text) - 1
        # max(0, ...) keeps the cursor at 0 when the text is empty.
        self._cursor_pos = max(0, min(last_index, self._cursor_pos + counter))

    def run_commands(self, commands: Iterable[ICommand]):
        """Bind each deferred command to this receiver and execute it.

        Each item of `commands` is called as ``item(target=self)`` and must
        return an object with an ``execute()`` method. Items are consumed
        lazily, one at a time, in order.

        Returns:
            The tuple ``(text, cursor_position)`` after all commands ran.
        """
        for make_command in commands:
            make_command(target=self).execute()
        return (self._text, self._cursor_pos)
除了 run_commands
方法之外,这段代码没有什么难以解释的地方;该方法接受一个由“部分命令”组成的可迭代对象。这些部分命令是尚未绑定接收者对象的命令,而接收者的类型应该是 TextManipulator
。你为什么要那样做?这是一种将解析与命令执行分离的可能方法。我决定用 functools.partial
来做,但你还有其他有效的选择。
最终,解析部分:
class CommandParser:
    """Turns a command string into deferred, receiver-less commands."""

    @staticmethod
    def parse(commands_str: str):
        """Parse `commands_str` into a lazy iterator of partial commands.

        Each command is an optional decimal repeat count followed by ``h``,
        ``l`` or ``r<c>``, where ``<c>`` is any single character except a
        newline. Every yielded item is a ``functools.partial`` that still
        needs the ``target`` keyword argument to build the command object.

        Parsing is lazy, so the errors below surface while the result is
        being iterated, not when ``parse`` is called.

        Raises:
            ValueError: for a character that is not a known command, or for
                an ``r`` that is not followed by a replacement character.
        """
        def invalid_command(match: re.Match):
            raise ValueError(f"command '{match.group(2)}' not recognized.")

        def counter_of(match):
            # A missing repeat count means "once".
            return int(match.group(1) or 1)

        def build_replace(match):
            replacement = match.group(3)
            if replacement is None:
                # A bare 'r' was picked up by the catch-all '.' alternative.
                raise ValueError("command 'r' requires a replacement character.")
            return functools.partial(
                ReplaceCommand,
                counter=counter_of(match),
                replacement=replacement,
            )

        builders = {
            'h': lambda match: functools.partial(
                MoveCursorLeftCommand, counter=counter_of(match)),
            'l': lambda match: functools.partial(
                MoveCursorRightCommand, counter=counter_of(match)),
            'r': build_replace,
        }
        # Group 2 is the command itself; the trailing '.' alternative catches
        # every unknown character so that it can be reported.
        tokens = re.finditer(r'(\d*)(h|l|r(.)|.)', commands_str)
        return map(
            lambda match: builders.get(match.group(2)[0], invalid_command)(match),
            tokens,
        )
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module's
    # docstrings.
    import doctest
    doctest.testmod()
正如我在开头所说,在您的情况下可以使用正则表达式进行解析,并且命令创建基于每个匹配项的第二个捕获组的第一个字母。原因是对于 char 替换,第二个捕获组也包含要替换的 char。 commands_map
以 match.group(2)[0]
作为键进行查找,并返回对应的部分 Command
。如果在映射中未找到该操作,则会抛出 ValueError
异常。每个 Command
的参数都是从 re.Match
对象中推断出来的。
只需将所有这些代码片段放在一起,您就有了一个可行的解决方案(以及 doctest
执行的文档字符串提供的一些测试)。
在某些情况下这可能是一个过于复杂的设计,所以我并不是说这是正确的方法(例如,如果您正在编写一个简单的工具,则可能不是)。您可以避开 Command
s 部分,只采用解析解决方案,但我发现这是该模式的一个有趣(替代)应用。
这个问题的关键是弄清楚如何解析命令字符串。根据您的描述,命令字符串包含一个可选数字,后跟以下三种可能性之一:
h
l
r
,后跟一个字符
解析这个的正则表达式是 (try online):
(\d*)(h|l|r.)
Explanation:
(\d*) Capture zero or more digits,
(h|l|r.) Capture either an h, or an l, or an r followed by any character
将 re.findall()
与此正则表达式结合使用,您可以获得匹配项列表,其中每个匹配项都是包含捕获组的 tuple
。例如,"rh6l9l4hrw"
给出结果
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
所以元组的第一个元素是表示N
的字符串(如果没有给出数字则为空字符串),元组的第二个元素是命令。如果命令是 r
,它后面将包含替换字符。现在我们需要做的就是迭代这个列表,并应用正确的命令。
我做了一些修改:
- 通过 属性 和处理正确边界检查的 setter 访问
self.pos
- 在创建对象时将输入文本分解为列表,因为无法像使用列表那样修改字符串 in-place。
__str__()
将列表连接回一个字符串。 - 通过只读 属性 访问
self.text
,它将self.__text
列表连接成一个字符串。
class Editor():
    """Text buffer with a cursor, driven by vim-like command strings.

    Supported commands are ``h`` (left), ``l`` (right) and ``r<c>`` (replace
    with ``<c>``), each optionally prefixed by a repeat count.
    """

    def __init__(self, text):
        # Keep the characters in a list so they can be replaced in place.
        self.__text = list(text)
        self.__pos = 0

    @property
    def text(self):
        """The current text as a string (read-only)."""
        return "".join(self.__text)

    @property
    def pos(self):
        """The cursor index; assignments are clamped to the text bounds."""
        return self.__pos

    @pos.setter
    def pos(self, value):
        # Clamp to [0, len - 1]; for an empty text the cursor stays at 0.
        self.__pos = max(0, min(len(self.__text) - 1, value))

    def l(self, step):
        """Move the cursor `step` characters to the right."""
        self.pos = self.pos + step

    def h(self, step):
        """Move the cursor `step` characters to the left."""
        self.pos = self.pos - step

    def r(self, char, count=1):
        """Replace `count` characters, starting at the cursor, with `char`.

        The count is cut off at the end of the text, and the cursor ends on
        the last replaced character. If nothing can be replaced (a count of
        zero or an empty text), the text and the cursor are left untouched.
        """
        # If count causes the cursor to overshoot the text, shrink it.
        count = min(count, len(self.__text) - self.pos)
        if count <= 0:
            # Without this guard the cursor would step one position left.
            return
        self.__text[self.pos:self.pos + count] = char * count
        self.pos = self.pos + count - 1  # Last replaced character

    def run(self, command):
        """Execute every command found in `command`.

        Characters that do not belong to a command are skipped by the
        regular expression.

        Raises:
            ValueError: if a parsed command fails `validate`.
        """
        commands = re.findall(r"(\d*)(h|l|r.)", command)
        for cmd in commands:
            self.validate(cmd)
            count = int(cmd[0] or "1")  # If cmd[0] is blank, use count = 1
            if cmd[1] == "h":
                self.h(count)
            elif cmd[1] == "l":
                self.l(count)
            elif cmd[1][0] == "r":
                self.r(cmd[1][1], count)

    def validate(self, cmd):
        """Check one ``(count, command)`` tuple.

        Raises:
            ValueError: if the count is not a decimal number, the command is
                empty or is not h, l or r, or an r has no replacement
                character.
        """
        cmd_s = ''.join(cmd)
        if cmd[0] and not cmd[0].isdecimal():
            raise ValueError(f"Invalid numeric input {cmd[0]} for command {cmd_s}")
        elif not cmd[1] or cmd[1][0] not in "hlr":
            raise ValueError(f"Invalid command {cmd_s}: Must be either h or l or r")
        elif cmd[1] == 'r':
            # A bare 'r' has no character to write.
            raise ValueError(f"Invalid command {cmd_s}: r command needs an argument")

    def __str__(self):
        return self.text
用您给定的输入运行它:
# Run every sample command string from the task against a fresh editor.
commands = ["hhlhllhlhhll", "rhllllllrw", "rh6l9l4hrw", "9lrL7h2rL", "999999999999999999999999999lr0", "999rsom"]
for cmd in commands:
    e = Editor("Hello World")
    e.run(cmd)
    # Both marker lines are padded with e.pos spaces.
    uline = " " + " " * e.pos + "^"
    cline = "Cursor: " + " " * e.pos + str(e.pos)
    print(f"Input: {cmd}\nOutput: {str(e)}\n{uline}\n{cline}\n")
Input: hhlhllhlhhll
Output: Hello World
^
Cursor: 2
Input: rhllllllrw
Output: hello world
^
Cursor: 6
Input: rh6l9l4hrw
Output: hello world
^
Cursor: 6
Input: 9lrL7h2rL
Output: HeLLo WorLd
^
Cursor: 3
Input: 999999999999999999999999999lr0
Output: Hello Worl0
^
Cursor: 10
Input: 999rsom
Output: sssssssssss
^
Cursor: 10
现在,如果你想在没有正则表达式的情况下做同样的事情,你只需要想出一种方法将输入命令字符串解析成那种元组列表,你可以使用与以前相同的逻辑来做实际更换。
在这里,我将通过编写一个函数来完成此操作:该函数接受一个字符串,并返回一个遍历其中所有命令的迭代器。产生的每个元素都是一个元组,看起来像 re.findall()
返回的列表中的一个元素。这将允许我们用我们的自定义解析器简单地替换对 re.findall()
的调用:
def iter_command(self, command: str):
    """Yield ``(count, command)`` string tuples found in `command`.

    Regex-free replacement for
    ``re.findall(r"(\\d*)(h|l|r.)", command)``: ``count`` is the run of
    ASCII digits directly in front of the command (possibly empty), and
    ``command`` is ``"h"``, ``"l"`` or ``"r"`` plus the character that
    follows it. As with the regex, any other character is skipped and
    discards the digits collected before it, and an ``r`` at the very end
    or directly before a newline is not a command.
    """
    pos = 0
    end = len(command)
    while pos < end:
        # 1. The number part: a maximal run of digits.
        num_start = pos
        while pos < end and "0" <= command[pos] <= "9":
            pos += 1
        if pos == end:
            break  # Trailing digits without a command are dropped
        count = command[num_start:pos]
        char = command[pos]
        # 2. The "h|l|r." part of the regex.
        if char == "h" or char == "l":
            yield (count, char)
            pos += 1
        elif char == "r" and pos + 1 < end and command[pos + 1] != "\n":
            yield (count, command[pos:pos + 2])
            pos += 2
        else:
            # A character we don't care about: skip it. The digits before
            # it are not carried over to the next command.
            pos += 1
现在,让我们针对之前的正则表达式进行测试:
# Compare the hand-written parser against the regex on every sample input.
commands = ["hhlhllhlhhll", "rhllllllrw", "rh6l9l4hrw", "9lrL7h2rL", "999999999999999999999999999lr0", "999rsom"]
for cmd in commands:
    e = Editor("Hello World")
    commands_custom = list(e.iter_command(cmd))
    commands_regex = re.findall(r"(\d*)(h|l|r.)", cmd)
    print(commands_custom)
    print(commands_regex)
    print(cmd)
    # Compare the whole lists: zip() stops at the shorter one, so a parser
    # that yields too few or too many commands would still look equal.
    print(commands_custom == commands_regex)
    print("")
[('', 'h'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'h'), ('', 'l'), ('', 'l')]
[('', 'h'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'l'), ('', 'h'), ('', 'l'), ('', 'h'), ('', 'h'), ('', 'l'), ('', 'l')]
hhlhllhlhhll
True
[('', 'rh'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'rw')]
[('', 'rh'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'l'), ('', 'rw')]
rhllllllrw
True
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
[('', 'rh'), ('6', 'l'), ('9', 'l'), ('4', 'h'), ('', 'rw')]
rh6l9l4hrw
True
[('9', 'l'), ('', 'rL'), ('7', 'h'), ('2', 'rL')]
[('9', 'l'), ('', 'rL'), ('7', 'h'), ('2', 'rL')]
9lrL7h2rL
True
[('999999999999999999999999999', 'l'), ('', 'r0')]
[('999999999999999999999999999', 'l'), ('', 'r0')]
999999999999999999999999999lr0
True
[('999', 'rs')]
[('999', 'rs')]
999rsom
True
而且,由于这些给出了相同的结果,我们只需要替换对 re.findall()
的调用:
def run(self, command):
- commands = re.findall(r"(\d*)(h|l|r.)", command)
+ commands = self.iter_command(command)
for cmd in commands: