Python 不使用正则表达式替换整个单词
Python replace whole words without using regex
我想在不使用正则表达式的情况下用另一个字符串替换整个单词。 replace_all_whole_words1
可以满足我的要求,但我不想使用正则表达式,它仅用于测试我的 replace_all_whole_words2
函数是否正常工作(不是)。
import re
def replace_all_whole_words1(needle, replacement, haystack):
needle = re.escape(needle)
return re.sub(r"\b%s\b" % needle, replacement, haystack)
def replace_all_whole_words2(needle, replacement, haystack):
i=0
while len(haystack) > i:
found_word = False
found_word_start = i
found_word_end = 0
found_type = 0 # will repersent if we found, word + " " or " " + word or " " + word + " " ()
if i == 0:
found_word = haystack[i:i+len(needle + " " )] == needle + " "
found_word_end = i + len(needle)
found_type = 1
elif i == len(haystack) - len(" " + needle):
found_word = haystack[i:i+len(" " + needle)] == " " + needle
found_word_end = i + len(" " + needle)
found_type = 2
else:
found_word = haystack[i:i+len(" " + needle + " " )] == " " + needle + " "
found_word_end = i + len(" " + needle + " ")
found_type = 3
if found_word:
print(haystack, found_word_start, found_word_end, i, found_type)
haystack = haystack[:found_word_start] + replacement + haystack[found_word_end:]
i += 1
return haystack
needle = "test"
replacement = "replaced"
haystack = "test test test testa atest"
print(
replace_all_whole_words1(needle, replacement, haystack) == replace_all_whole_words2(needle, replacement, haystack)
)
print(
replace_all_whole_words2(needle, replacement, haystack)
)
为什么不转换为列表?因为你只是删除单词。
def replace_all_whole_words2(needle, replacement, haystack):
l = haystack.split()
for idx, word in enumerate(l):
if word == needle:
l[idx] = replacement
return ' '.join(l)
正如评论中所解释的那样,用拆分分离,使用列表理解然后重新加入:
def replace_all_whole_words2(needle, replacement, haystack):
word_list = haystack.split(' ')
new_list = [replacement if word == needle else word for
word in word_list]
return " ".join(new_list)
这里已经有几个很棒且更简单的答案,可以将 haystack 转换为列表,执行替换,然后将其转换回字符串。如果您仍想始终将 haystack 用作字符串,请查看我制作的这个解决方案。
def replace_all_whole_words2(needle, replacement, haystack):
# Counter to go through characters in haystack
i = 0
# This will collect all chars separated by space as word
word = ''
# len_needle will control replacement insertion and diff will adjust i after insertion
len_needle = len(needle)
diff = len(replacement) - len_needle
# Go through characters in haystack and replace needle with replacement
while i < len(haystack):
char = haystack[i]
if char == ' ':
if word == needle:
haystack = haystack[:(i-len_needle)] + replacement + haystack[i:]
# Adjust i so that you can continue from the next char after the space
i += diff
# Reset word to collect new chars
word = ''
else:
word += char
i += 1
return haystack
测试:
needle = "test"
replacement = "replacement"
haystack = "test test test testa atest"
replace_all_whole_words2(needle, replacement, haystack)
输出:
'replacement replacement replacement testa atest'
我想在不使用正则表达式的情况下用另一个字符串替换整个单词。 replace_all_whole_words1
可以满足我的要求,但我不想使用正则表达式,它仅用于测试我的 replace_all_whole_words2
函数是否正常工作(不是)。
import re
def replace_all_whole_words1(needle, replacement, haystack):
needle = re.escape(needle)
return re.sub(r"\b%s\b" % needle, replacement, haystack)
def replace_all_whole_words2(needle, replacement, haystack):
i=0
while len(haystack) > i:
found_word = False
found_word_start = i
found_word_end = 0
found_type = 0 # will repersent if we found, word + " " or " " + word or " " + word + " " ()
if i == 0:
found_word = haystack[i:i+len(needle + " " )] == needle + " "
found_word_end = i + len(needle)
found_type = 1
elif i == len(haystack) - len(" " + needle):
found_word = haystack[i:i+len(" " + needle)] == " " + needle
found_word_end = i + len(" " + needle)
found_type = 2
else:
found_word = haystack[i:i+len(" " + needle + " " )] == " " + needle + " "
found_word_end = i + len(" " + needle + " ")
found_type = 3
if found_word:
print(haystack, found_word_start, found_word_end, i, found_type)
haystack = haystack[:found_word_start] + replacement + haystack[found_word_end:]
i += 1
return haystack
needle = "test"
replacement = "replaced"
haystack = "test test test testa atest"
print(
replace_all_whole_words1(needle, replacement, haystack) == replace_all_whole_words2(needle, replacement, haystack)
)
print(
replace_all_whole_words2(needle, replacement, haystack)
)
为什么不转换为列表?因为你只是删除单词。
def replace_all_whole_words2(needle, replacement, haystack):
l = haystack.split()
for idx, word in enumerate(l):
if word == needle:
l[idx] = replacement
return ' '.join(l)
正如评论中所解释的那样,用拆分分离,使用列表理解然后重新加入:
def replace_all_whole_words2(needle, replacement, haystack):
word_list = haystack.split(' ')
new_list = [replacement if word == needle else word for
word in word_list]
return " ".join(new_list)
这里已经有几个很棒且更简单的答案,可以将 haystack 转换为列表,执行替换,然后将其转换回字符串。如果您仍想始终将 haystack 用作字符串,请查看我制作的这个解决方案。
def replace_all_whole_words2(needle, replacement, haystack):
# Counter to go through characters in haystack
i = 0
# This will collect all chars separated by space as word
word = ''
# len_needle will control replacement insertion and diff will adjust i after insertion
len_needle = len(needle)
diff = len(replacement) - len_needle
# Go through characters in haystack and replace needle with replacement
while i < len(haystack):
char = haystack[i]
if char == ' ':
if word == needle:
haystack = haystack[:(i-len_needle)] + replacement + haystack[i:]
# Adjust i so that you can continue from the next char after the space
i += diff
# Reset word to collect new chars
word = ''
else:
word += char
i += 1
return haystack
测试:
needle = "test"
replacement = "replacement"
haystack = "test test test testa atest"
replace_all_whole_words2(needle, replacement, haystack)
输出:
'replacement replacement replacement testa atest'