如何有效地用单个下划线替换特殊字符和空格?

How to effectively replace special chars and spaces with single underscore?

如何轻松地将特殊字符和空格替换为下划线 _,并将多个连续的下划线合并为单个 _?

下面是我的方法,但不是很好。

import string

s = 'Hi th@re gu^ys!@#()tes   t;"[]ing'

# Turn every punctuation and whitespace character into '_' in a single pass.
# A translation table avoids one .replace() call per unwanted character and
# does not need a loop variable that would shadow a builtin such as `chr`.
unwanted = string.punctuation + string.whitespace
s = s.translate(str.maketrans(dict.fromkeys(unwanted, '_')))

# Collapse any run of underscores down to a single one.
while '__' in s:
    s = s.replace('__', '_')

print(s)  # Hi_th_re_gu_ys_tes_t_ing

版本 2:

dont_want_these = frozenset(string.punctuation + string.whitespace)


def repl_it(s):
    """Return *s* with each run of unwanted characters replaced by one '_'.

    A character is unwanted when it is a member of ``dont_want_these``
    (ASCII punctuation and whitespace). All other characters are kept as-is.
    """
    pieces = []
    in_run = False  # True while we are inside a run of unwanted characters

    for ch in s:
        if ch not in dont_want_these:
            pieces.append(ch)
            in_run = False
        elif not in_run:
            # First unwanted character of a run: emit the single underscore.
            pieces.append('_')
            in_run = True
        # Later unwanted characters in the same run are dropped.

    return ''.join(pieces)


assert repl_it('Hi th@re gu^ys!@#()tes   t;"[]ing') == 'Hi_th_re_gu_ys_tes_t_ing'

谢谢

import re
s = 'Hi th@re gu^ys!@#()tes   t;"[]ing'
# '_' counts as a word character, so \W alone never matches it. Listing it
# explicitly lets underscores already in the text merge into the same run.
new_s = re.sub(r'[\W_]+', '_', s)
print(new_s)
import re

# One compiled pattern shared by both substitutions: a run of non-word
# characters and/or underscores becomes a single '_'.
unwanted_run = re.compile(r'[\W_]+')
new_s1 = unwanted_run.sub('_', s1)
new_s2 = unwanted_run.sub('_', s2)

输入:

# Sample text with spaces and punctuation mixed in.
s1 = 'Hi th@re gu^ys!@#()tes   t;"[]ing'
# Same text, plus a run of literal underscores after the "t".
s2 = 'Hi th@re gu^ys!@#()tes   t___;"[]ing'

输出:

>>> print(new_s1)
Hi_th_re_gu_ys_tes_t_ing
>>> print(new_s2)
Hi_th_re_gu_ys_tes_t_ing