如何拆分 python 中的整数字符对
How to split integer-character pairs in python
我有一个列表,
['20W', '20W', '20W', '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W', '9W3K8W', '7W7R6W', '6W4R1Y4R5W', '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W', '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '8W5N7W']
我想统计其中包含多少个“整数-字符”对。例如,20W 是一对,1R 和 5K 也各是一对。我尝试过使用正则表达式 \d+[A-Z],它确实能识别出这些对;但整数部分的长度不固定,而字母部分始终只有一个字母,我该如何把它们逐一拆分出来呢?
提前致谢。
import re
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# One or more digits followed by exactly one uppercase letter. Compiled once
# because the same pattern is applied to every item.
pair_re = re.compile(r'\d+[A-Z]')

for item in data:
    # findall returns the non-overlapping matches from left to right, which
    # both splits the string into its pairs and lets us count them.
    result = pair_re.findall(item)
    print(item, len(result), result)
输出
20W 1 ['20W']
20W 1 ['20W']
20W 1 ['20W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
9W3K8W 3 ['9W', '3K', '8W']
7W7R6W 3 ['7W', '7R', '6W']
6W4R1Y4R5W 5 ['6W', '4R', '1Y', '4R', '5W']
6W1R1W5R1W1R5W 7 ['6W', '1R', '1W', '5R', '1W', '1R', '5W']
6W1R1W2R1Y2R1W1R5W 9 ['6W', '1R', '1W', '2R', '1Y', '2R', '1W', '1R', '5W']
6W1R1W5R1W1R5W 7 ['6W', '1R', '1W', '5R', '1W', '1R', '5W']
6W1R1W2R1Y2R1W1R5W 9 ['6W', '1R', '1W', '2R', '1Y', '2R', '1W', '1R', '5W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
8W5N7W 3 ['8W', '5N', '7W']
另一个单行写法,不需要一堆多余的函数和循环。由于每一对总是恰好包含一个字母,你真正要问的其实是:“一共有多少个字母?”此时数字本身甚至无关紧要。这个正则表达式也比 @Ronie 的类似答案更高效、更灵活:即使字母中出于某种原因出现了小写字母,它也不会出错。
import re
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# Counts letters (case-insensitively) across the concatenated input. This
# equals the number of pairs only as long as each pair holds exactly one
# letter, which is the premise stated in the question.
pairs = sum(
    1 for _ in re.finditer('[a-z]', "".join(data), re.I)
)
print(pairs)  # 76
编辑:上面的代码与我的评论者所指的版本完全不同。
具有完整统计报告的复杂示例
import re
class Statistics:
    """Tally every ``<digits><letter>`` pair found in a collection of strings.

    The strings are concatenated and scanned from left to right. For each
    distinct pair text (for example ``'8W'``) an instance attribute with that
    exact name is set to ``[count, positions]``, where ``positions`` lists the
    0-based ordinal of each occurrence among all pairs found. Because these
    names are not valid identifiers they must be read with ``getattr`` or
    ``vars``. The total number of pairs is stored in ``pairs``.

    Matching is case-insensitive, but the matched text is stored as found, so
    ``'8w'`` and ``'8W'`` are tallied separately.
    """

    def __init__(self, data):
        """Scan *data* and record the per-pair statistics.

        Args:
            data: Iterable of strings; the items are joined with no separator
                before matching.
        """
        # Raw string so that ``\d`` reaches the regex engine untouched.
        pair_pattern = re.compile(r'\d+[a-z]', re.I)
        self.pairs = 0
        for match in pair_pattern.finditer("".join(data)):
            token = match.group()
            # A token always starts with a digit, so it can never collide
            # with ``pairs`` or with a method name.
            entry = getattr(self, token, None)
            if entry is None:
                entry = [0, []]
                setattr(self, token, entry)
            entry[0] += 1
            entry[1].append(self.pairs)
            self.pairs += 1

    def __str__(self):
        """Return a report with one line per distinct pair, in first-seen order."""
        lines = [f'{self.pairs} pairs were found:\n\n']
        for name, value in vars(self).items():
            if name == 'pairs':
                continue
            count, positions = value
            # This is only reached when at least one pair exists, so
            # ``self.pairs`` is non-zero here.
            percentage = round(len(positions) / self.pairs * 100, 2)
            lines.append(
                f'{name} appears {count} times in positions {positions} '
                f'and is {percentage}% of known values\n'
            )
        return ''.join(lines)
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# Build the statistics object, then print the report produced by its
# __str__ method.
stats = Statistics(data)
print(stats)
输出
'''
76 pairs were found:
20W appears 3 times in positions [0, 1, 2] and is 3.95% of known values
8W appears 11 times in positions [3, 6, 9, 12, 17, 60, 63, 66, 69, 72, 73] and is 14.47% of known values
5K appears 4 times in positions [4, 7, 10, 13] and is 5.26% of known values
7W appears 6 times in positions [5, 8, 11, 14, 18, 75] and is 7.89% of known values
9W appears 6 times in positions [15, 58, 61, 64, 67, 70] and is 7.89% of known values
3K appears 1 times in positions [16] and is 1.32% of known values
7R appears 1 times in positions [19] and is 1.32% of known values
6W appears 6 times in positions [20, 21, 26, 33, 42, 49] and is 7.89% of known values
4R appears 2 times in positions [22, 24] and is 2.63% of known values
1Y appears 3 times in positions [23, 37, 53] and is 3.95% of known values
5W appears 5 times in positions [25, 32, 41, 48, 57] and is 6.58% of known values
1R appears 8 times in positions [27, 31, 34, 40, 43, 47, 50, 56] and is 10.53% of known values
1W appears 8 times in positions [28, 30, 35, 39, 44, 46, 51, 55] and is 10.53% of known values
5R appears 2 times in positions [29, 45] and is 2.63% of known values
2R appears 4 times in positions [36, 38, 52, 54] and is 5.26% of known values
3B appears 5 times in positions [59, 62, 65, 68, 71] and is 6.58% of known values
5N appears 1 times in positions [74] and is 1.32% of known values
'''
单行代码 + import:
import re
# NOTE(review): `x` is not defined in this snippet; presumably it is the list
# of strings from the question (named `data` in the other snippets).
# The pattern is a raw string so that `\d` is passed to the regex engine
# as-is instead of being treated as an invalid string escape.
print(sum(len(re.findall(r"\d+[A-Z]", item)) for item in x))  # 76
我有一个列表,
['20W', '20W', '20W', '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W', '9W3K8W', '7W7R6W', '6W4R1Y4R5W', '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W', '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '8W5N7W']
我想统计其中包含多少个“整数-字符”对。例如,20W 是一对,1R 和 5K 也各是一对。我尝试过使用正则表达式 \d+[A-Z],它确实能识别出这些对;但整数部分的长度不固定,而字母部分始终只有一个字母,我该如何把它们逐一拆分出来呢?
提前致谢。
import re
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# One or more digits followed by exactly one uppercase letter. Compiled once
# because the same pattern is applied to every item.
pair_re = re.compile(r'\d+[A-Z]')

for item in data:
    # findall returns the non-overlapping matches from left to right, which
    # both splits the string into its pairs and lets us count them.
    result = pair_re.findall(item)
    print(item, len(result), result)
输出
20W 1 ['20W']
20W 1 ['20W']
20W 1 ['20W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
8W5K7W 3 ['8W', '5K', '7W']
9W3K8W 3 ['9W', '3K', '8W']
7W7R6W 3 ['7W', '7R', '6W']
6W4R1Y4R5W 5 ['6W', '4R', '1Y', '4R', '5W']
6W1R1W5R1W1R5W 7 ['6W', '1R', '1W', '5R', '1W', '1R', '5W']
6W1R1W2R1Y2R1W1R5W 9 ['6W', '1R', '1W', '2R', '1Y', '2R', '1W', '1R', '5W']
6W1R1W5R1W1R5W 7 ['6W', '1R', '1W', '5R', '1W', '1R', '5W']
6W1R1W2R1Y2R1W1R5W 9 ['6W', '1R', '1W', '2R', '1Y', '2R', '1W', '1R', '5W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
9W3B8W 3 ['9W', '3B', '8W']
8W5N7W 3 ['8W', '5N', '7W']
另一个单行写法,不需要一堆多余的函数和循环。由于每一对总是恰好包含一个字母,你真正要问的其实是:“一共有多少个字母?”此时数字本身甚至无关紧要。这个正则表达式也比 @Ronie 的类似答案更高效、更灵活:即使字母中出于某种原因出现了小写字母,它也不会出错。
import re
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# Counts letters (case-insensitively) across the concatenated input. This
# equals the number of pairs only as long as each pair holds exactly one
# letter, which is the premise stated in the question.
pairs = sum(
    1 for _ in re.finditer('[a-z]', "".join(data), re.I)
)
print(pairs)  # 76
编辑:上面的代码与我的评论者所指的版本完全不同。
具有完整统计报告的复杂示例
import re
class Statistics:
    """Tally every ``<digits><letter>`` pair found in a collection of strings.

    The strings are concatenated and scanned from left to right. For each
    distinct pair text (for example ``'8W'``) an instance attribute with that
    exact name is set to ``[count, positions]``, where ``positions`` lists the
    0-based ordinal of each occurrence among all pairs found. Because these
    names are not valid identifiers they must be read with ``getattr`` or
    ``vars``. The total number of pairs is stored in ``pairs``.

    Matching is case-insensitive, but the matched text is stored as found, so
    ``'8w'`` and ``'8W'`` are tallied separately.
    """

    def __init__(self, data):
        """Scan *data* and record the per-pair statistics.

        Args:
            data: Iterable of strings; the items are joined with no separator
                before matching.
        """
        # Raw string so that ``\d`` reaches the regex engine untouched.
        pair_pattern = re.compile(r'\d+[a-z]', re.I)
        self.pairs = 0
        for match in pair_pattern.finditer("".join(data)):
            token = match.group()
            # A token always starts with a digit, so it can never collide
            # with ``pairs`` or with a method name.
            entry = getattr(self, token, None)
            if entry is None:
                entry = [0, []]
                setattr(self, token, entry)
            entry[0] += 1
            entry[1].append(self.pairs)
            self.pairs += 1

    def __str__(self):
        """Return a report with one line per distinct pair, in first-seen order."""
        lines = [f'{self.pairs} pairs were found:\n\n']
        for name, value in vars(self).items():
            if name == 'pairs':
                continue
            count, positions = value
            # This is only reached when at least one pair exists, so
            # ``self.pairs`` is non-zero here.
            percentage = round(len(positions) / self.pairs * 100, 2)
            lines.append(
                f'{name} appears {count} times in positions {positions} '
                f'and is {percentage}% of known values\n'
            )
        return ''.join(lines)
# Sample input: every string is a run of <integer><letter> pairs.
data = [
    '20W', '20W', '20W',
    '8W5K7W', '8W5K7W', '8W5K7W', '8W5K7W',
    '9W3K8W', '7W7R6W', '6W4R1Y4R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '6W1R1W5R1W1R5W', '6W1R1W2R1Y2R1W1R5W',
    '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W', '9W3B8W',
    '8W5N7W',
]

# Build the statistics object, then print the report produced by its
# __str__ method.
stats = Statistics(data)
print(stats)
输出
'''
76 pairs were found:
20W appears 3 times in positions [0, 1, 2] and is 3.95% of known values
8W appears 11 times in positions [3, 6, 9, 12, 17, 60, 63, 66, 69, 72, 73] and is 14.47% of known values
5K appears 4 times in positions [4, 7, 10, 13] and is 5.26% of known values
7W appears 6 times in positions [5, 8, 11, 14, 18, 75] and is 7.89% of known values
9W appears 6 times in positions [15, 58, 61, 64, 67, 70] and is 7.89% of known values
3K appears 1 times in positions [16] and is 1.32% of known values
7R appears 1 times in positions [19] and is 1.32% of known values
6W appears 6 times in positions [20, 21, 26, 33, 42, 49] and is 7.89% of known values
4R appears 2 times in positions [22, 24] and is 2.63% of known values
1Y appears 3 times in positions [23, 37, 53] and is 3.95% of known values
5W appears 5 times in positions [25, 32, 41, 48, 57] and is 6.58% of known values
1R appears 8 times in positions [27, 31, 34, 40, 43, 47, 50, 56] and is 10.53% of known values
1W appears 8 times in positions [28, 30, 35, 39, 44, 46, 51, 55] and is 10.53% of known values
5R appears 2 times in positions [29, 45] and is 2.63% of known values
2R appears 4 times in positions [36, 38, 52, 54] and is 5.26% of known values
3B appears 5 times in positions [59, 62, 65, 68, 71] and is 6.58% of known values
5N appears 1 times in positions [74] and is 1.32% of known values
'''
单行代码 + import:
import re
# NOTE(review): `x` is not defined in this snippet; presumably it is the list
# of strings from the question (named `data` in the other snippets).
# The pattern is a raw string so that `\d` is passed to the regex engine
# as-is instead of being treated as an invalid string escape.
print(sum(len(re.findall(r"\d+[A-Z]", item)) for item in x))  # 76