在 Python 中不使用字典计算词频
Count word frequency without a dictionary in Python
我有按字母顺序排序的文件输入
Asburn 9
Ashley 10
Ashley 11
Bojourn 12
我想要这样的输出
Asburn 9
Ashley 21
Bojourn 12
我用过的代码:
import sys
from collections import defaultdict


def count_words(lines):
    """Sum the counts of every word found in *lines*.

    Each non-blank line must consist of a word and an integer count
    separated by whitespace, e.g. ``"Ashley 10"``.

    Returns a ``defaultdict(int)`` mapping each word to its total count.
    Raises ``ValueError`` if a non-blank line does not have exactly two
    fields or if the count is not an integer.
    """
    counts = defaultdict(int)
    for line in lines:
        # A blank line (e.g. a trailing newline at end of input) would make
        # the two-name unpacking below fail, so skip it.
        if not line.strip():
            continue
        word, count = line.split()
        counts[word] += int(count)
    return counts


if __name__ == "__main__":
    for wrd, count in count_words(sys.stdin).items():
        print("{}\t{}".format(wrd, count))
但是我不能使用字典。我该如何实现?
试试这个:
import sys


def sum_sorted_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    Each non-blank line of *lines* must consist of a word and an integer
    count separated by whitespace.  Only adjacent lines are merged, so the
    input has to be sorted (or at least grouped) by word for the totals to
    be per-word totals.  No dictionary is used: only the current word and
    its running total are kept.

    Raises ``ValueError`` if a non-blank line does not have exactly two
    fields or if the count is not an integer.
    """
    prev_word = None
    prev_count = 0
    for line in lines:
        # A blank line (e.g. a trailing newline at end of input) would make
        # the two-name unpacking below fail, so skip it.
        if not line.strip():
            continue
        word, count = line.split()
        count = int(count)
        if word == prev_word:
            prev_count += count
            continue
        # A new word starts: emit the finished run.  Before the first word
        # there is nothing to emit yet.
        if prev_word is not None:
            yield prev_word, prev_count
        prev_word, prev_count = word, count
    # The last run is still pending when the input is exhausted.
    if prev_word is not None:
        yield prev_word, prev_count


if __name__ == "__main__":
    for word, total in sum_sorted_counts(sys.stdin):
        print(word, total)
快速测试:以下代码会输出预期结果
# Quick check of the running-total approach on a small, already sorted sample.
# Expected output:
#   ashutosh 19
#   check 22
#   zebra 11
prev_word = None
prev_count = 0
for line in ['ashutosh 9', 'ashutosh 10', 'check 11', 'check 11', 'zebra 11']:
    word, count = line.split()
    count = int(count)
    if word != prev_word:
        # A new word starts: print the finished run (there is none before
        # the very first word).
        if prev_word is not None:
            print(prev_word, prev_count)
        prev_count = count
        prev_word = word
    else:
        prev_count += count
# The last run is still pending when the loop ends.
if prev_word is not None:
    print(prev_word, prev_count)
在 Python 的标准库的帮助下:
from sys import stdin
from itertools import groupby
from operator import itemgetter
def sum_grouped_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    Each non-blank line of *lines* must hold a word followed by an integer
    count.  ``groupby`` only merges adjacent items, so the input has to be
    sorted (or at least grouped) by word.
    """
    # Blank lines split into an empty list, on which itemgetter(0) would
    # raise IndexError, so they are filtered out first.
    rows = (line.split() for line in lines if line.strip())
    for word, group in groupby(rows, itemgetter(0)):
        yield word, sum(int(count) for _, count in group)


if __name__ == "__main__":
    for word, total in sum_grouped_counts(stdin):
        print(word, total)
使用正则表达式提取类似键值对的条目,按键对它们分组,再用 zip 将每组的键和值拆分开。
import re
import itertools as it
text = """Asburn 9
Ashley 10
Ashley 11
Bojourn 12"""
# Each match is a (word, count) tuple of strings, in input order.
pairs = re.findall(r'^(.+?)\s(\d+)\n*', text, re.M)
# groupby merges only adjacent pairs with the same word, so the input must be
# sorted by word.  zip(*grp) transposes one group's pairs into a tuple of keys
# and a tuple of values; grp is consumed right here, before groupby advances.
groups = [zip(*grp) for _, grp in it.groupby(pairs, key=lambda p: p[0])]
for keys, values in groups:
    # All keys in a group are equal, so the first one names the group.
    print(keys[0], sum(map(int, values)))
我有按字母顺序排序的文件输入
Asburn 9
Ashley 10
Ashley 11
Bojourn 12
我想要这样的输出
Asburn 9
Ashley 21
Bojourn 12
我用过的代码:
import sys
from collections import defaultdict


def count_words(lines):
    """Sum the counts of every word found in *lines*.

    Each non-blank line must consist of a word and an integer count
    separated by whitespace, e.g. ``"Ashley 10"``.

    Returns a ``defaultdict(int)`` mapping each word to its total count.
    Raises ``ValueError`` if a non-blank line does not have exactly two
    fields or if the count is not an integer.
    """
    counts = defaultdict(int)
    for line in lines:
        # A blank line (e.g. a trailing newline at end of input) would make
        # the two-name unpacking below fail, so skip it.
        if not line.strip():
            continue
        word, count = line.split()
        counts[word] += int(count)
    return counts


if __name__ == "__main__":
    for wrd, count in count_words(sys.stdin).items():
        print("{}\t{}".format(wrd, count))
但是我不能使用字典。我该如何实现?
试试这个:
import sys


def sum_sorted_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    Each non-blank line of *lines* must consist of a word and an integer
    count separated by whitespace.  Only adjacent lines are merged, so the
    input has to be sorted (or at least grouped) by word for the totals to
    be per-word totals.  No dictionary is used: only the current word and
    its running total are kept.

    Raises ``ValueError`` if a non-blank line does not have exactly two
    fields or if the count is not an integer.
    """
    prev_word = None
    prev_count = 0
    for line in lines:
        # A blank line (e.g. a trailing newline at end of input) would make
        # the two-name unpacking below fail, so skip it.
        if not line.strip():
            continue
        word, count = line.split()
        count = int(count)
        if word == prev_word:
            prev_count += count
            continue
        # A new word starts: emit the finished run.  Before the first word
        # there is nothing to emit yet.
        if prev_word is not None:
            yield prev_word, prev_count
        prev_word, prev_count = word, count
    # The last run is still pending when the input is exhausted.
    if prev_word is not None:
        yield prev_word, prev_count


if __name__ == "__main__":
    for word, total in sum_sorted_counts(sys.stdin):
        print(word, total)
快速测试:以下代码会输出预期结果
# Quick check of the running-total approach on a small, already sorted sample.
# Expected output:
#   ashutosh 19
#   check 22
#   zebra 11
prev_word = None
prev_count = 0
for line in ['ashutosh 9', 'ashutosh 10', 'check 11', 'check 11', 'zebra 11']:
    word, count = line.split()
    count = int(count)
    if word != prev_word:
        # A new word starts: print the finished run (there is none before
        # the very first word).
        if prev_word is not None:
            print(prev_word, prev_count)
        prev_count = count
        prev_word = word
    else:
        prev_count += count
# The last run is still pending when the loop ends.
if prev_word is not None:
    print(prev_word, prev_count)
在 Python 的标准库的帮助下:
from sys import stdin
from itertools import groupby
from operator import itemgetter
def sum_grouped_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    Each non-blank line of *lines* must hold a word followed by an integer
    count.  ``groupby`` only merges adjacent items, so the input has to be
    sorted (or at least grouped) by word.
    """
    # Blank lines split into an empty list, on which itemgetter(0) would
    # raise IndexError, so they are filtered out first.
    rows = (line.split() for line in lines if line.strip())
    for word, group in groupby(rows, itemgetter(0)):
        yield word, sum(int(count) for _, count in group)


if __name__ == "__main__":
    for word, total in sum_grouped_counts(stdin):
        print(word, total)
使用正则表达式提取类似键值对的条目,按键对它们分组,再用 zip 将每组的键和值拆分开。
import re
import itertools as it
text = """Asburn 9
Ashley 10
Ashley 11
Bojourn 12"""
# Each match is a (word, count) tuple of strings, in input order.
pairs = re.findall(r'^(.+?)\s(\d+)\n*', text, re.M)
# groupby merges only adjacent pairs with the same word, so the input must be
# sorted by word.  zip(*grp) transposes one group's pairs into a tuple of keys
# and a tuple of values; grp is consumed right here, before groupby advances.
groups = [zip(*grp) for _, grp in it.groupby(pairs, key=lambda p: p[0])]
for keys, values in groups:
    # All keys in a group are equal, so the first one names the group.
    print(keys[0], sum(map(int, values)))