如何获取列表值并在 python 中计数
how to get list value and count in python
我正在尝试统计列表中每个单词出现的次数,以便删除计数值较大的单词,但我得到的输出不正确。
假设我的文件中有这样几行:"it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"。我的代码会打印 (was,4),然后在别处又打印 (was,3),依此类推:这个词每出现一次,它就会被打印一次,但计数值不同。我需要每个单词只统计一次。
# Asker's original attempt, kept as posted. Indentation is reconstructed; every
# statement is presumably part of the loop body -- TODO confirm.
for file in files:
    print(file)
    # NOTE(review): f is never closed in the visible code.
    f=open(file, 'r')
    content = f.read()
    wordlist = content.split()
    #print(wordlist)
    # One count per *occurrence*, not per distinct word: a word that appears
    # k times contributes k entries to wordfreq.
    wordfreq = [wordlist.count(w) for w in wordlist] # a list comprehension
    print("List\n" + str(wordlist) + "\n")
    print("Frequencies\n" + str(wordfreq) + "\n")
    # Keeps only the counts above 100; the words they belong to are dropped.
    test = [i for i in wordfreq if i > 100]
    # zip() pairs the filtered counts positionally with the unfiltered
    # wordlist, so a count is not necessarily shown next to its own word.
    print("result\n"+str(list(zip(test,wordlist))))
您可以像这样使用 Counter:
>>> from collections import Counter
>>>
>>> s = "it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"
>>>
>>> d = Counter(s.split())
>>> for k,v in d.items():
... print '{} -> {}'.format(k,v)
...
of -> 4
age -> 2
it -> 3
foolishness -> 1
times -> 2
worst -> 1
.it -> 1
the -> 4
wisdom -> 1
was -> 4
best -> 1
>>>
如果您不想使用 collections.Counter,您可以像这样使用字典:
>>> s = "it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"
>>> d = {}
>>> for word in s.split():
... try:
... d[word] += 1
... except KeyError:
... d[word] = 1
...
>>> d
{'of': 4, 'age': 2, 'it': 3, 'foolishness': 1, 'times': 2, 'worst': 1, '.it': 1, 'the': 4, 'wisdom': 1, 'was': 4, 'best': 1}
您可以使用 collections 中的 Counter:
from collections import Counter
import itertools

# Count the words of every file in ``files`` (an iterable of paths that the
# surrounding code must provide) and collect the ones seen more than 100 times.
for file in files:
    # ``with`` closes the handle as soon as the words have been read.
    with open(file) as handle:
        # split() with no argument already drops the trailing newline.
        # Materialise the words in a list: a bare chain iterator would be
        # exhausted by Counter() and print as an opaque object below.
        data = list(itertools.chain.from_iterable(line.split() for line in handle))
    the_counts = Counter(data)
    print("wordlist: {}".format(data))
    print("frequencies: {}".format(dict(the_counts)))
    # (word, count) pairs for the words that occur more than 100 times.
    test = [(a, b) for a, b in the_counts.items() if b > 100]
from collections import Counter

# Build a word -> occurrence-count mapping for every path in ``files``
# (``files`` must be provided by the surrounding code).
for file in files:
    # ``with`` closes the handle instead of leaving it to the garbage collector.
    with open(file) as handle:
        words = handle.read().split()
    frequencies = Counter(words)
没有计数器的解决方案:
# Count every word of ``s`` without collections.Counter.
# ``s`` is the input text and must be defined before this snippet runs.
new = s.split(' ')
m = list()
for i in new:
    # new.count(i) rescans the whole list for every word: fine for short
    # text, quadratic for long input.
    m.append((i, new.count(i)))
# set() collapses the duplicate (word, count) tuples so each word prints once.
for i in set(m):
    print(i)  # single-argument print(...) works on Python 2 and 3 alike
del m[:]  # deleting list for using it again
输出:
('best', 1)
('was', 4)
('times', 2)
('it', 3)
('worst', 1)
('.it', 1)
('wisdom', 1)
('foolishness', 1)
('the', 4)
('of', 4)
('age', 2)
another test :
s = 'was was it was hello it was'
output :
('hello', 1)
('was', 4)
('it', 2)
如果您的数据保存在文件中,请使用以下代码:
# Count every word of a (possibly multi-line) text file without Counter.
with open('your-file-name', 'r') as r:
    s = r.read()  # reading multi lines
# split() with no argument splits on any run of whitespace, newlines included.
# Deleting the newlines first would glue the last word of a line to the first
# word of the next one, and split(' ') would yield empty strings for
# consecutive spaces.
new = s.split()
m = list()
for i in new:
    m.append((i, new.count(i)))
# set() collapses the duplicate (word, count) tuples so each word prints once.
for i in set(m):
    print(i)  # single-argument print(...) works on Python 2 and 3 alike
del m[:]  # deleting list for using it again
import pandas as pd

# One row per distinct word of ``txt`` (the input text, defined elsewhere)
# with its frequency. The columns are named explicitly through
# rename_axis()/reset_index(name=...) rather than by renaming the default
# labels of value_counts().reset_index(), which differ across pandas versions.
a = (
    pd.Series(txt.split())
    .value_counts()
    .rename_axis("word")
    .reset_index(name="counts")
)
a[a.counts < 100]  # keep only the words that occur fewer than 100 times
我正在尝试统计列表中每个单词出现的次数,以便删除计数值较大的单词,但我得到的输出不正确。假设我的文件中有这样几行:"it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"。我的代码会打印 (was,4),然后在别处又打印 (was,3),依此类推:这个词每出现一次,它就会被打印一次,但计数值不同。我需要每个单词只统计一次。
# Asker's original attempt, kept as posted. Indentation is reconstructed; every
# statement is presumably part of the loop body -- TODO confirm.
for file in files:
    print(file)
    # NOTE(review): f is never closed in the visible code.
    f=open(file, 'r')
    content = f.read()
    wordlist = content.split()
    #print(wordlist)
    # One count per *occurrence*, not per distinct word: a word that appears
    # k times contributes k entries to wordfreq.
    wordfreq = [wordlist.count(w) for w in wordlist] # a list comprehension
    print("List\n" + str(wordlist) + "\n")
    print("Frequencies\n" + str(wordfreq) + "\n")
    # Keeps only the counts above 100; the words they belong to are dropped.
    test = [i for i in wordfreq if i > 100]
    # zip() pairs the filtered counts positionally with the unfiltered
    # wordlist, so a count is not necessarily shown next to its own word.
    print("result\n"+str(list(zip(test,wordlist))))
您可以像这样使用 Counter:
>>> from collections import Counter
>>>
>>> s = "it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"
>>>
>>> d = Counter(s.split())
>>> for k,v in d.items():
... print '{} -> {}'.format(k,v)
...
of -> 4
age -> 2
it -> 3
foolishness -> 1
times -> 2
worst -> 1
.it -> 1
the -> 4
wisdom -> 1
was -> 4
best -> 1
>>>
如果您不想使用 collections.Counter,您可以像这样使用字典:
>>> s = "it was the best of times it was the worst of times .it was the age of wisdom it was the age of foolishness"
>>> d = {}
>>> for word in s.split():
... try:
... d[word] += 1
... except KeyError:
... d[word] = 1
...
>>> d
{'of': 4, 'age': 2, 'it': 3, 'foolishness': 1, 'times': 2, 'worst': 1, '.it': 1, 'the': 4, 'wisdom': 1, 'was': 4, 'best': 1}
您可以使用 collections 中的 Counter:
from collections import Counter
import itertools

# Count the words of every file in ``files`` (an iterable of paths that the
# surrounding code must provide) and collect the ones seen more than 100 times.
for file in files:
    # ``with`` closes the handle as soon as the words have been read.
    with open(file) as handle:
        # split() with no argument already drops the trailing newline.
        # Materialise the words in a list: a bare chain iterator would be
        # exhausted by Counter() and print as an opaque object below.
        data = list(itertools.chain.from_iterable(line.split() for line in handle))
    the_counts = Counter(data)
    print("wordlist: {}".format(data))
    print("frequencies: {}".format(dict(the_counts)))
    # (word, count) pairs for the words that occur more than 100 times.
    test = [(a, b) for a, b in the_counts.items() if b > 100]
from collections import Counter

# Build a word -> occurrence-count mapping for every path in ``files``
# (``files`` must be provided by the surrounding code).
for file in files:
    # ``with`` closes the handle instead of leaving it to the garbage collector.
    with open(file) as handle:
        words = handle.read().split()
    frequencies = Counter(words)
没有计数器的解决方案:
# Count every word of ``s`` without collections.Counter.
# ``s`` is the input text and must be defined before this snippet runs.
new = s.split(' ')
m = list()
for i in new:
    # new.count(i) rescans the whole list for every word: fine for short
    # text, quadratic for long input.
    m.append((i, new.count(i)))
# set() collapses the duplicate (word, count) tuples so each word prints once.
for i in set(m):
    print(i)  # single-argument print(...) works on Python 2 and 3 alike
del m[:]  # deleting list for using it again
输出:
('best', 1)
('was', 4)
('times', 2)
('it', 3)
('worst', 1)
('.it', 1)
('wisdom', 1)
('foolishness', 1)
('the', 4)
('of', 4)
('age', 2)
another test :
s = 'was was it was hello it was'
output :
('hello', 1)
('was', 4)
('it', 2)
如果您的数据保存在文件中,请使用以下代码:
# Count every word of a (possibly multi-line) text file without Counter.
with open('your-file-name', 'r') as r:
    s = r.read()  # reading multi lines
# split() with no argument splits on any run of whitespace, newlines included.
# Deleting the newlines first would glue the last word of a line to the first
# word of the next one, and split(' ') would yield empty strings for
# consecutive spaces.
new = s.split()
m = list()
for i in new:
    m.append((i, new.count(i)))
# set() collapses the duplicate (word, count) tuples so each word prints once.
for i in set(m):
    print(i)  # single-argument print(...) works on Python 2 and 3 alike
del m[:]  # deleting list for using it again
import pandas as pd

# One row per distinct word of ``txt`` (the input text, defined elsewhere)
# with its frequency. The columns are named explicitly through
# rename_axis()/reset_index(name=...) rather than by renaming the default
# labels of value_counts().reset_index(), which differ across pandas versions.
a = (
    pd.Series(txt.split())
    .value_counts()
    .rename_axis("word")
    .reset_index(name="counts")
)
a[a.counts < 100]  # keep only the words that occur fewer than 100 times