使用 Python 打印出字符、单词和行的数量
Print out the character, word, and line amounts using Python
这是我目前拥有的:
def stats(filename):
    """Print the number of lines, words, and characters in file `filename`.

    NOTE: as written, only the line count is correct; the word and
    character counts always come out as 0 (see the comments below).
    """
    infile = open(filename)
    # readlines() consumes the whole file and leaves the position at EOF.
    lines = infile.readlines()
    # Both read() calls start at EOF, so each one returns an empty string.
    words = infile.read()
    chars = infile.read()
    infile.close()
    print("line count:", len(lines))
    print("word count:", len(words.split()))
    print("character counter:", len(chars))
执行时,行数输出正确,但单词数和字符数都输出为 0。不知道为什么...
readlines() 读到文件末尾后,文件位置就停在末尾,需要调用 infile.seek(0) 回到文件开头,这样才能重新读取内容。
# Demonstrates rewinding with seek(0) so the same handle can be read again.
# The context manager closes the file even if one of the reads raises.
with open('data') as infile:
    lines = infile.readlines()  # consumes the file; position is now at EOF
    infile.seek(0)              # rewind before the next read
    print(lines)
    words = infile.read()
    infile.seek(0)              # rewind again for the final read
    chars = infile.read()
print("line count:", len(lines))
print("word count:", len(words.split()))
print("character counter:", len(chars))
输出:
line count: 2
word count: 19
character counter: 113
另一种方式:
from collections import Counter
from itertools import chain
# Read every line once; the context manager guarantees the handle is closed.
with open('data') as infile:
    lines = infile.readlines()
cnt_lines = len(lines)
# Flatten the per-line word lists into one list of words.
words = list(chain.from_iterable(x.split() for x in lines))
cnt_words = len(words)
# Counts only the characters that belong to words; whitespace is excluded.
cnt_chars = sum(len(word) for word in words)
# show words frequency
print(Counter(words))
您可以只遍历文件一次就统计出行数、单词数和字符数,而无需多次回到文件开头;按您原来的做法则必须多次回到开头,因为在统计行数时迭代器就已经被耗尽了:
def stats(filename):
    """Print the line, word, and character counts of the file `filename`.

    The file is traversed once. Words are the whitespace-separated tokens
    produced by ``str.split()``; the character count includes line endings.

    Returns:
        True if at least one word occurs more than once, otherwise False.
    """
    lines = chars = 0
    words = []
    with open(filename) as infile:
        for line in infile:
            lines += 1
            words.extend(line.split())
            chars += len(line)
    print("line count:", lines)
    print("word count:", len(words))
    print("character counter:", chars)
    # A set drops repeats, so a shorter set means some word is duplicated.
    return len(words) > len(set(words))
或者利用遍历结束后文件位置停在文件末尾这一副作用来得到字符数:
def stats(filename):
    """Print the line, word, and character counts of the file `filename`.

    The character figure is taken from the file position after the last
    line has been read instead of being summed line by line.

    Returns:
        True if at least one word occurs more than once, otherwise False.
    """
    words = []
    # Pre-set so an empty file reports 0 lines: the loop body never runs
    # for it, which would otherwise leave `lines` unbound.
    lines = 0
    with open(filename) as infile:
        for lines, line in enumerate(infile, 1):
            words.extend(line.split())
        # NOTE(review): on a text-mode file tell() returns an opaque
        # position that normally corresponds to a byte offset, so it can
        # differ from the character count for multi-byte encodings or
        # translated newlines -- confirm this is acceptable for the inputs.
        chars = infile.tell()
    print("line count:", lines)
    print("word count:", len(words))
    print("character counter:", chars)
    # A set drops repeats, so a shorter set means some word is duplicated.
    return len(words) > len(set(words))
您调用 readlines 后迭代器已被耗尽,虽然可以用 seek(0) 返回到开头,但实际上您根本不需要将整个文件读入内存:
def stats(filename):
    """Count the lines, characters, and words of the file `filename`.

    The file is streamed line by line, so it is never held in memory as a
    whole. Words are the whitespace-separated tokens produced by
    ``str.split()``; the character count includes line endings.

    Returns:
        A tuple ``(lines, chars, words, dupes)`` where ``dupes`` is True if
        any word occurs more than once anywhere in the file. An empty file
        yields ``(0, 0, 0, False)``.
    """
    # `lines` starts at 0 so an empty file, for which the loop body never
    # runs, still has a value to return.
    lines = chars = words = 0
    dupes = False
    seen = set()
    with open(filename) as f:
        for lines, line in enumerate(f, 1):
            chars += len(line)
            spl = line.split()
            words += len(spl)
            if dupes:
                # The answer is already known; stop tracking words.
                continue
            unique = set(spl)
            # A repeat is either inside this line or shared with an
            # earlier line.
            if len(unique) < len(spl) or not seen.isdisjoint(unique):
                dupes = True
            else:
                seen |= unique
    return lines, chars, words, dupes
然后解包赋值:
# Unpack the four values returned by stats() into one name per statistic.
no_lines, no_chars, no_words, has_dupes = stats("your_file")
如果您不想把行结尾计入字符数,可以改用 chars += len(line.rstrip())(注意 rstrip() 会去掉行尾的所有空白字符;若只想去掉换行符,请用 line.rstrip("\n"))。该代码只保存确实需要的数据;而使用 readlines、read 或保存完整数据的字典等做法,意味着对于大文件,您的代码将不是很实用。
File_Name = 'file.txt'
line_count = 0
word_count = 0
char_count = 0
with open(File_Name, 'r') as fh:
    # Iterate the handle directly so only one line is held in memory at a
    # time instead of loading the whole file with readlines().
    for line in fh:
        line_count += 1
        word_count += len(line.split())
        # len(line) includes the trailing newline, if the line has one.
        char_count += len(line)
print('Line Count : ', line_count)
print('Word Count : ', word_count)
print('Char Count : ', char_count)
这是我目前拥有的:
def stats(filename):
    """Print the number of lines, words, and characters in file `filename`.

    NOTE: as written, only the line count is correct; the word and
    character counts always come out as 0 (see the comments below).
    """
    infile = open(filename)
    # readlines() consumes the whole file and leaves the position at EOF.
    lines = infile.readlines()
    # Both read() calls start at EOF, so each one returns an empty string.
    words = infile.read()
    chars = infile.read()
    infile.close()
    print("line count:", len(lines))
    print("word count:", len(words.split()))
    print("character counter:", len(chars))
执行时,行数输出正确,但单词数和字符数都输出为 0。不知道为什么...
readlines() 读到文件末尾后,文件位置就停在末尾,需要调用 infile.seek(0) 回到文件开头,这样才能重新读取内容。
# Demonstrates rewinding with seek(0) so the same handle can be read again.
# The context manager closes the file even if one of the reads raises.
with open('data') as infile:
    lines = infile.readlines()  # consumes the file; position is now at EOF
    infile.seek(0)              # rewind before the next read
    print(lines)
    words = infile.read()
    infile.seek(0)              # rewind again for the final read
    chars = infile.read()
print("line count:", len(lines))
print("word count:", len(words.split()))
print("character counter:", len(chars))
输出:
line count: 2
word count: 19
character counter: 113
另一种方式:
from collections import Counter
from itertools import chain
# Read every line once; the context manager guarantees the handle is closed.
with open('data') as infile:
    lines = infile.readlines()
cnt_lines = len(lines)
# Flatten the per-line word lists into one list of words.
words = list(chain.from_iterable(x.split() for x in lines))
cnt_words = len(words)
# Counts only the characters that belong to words; whitespace is excluded.
cnt_chars = sum(len(word) for word in words)
# show words frequency
print(Counter(words))
您可以只遍历文件一次就统计出行数、单词数和字符数,而无需多次回到文件开头;按您原来的做法则必须多次回到开头,因为在统计行数时迭代器就已经被耗尽了:
def stats(filename):
    """Print the line, word, and character counts of the file `filename`.

    The file is traversed once. Words are the whitespace-separated tokens
    produced by ``str.split()``; the character count includes line endings.

    Returns:
        True if at least one word occurs more than once, otherwise False.
    """
    lines = chars = 0
    words = []
    with open(filename) as infile:
        for line in infile:
            lines += 1
            words.extend(line.split())
            chars += len(line)
    print("line count:", lines)
    print("word count:", len(words))
    print("character counter:", chars)
    # A set drops repeats, so a shorter set means some word is duplicated.
    return len(words) > len(set(words))
或者利用遍历结束后文件位置停在文件末尾这一副作用来得到字符数:
def stats(filename):
    """Print the line, word, and character counts of the file `filename`.

    The character figure is taken from the file position after the last
    line has been read instead of being summed line by line.

    Returns:
        True if at least one word occurs more than once, otherwise False.
    """
    words = []
    # Pre-set so an empty file reports 0 lines: the loop body never runs
    # for it, which would otherwise leave `lines` unbound.
    lines = 0
    with open(filename) as infile:
        for lines, line in enumerate(infile, 1):
            words.extend(line.split())
        # NOTE(review): on a text-mode file tell() returns an opaque
        # position that normally corresponds to a byte offset, so it can
        # differ from the character count for multi-byte encodings or
        # translated newlines -- confirm this is acceptable for the inputs.
        chars = infile.tell()
    print("line count:", lines)
    print("word count:", len(words))
    print("character counter:", chars)
    # A set drops repeats, so a shorter set means some word is duplicated.
    return len(words) > len(set(words))
您调用 readlines 后迭代器已被耗尽,虽然可以用 seek(0) 返回到开头,但实际上您根本不需要将整个文件读入内存:
def stats(filename):
    """Count the lines, characters, and words of the file `filename`.

    The file is streamed line by line, so it is never held in memory as a
    whole. Words are the whitespace-separated tokens produced by
    ``str.split()``; the character count includes line endings.

    Returns:
        A tuple ``(lines, chars, words, dupes)`` where ``dupes`` is True if
        any word occurs more than once anywhere in the file. An empty file
        yields ``(0, 0, 0, False)``.
    """
    # `lines` starts at 0 so an empty file, for which the loop body never
    # runs, still has a value to return.
    lines = chars = words = 0
    dupes = False
    seen = set()
    with open(filename) as f:
        for lines, line in enumerate(f, 1):
            chars += len(line)
            spl = line.split()
            words += len(spl)
            if dupes:
                # The answer is already known; stop tracking words.
                continue
            unique = set(spl)
            # A repeat is either inside this line or shared with an
            # earlier line.
            if len(unique) < len(spl) or not seen.isdisjoint(unique):
                dupes = True
            else:
                seen |= unique
    return lines, chars, words, dupes
然后解包赋值:
# Unpack the four values returned by stats() into one name per statistic.
no_lines, no_chars, no_words, has_dupes = stats("your_file")
如果您不想把行结尾计入字符数,可以改用 chars += len(line.rstrip())(注意 rstrip() 会去掉行尾的所有空白字符;若只想去掉换行符,请用 line.rstrip("\n"))。该代码只保存确实需要的数据;而使用 readlines、read 或保存完整数据的字典等做法,意味着对于大文件,您的代码将不是很实用。
File_Name = 'file.txt'
line_count = 0
word_count = 0
char_count = 0
with open(File_Name, 'r') as fh:
    # Iterate the handle directly so only one line is held in memory at a
    # time instead of loading the whole file with readlines().
    for line in fh:
        line_count += 1
        word_count += len(line.split())
        # len(line) includes the trailing newline, if the line has one.
        char_count += len(line)
print('Line Count : ', line_count)
print('Word Count : ', word_count)
print('Char Count : ', char_count)