Python: 从txt文件目录中统计字数并将字数统计写入单独的txt文件
Python: Counting words from a directory of txt files and writing word counts to a separate txt file
我是 Python 新手,正在尝试统计一个目录下各文本文件的字数,并将结果写入单独的文本文件。不过,我想加上条件:如果字数 > 0,就把字数和文件路径写入一个文件;如果字数 == 0,就把字数和文件路径写入另一个文件。下面是我目前的代码。我觉得已经很接近了,但不清楚该如何处理条件判断以及如何分别写入不同的文件。谢谢!
import sys
import os
from collections import Counter
import glob
# Keep a reference to the current stdout so it can be put back later.
stdoutOrigin=sys.stdout
# Rebind sys.stdout to log.txt (opened for writing, truncating any old content);
# subsequent print() calls without file= write to that file.
sys.stdout = open("log.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
    """Count occurrences of each key of ``words`` in every matching text file.

    For each ``*.txt`` file found, the whole file is read, ``words[key]`` is
    overwritten with ``data.count(key)`` (a substring count, not a whole-word
    count) and, if ``action`` is truthy, ``action(filepath, words)`` is called
    once for that file.

    NOTE(review): the glob pattern is built from the string literal "path",
    so ``dirpath`` is never used and files are looked up in ``./path``.
    It looks like ``dirpath`` was intended; the line is kept as posted.

    Args:
        dirpath: Directory to scan (currently unused, see note above).
        words: Dict whose keys are the search strings; values are overwritten
            in place with the counts for the file being processed.
        action: Optional callable taking ``(filepath, words)``.
    """
    for filepath in glob.iglob(os.path.join("path", '*.txt')):
        with open(filepath) as f:
            data = f.read()
            for key,val in words.items():
                #print("key is " + key + "\n")
                ct = data.count(key)
                words[key] = ct
            if action:
                action(filepath, words)
def print_summary(filepath, words):
    """Print the per-key counts for one file to the current stdout.

    Keys are visited in sorted order. ``filepath`` is printed once for every
    key, and a ``key:<TAB>count`` line follows only when the count is above 0.

    Args:
        filepath: Path of the file the counts belong to.
        words: Dict mapping each search string to its count.
    """
    for key,val in sorted(words.items()):
        print(filepath)
        if val > 0:
            print('{0}:\t{1}'.format(key, val))
# The first command-line argument is passed on as the directory to scan.
filepath = sys.argv[1]
# Search strings; dict.fromkeys gives each one an initial count of 0.
keys = ["x", "y"]
words = dict.fromkeys(keys,0)
count_words_in_dir(filepath, words, action=print_summary)
# Close the object sys.stdout currently refers to, then restore the saved stdout.
sys.stdout.close()
sys.stdout=stdoutOrigin
我强烈建议您不要在程序的正常流程中通过重定向 stdout 来把数据写入文件。另外,我不明白字数怎么会出现“count < 0”的情况,我假设您的意思是“count == 0”。
您的代码存在的主要问题在于这一行:
for filepath in glob.iglob(os.path.join("path", '*.txt')):
字符串常量 "path" 我很确定不应该出现在那里,我认为您想用的是 dirpath。这个问题会导致您的代码无法正常工作。
这是您的代码版本,我在其中修复了这些问题并添加了根据计数写入两个不同输出文件的逻辑:
import sys
import os
import glob
# Output file that print_summary uses for keys whose count is above 0.
out1 = open("/tmp/so/seen.txt", "w")
# Output file that print_summary uses for keys whose count is 0.
out2 = open("/tmp/so/missing.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
    """Count occurrences of each key of ``words`` in every ``*.txt`` file of ``dirpath``.

    For each matching file the whole text is read, ``words[key]`` is
    overwritten with ``data.count(key)`` (a substring count, not a whole-word
    count) and, if ``action`` is truthy, ``action(filepath, words)`` is called
    once for that file.

    Args:
        dirpath: Directory whose ``*.txt`` files are scanned (non-recursive).
        words: Dict whose keys are the search strings; values are overwritten
            in place, so after the call they hold the counts of the last file.
        action: Optional callable taking ``(filepath, words)``.
    """
    for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
        with open(filepath) as f:
            data = f.read()
        # The file is already closed here; counting only needs the text.
        for key in words:
            words[key] = data.count(key)
        if action:
            action(filepath, words)
def print_summary(filepath, words):
    """Write one file's counts to the module-level ``out1`` / ``out2`` streams.

    Keys are visited in sorted order. For each key, ``filepath`` and a
    ``key: count`` line are written to ``out1`` when the count is above 0,
    otherwise to ``out2``.

    Args:
        filepath: Path of the file the counts belong to.
        words: Dict mapping each search string to its count.
    """
    for key, val in sorted(words.items()):
        whichout = out1 if val > 0 else out2
        print(filepath, file=whichout)
        print('{0}: {1}'.format(key, val), file=whichout)
# The first command-line argument is passed on as the directory to scan.
filepath = sys.argv[1]
# Search strings; dict.fromkeys gives each one an initial count of 0.
keys = ["country", "friend", "turnip"]
words = dict.fromkeys(keys, 0)
count_words_in_dir(filepath, words, action=print_summary)
# Close both output files so buffered lines are written out.
out1.close()
out2.close()
结果:
文件seen.txt:
/Users/steve/tmp/so/dir/data2.txt
friend: 1
/Users/steve/tmp/so/dir/data.txt
country: 2
/Users/steve/tmp/so/dir/data.txt
friend: 1
文件missing.txt:
/Users/steve/tmp/so/dir/data2.txt
country: 0
/Users/steve/tmp/so/dir/data2.txt
turnip: 0
/Users/steve/tmp/so/dir/data.txt
turnip: 0
(不好意思用了一些比你更有意思的搜索词)
您好,希望我正确理解了您的问题。下面的代码会统计每个文件中有多少个不同的单词,并根据条件执行相应的操作。
import os
all_words = {}


def count(file_path):
    """Tally the words of ``file_path`` into the module-level ``all_words`` dict.

    Each whitespace-separated token has commas and periods removed and is
    lower-cased before being used as the dictionary key. Existing entries in
    ``all_words`` are kept and incremented.

    Args:
        file_path: Path of the text file to read.
    """
    with open(file_path, "r") as f:
        # for better performance it is a good idea to go line by line through file
        for line in f:
            # split() without an argument splits on any run of whitespace, so the
            # trailing newline and repeated spaces never end up inside a token
            for word in line.split():
                key = word.replace(",", "").replace(".", "").lower()
                if not key:
                    # the token consisted only of stripped punctuation
                    continue
                try:
                    # ...if the word is already known increment its repetitions
                    all_words[key] += 1
                except KeyError:
                    # ...if it is not, create it with number of repetitions 1
                    all_words[key] = 1
if __name__ == '__main__':
    # The result files are written into the scanned directory and end in
    # ".txt" themselves; skip them so a re-run does not count earlier output.
    output_files = ("count1.txt", "count2.txt")
    # for every text file in your current directory count how many words it has
    for file in os.listdir("."):
        if file.endswith(".txt") and file not in output_files:
            # start from an empty tally for each file; count() fills it
            all_words = {}
            count(file)
            n = len(all_words)
            # files with at least one distinct word go to count1.txt,
            # files with none go to count2.txt
            target = "count1.txt" if n > 0 else "count2.txt"
            with open(target, "a") as f:
                f.write(file + "\n" + str(n) + "\n")
如果您想把同一个单词的多次出现都计入总数,可以将字典中的所有值相加;或者去掉 try-except 块,直接在那里对每个单词计数。
我是 Python 新手,正在尝试统计一个目录下各文本文件的字数,并将结果写入单独的文本文件。不过,我想加上条件:如果字数 > 0,就把字数和文件路径写入一个文件;如果字数 == 0,就把字数和文件路径写入另一个文件。下面是我目前的代码。我觉得已经很接近了,但不清楚该如何处理条件判断以及如何分别写入不同的文件。谢谢!
import sys
import os
from collections import Counter
import glob
# Keep a reference to the current stdout so it can be put back later.
stdoutOrigin=sys.stdout
# Rebind sys.stdout to log.txt (opened for writing, truncating any old content);
# subsequent print() calls without file= write to that file.
sys.stdout = open("log.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
    """Count occurrences of each key of ``words`` in every matching text file.

    For each ``*.txt`` file found, the whole file is read, ``words[key]`` is
    overwritten with ``data.count(key)`` (a substring count, not a whole-word
    count) and, if ``action`` is truthy, ``action(filepath, words)`` is called
    once for that file.

    NOTE(review): the glob pattern is built from the string literal "path",
    so ``dirpath`` is never used and files are looked up in ``./path``.
    It looks like ``dirpath`` was intended; the line is kept as posted.

    Args:
        dirpath: Directory to scan (currently unused, see note above).
        words: Dict whose keys are the search strings; values are overwritten
            in place with the counts for the file being processed.
        action: Optional callable taking ``(filepath, words)``.
    """
    for filepath in glob.iglob(os.path.join("path", '*.txt')):
        with open(filepath) as f:
            data = f.read()
            for key,val in words.items():
                #print("key is " + key + "\n")
                ct = data.count(key)
                words[key] = ct
            if action:
                action(filepath, words)
def print_summary(filepath, words):
    """Print the per-key counts for one file to the current stdout.

    Keys are visited in sorted order. ``filepath`` is printed once for every
    key, and a ``key:<TAB>count`` line follows only when the count is above 0.

    Args:
        filepath: Path of the file the counts belong to.
        words: Dict mapping each search string to its count.
    """
    for key,val in sorted(words.items()):
        print(filepath)
        if val > 0:
            print('{0}:\t{1}'.format(key, val))
# The first command-line argument is passed on as the directory to scan.
filepath = sys.argv[1]
# Search strings; dict.fromkeys gives each one an initial count of 0.
keys = ["x", "y"]
words = dict.fromkeys(keys,0)
count_words_in_dir(filepath, words, action=print_summary)
# Close the object sys.stdout currently refers to, then restore the saved stdout.
sys.stdout.close()
sys.stdout=stdoutOrigin
我强烈建议您不要在程序的正常流程中通过重定向 stdout 来把数据写入文件。另外,我不明白字数怎么会出现“count < 0”的情况,我假设您的意思是“count == 0”。
您的代码存在的主要问题在于这一行:
for filepath in glob.iglob(os.path.join("path", '*.txt')):
字符串常量 "path" 我很确定不应该出现在那里,我认为您想用的是 dirpath。这个问题会导致您的代码无法正常工作。
这是您的代码版本,我在其中修复了这些问题并添加了根据计数写入两个不同输出文件的逻辑:
import sys
import os
import glob
# Output file that print_summary uses for keys whose count is above 0.
out1 = open("/tmp/so/seen.txt", "w")
# Output file that print_summary uses for keys whose count is 0.
out2 = open("/tmp/so/missing.txt", "w")
def count_words_in_dir(dirpath, words, action=None):
    """Count occurrences of each key of ``words`` in every ``*.txt`` file of ``dirpath``.

    For each matching file the whole text is read, ``words[key]`` is
    overwritten with ``data.count(key)`` (a substring count, not a whole-word
    count) and, if ``action`` is truthy, ``action(filepath, words)`` is called
    once for that file.

    Args:
        dirpath: Directory whose ``*.txt`` files are scanned (non-recursive).
        words: Dict whose keys are the search strings; values are overwritten
            in place, so after the call they hold the counts of the last file.
        action: Optional callable taking ``(filepath, words)``.
    """
    for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
        with open(filepath) as f:
            data = f.read()
        # The file is already closed here; counting only needs the text.
        for key in words:
            words[key] = data.count(key)
        if action:
            action(filepath, words)
def print_summary(filepath, words):
    """Write one file's counts to the module-level ``out1`` / ``out2`` streams.

    Keys are visited in sorted order. For each key, ``filepath`` and a
    ``key: count`` line are written to ``out1`` when the count is above 0,
    otherwise to ``out2``.

    Args:
        filepath: Path of the file the counts belong to.
        words: Dict mapping each search string to its count.
    """
    for key, val in sorted(words.items()):
        whichout = out1 if val > 0 else out2
        print(filepath, file=whichout)
        print('{0}: {1}'.format(key, val), file=whichout)
# The first command-line argument is passed on as the directory to scan.
filepath = sys.argv[1]
# Search strings; dict.fromkeys gives each one an initial count of 0.
keys = ["country", "friend", "turnip"]
words = dict.fromkeys(keys, 0)
count_words_in_dir(filepath, words, action=print_summary)
# Close both output files so buffered lines are written out.
out1.close()
out2.close()
结果:
文件seen.txt:
/Users/steve/tmp/so/dir/data2.txt
friend: 1
/Users/steve/tmp/so/dir/data.txt
country: 2
/Users/steve/tmp/so/dir/data.txt
friend: 1
文件missing.txt:
/Users/steve/tmp/so/dir/data2.txt
country: 0
/Users/steve/tmp/so/dir/data2.txt
turnip: 0
/Users/steve/tmp/so/dir/data.txt
turnip: 0
(不好意思用了一些比你更有意思的搜索词)
您好,希望我正确理解了您的问题。下面的代码会统计每个文件中有多少个不同的单词,并根据条件执行相应的操作。
import os
all_words = {}


def count(file_path):
    """Tally the words of ``file_path`` into the module-level ``all_words`` dict.

    Each whitespace-separated token has commas and periods removed and is
    lower-cased before being used as the dictionary key. Existing entries in
    ``all_words`` are kept and incremented.

    Args:
        file_path: Path of the text file to read.
    """
    with open(file_path, "r") as f:
        # for better performance it is a good idea to go line by line through file
        for line in f:
            # split() without an argument splits on any run of whitespace, so the
            # trailing newline and repeated spaces never end up inside a token
            for word in line.split():
                key = word.replace(",", "").replace(".", "").lower()
                if not key:
                    # the token consisted only of stripped punctuation
                    continue
                try:
                    # ...if the word is already known increment its repetitions
                    all_words[key] += 1
                except KeyError:
                    # ...if it is not, create it with number of repetitions 1
                    all_words[key] = 1
if __name__ == '__main__':
    # The result files are written into the scanned directory and end in
    # ".txt" themselves; skip them so a re-run does not count earlier output.
    output_files = ("count1.txt", "count2.txt")
    # for every text file in your current directory count how many words it has
    for file in os.listdir("."):
        if file.endswith(".txt") and file not in output_files:
            # start from an empty tally for each file; count() fills it
            all_words = {}
            count(file)
            n = len(all_words)
            # files with at least one distinct word go to count1.txt,
            # files with none go to count2.txt
            target = "count1.txt" if n > 0 else "count2.txt"
            with open(target, "a") as f:
                f.write(file + "\n" + str(n) + "\n")
如果您想把同一个单词的多次出现都计入总数,可以将字典中的所有值相加;或者去掉 try-except 块,直接在那里对每个单词计数。