需要使用 python 计算特定目录中的文档数 - MapReduce
Need to count the number of documents in a particular directory using python - MapReduce
以下是我正在使用的程序。它可以正常运行,不报错,但没有任何输出。请帮忙找出问题所在。
import gzip
import warc
import os
from mrjob.job import MRJob
class DocumentCounter(MRJob):
    """MRJob that tallies the entries of a fixed WARC dataset directory.

    Every mapper call lists the directory again and emits one ``(1, 1)``
    pair per entry; the reducer adds the ones up under the single key ``1``.
    """

    def mapper(self, _, line):
        """Emit ``(1, 1)`` once for each entry in the dataset directory.

        Both the incoming key and ``line`` are ignored.
        NOTE(review): mrjob presumably calls this once per input line, so
        the directory would be listed (and counted) once per line of input
        and not at all when the input is empty -- confirm against the
        mrjob documentation.
        """
        for _entry in os.listdir("C://Users//HP//WARCDataset"):
            yield 1, 1

    def reducer(self, key, values):
        """Yield ``key`` paired with the sum of all ``values`` seen for it."""
        total = sum(values)
        yield key, total


if __name__ == "__main__":
    DocumentCounter.run()
IDE 和输出窗口的屏幕截图:程序运行成功,但不显示结果。
class DocumentCounter(MRJob):
    """MRJob that counts the entries found under ``WARC_PATH``.

    ``WARC_PATH`` is not defined in this snippet; it must be provided
    elsewhere in the module before the job runs.
    """

    def mapper_raw(self, _, line):
        """Emit ``("total_documents", 1)`` for each entry in ``WARC_PATH``.

        Both arguments are ignored.
        NOTE(review): mrjob documents ``mapper_raw`` as receiving an input
        file's path and URI rather than a key and a line -- confirm, since
        the parameter name ``line`` suggests otherwise.
        """
        for _fname in os.listdir(WARC_PATH):
            yield "total_documents", 1

    def combiner(self, key, values):
        """Sum the counts emitted for ``key`` before they reach the reducer."""
        partial_total = sum(values)
        yield key, partial_total

    def reducer(self, key, values):
        """Yield ``key`` together with the overall sum of its ``values``."""
        yield key, sum(values)


if __name__ == "__main__":
    DocumentCounter.run()
上面的代码使用 os.listdir 函数遍历给定路径下的所有条目(包括文件和子目录,不会递归进入子目录),并为每个条目计数一次。
以下是我正在使用的程序。它可以正常运行,不报错,但没有任何输出。请帮忙找出问题所在。
import gzip
import warc
import os
from mrjob.job import MRJob
class DocumentCounter(MRJob):
    """MRJob that tallies the entries of a fixed WARC dataset directory.

    Every mapper call lists the directory again and emits one ``(1, 1)``
    pair per entry; the reducer adds the ones up under the single key ``1``.
    """

    def mapper(self, _, line):
        """Emit ``(1, 1)`` once for each entry in the dataset directory.

        Both the incoming key and ``line`` are ignored.
        NOTE(review): mrjob presumably calls this once per input line, so
        the directory would be listed (and counted) once per line of input
        and not at all when the input is empty -- confirm against the
        mrjob documentation.
        """
        for _entry in os.listdir("C://Users//HP//WARCDataset"):
            yield 1, 1

    def reducer(self, key, values):
        """Yield ``key`` paired with the sum of all ``values`` seen for it."""
        total = sum(values)
        yield key, total


if __name__ == "__main__":
    DocumentCounter.run()
class DocumentCounter(MRJob):
    """MRJob that counts the entries found under ``WARC_PATH``.

    ``WARC_PATH`` is not defined in this snippet; it must be provided
    elsewhere in the module before the job runs.
    """

    def mapper_raw(self, _, line):
        """Emit ``("total_documents", 1)`` for each entry in ``WARC_PATH``.

        Both arguments are ignored.
        NOTE(review): mrjob documents ``mapper_raw`` as receiving an input
        file's path and URI rather than a key and a line -- confirm, since
        the parameter name ``line`` suggests otherwise.
        """
        for _fname in os.listdir(WARC_PATH):
            yield "total_documents", 1

    def combiner(self, key, values):
        """Sum the counts emitted for ``key`` before they reach the reducer."""
        partial_total = sum(values)
        yield key, partial_total

    def reducer(self, key, values):
        """Yield ``key`` together with the overall sum of its ``values``."""
        yield key, sum(values)


if __name__ == "__main__":
    DocumentCounter.run()
上面的代码使用 os.listdir 函数遍历给定路径下的所有条目(包括文件和子目录,不会递归进入子目录),并为每个条目计数一次。