TypeError: __init__() takes 1 positional argument but 2 were given (Python multiprocessing with Pytesseract)
TypeError: __init__() takes 1 positional argument but 2 were given (Python multiprocessing with Pytesseract)
我在尝试将 Python 的 multiprocessing 库与 pytesseract 和 pdf2image 一起使用时收到了以下错误消息，但不确定它的含义，也不知道该如何修复。我看到其他帖子中类似的错误消息都与将 self 作为参数传递给类的方法有关，但在这个例子中我并没有创建任何类。
C:\Users\erik7>python "C:\Users\erik7\Documents\Python Projects\multiprocess_test2.py"
0
Exception in thread Thread-11:
Traceback (most recent call last):
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\pool.py", line 576, in _handle_results
task = get()
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() takes 1 positional argument but 2 were given
1
2
3
4
5
6
7
8
9
我的代码:
import pytesseract
import pdf2image
import multiprocessing
def extract(img, page_num):
    """Return the page orientation that Tesseract OSD reports for one image.

    Intended to be called in a ``multiprocessing.Pool`` worker via
    ``starmap``.

    Args:
        img: A page image, as produced by ``pdf2image.convert_from_path``.
        page_num: Zero-based page index; only printed for progress output.

    Returns:
        The ``"orientation"`` entry of the OSD result dictionary.
    """
    # NOTE: this version does not set pytesseract.pytesseract.tesseract_cmd
    # itself; it is only assigned under the ``__main__`` guard, which worker
    # processes started with the "spawn" method (the Windows default) never
    # execute. Presumably that is why the call fails inside the worker and
    # the parent then reports the TypeError while unpickling the result.
    print(page_num)
    osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
    # The key is spelled "orientation"; the previous "orientaton" could only
    # ever raise KeyError.
    return osd["orientation"]
if __name__ == "__main__":
    # Everything below runs only in the parent process; worker processes that
    # re-import this module skip it.
    pdf_path = r"C:/Users/erik7/Documents/Late Scans for Testing/scans_template2.pdf"
    output_fmt = 'jpeg'
    img_dpi = 300
    pop_path = r"C:\Users\erik7\Downloads\poppler-0.90.1\bin"
    output_path = r"C:\Users\erik7\Downloads"  # currently unused
    # NOTE: this assignment configures pytesseract in the parent process only;
    # it is not applied inside spawned pool workers.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    converted_path = r"C:\Users\erik7\Downloads\converted_images"

    # Render every PDF page to a grayscale image.
    converted = pdf2image.convert_from_path(
        pdf_path=pdf_path,
        fmt=output_fmt,
        dpi=img_dpi,
        poppler_path=pop_path,
        output_folder=converted_path,
        grayscale=True,
        thread_count=2,
    )

    # One (image, page number) argument tuple per page for starmap.
    iterable = [(img, page_num) for page_num, img in enumerate(converted)]

    # The context manager tears the pool down even if starmap raises.
    with multiprocessing.Pool() as pool:
        # starmap already returns one result per page, in page order, so keep
        # that flat list instead of nesting it inside another list.
        results = pool.starmap(extract, iterable)

    print("\n**PROCESS COMPLETED SUCCESSFULLY")
问题解决了。我需要将 pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" 这一行移动到我的 extract 函数中，这样程序就能使用 multiprocessing 成功运行：
import pytesseract
import pdf2image
import multiprocessing
def extract(img, page_num):
    """Return the page orientation that Tesseract OSD reports for one image.

    Intended to be called in a ``multiprocessing.Pool`` worker via
    ``starmap``.

    Args:
        img: A page image, as produced by ``pdf2image.convert_from_path``.
        page_num: Zero-based page index; only printed for progress output.

    Returns:
        The ``"orientation"`` entry of the OSD result dictionary.
    """
    # Configure the Tesseract executable here, inside the worker: with the
    # "spawn" start method (the Windows default) a worker re-imports this
    # module but never runs the ``__main__`` block, so a path assigned there
    # is not visible in this process.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    print(page_num)
    osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
    # The key is spelled "orientation"; the previous "orientaton" raised
    # KeyError.
    return osd["orientation"]
if __name__ == "__main__":
    # Everything below runs only in the parent process; worker processes that
    # re-import this module skip it.
    pdf_path = r"C:/Users/erik7/Documents/Late Scans for Testing/scans_template2.pdf"
    output_fmt = 'jpeg'
    img_dpi = 300
    pop_path = r"C:\Users\erik7\Downloads\poppler-0.90.1\bin"
    output_path = r"C:\Users\erik7\Downloads"  # currently unused
    converted_path = r"C:\Users\erik7\Downloads\converted_images"

    # Render every PDF page to a grayscale image.
    converted = pdf2image.convert_from_path(
        pdf_path=pdf_path,
        fmt=output_fmt,
        dpi=img_dpi,
        poppler_path=pop_path,
        output_folder=converted_path,
        grayscale=True,
        thread_count=2,
    )

    # One (image, page number) argument tuple per page for starmap.
    iterable = [(img, page_num) for page_num, img in enumerate(converted)]

    # The context manager tears the pool down even if starmap raises.
    with multiprocessing.Pool() as pool:
        # starmap already returns one result per page, in page order, so keep
        # that flat list instead of nesting it inside another list.
        results = pool.starmap(extract, iterable)

    print("\n**PROCESS COMPLETED SUCCESSFULLY")
我在尝试将 Python 的 multiprocessing 库与 pytesseract 和 pdf2image 一起使用时收到了以下错误消息，但不确定它的含义，也不知道该如何修复。我看到其他帖子中类似的错误消息都与将 self 作为参数传递给类的方法有关，但在这个例子中我并没有创建任何类。
C:\Users\erik7>python "C:\Users\erik7\Documents\Python Projects\multiprocess_test2.py"
0
Exception in thread Thread-11:
Traceback (most recent call last):
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\pool.py", line 576, in _handle_results
task = get()
File "C:\Users\erik7\AppData\Local\Programs\Python\Python38-32\lib\multiprocessing\connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() takes 1 positional argument but 2 were given
1
2
3
4
5
6
7
8
9
我的代码:
import pytesseract
import pdf2image
import multiprocessing
def extract(img, page_num):
    """Return the page orientation that Tesseract OSD reports for one image.

    Intended to be called in a ``multiprocessing.Pool`` worker via
    ``starmap``.

    Args:
        img: A page image, as produced by ``pdf2image.convert_from_path``.
        page_num: Zero-based page index; only printed for progress output.

    Returns:
        The ``"orientation"`` entry of the OSD result dictionary.
    """
    # NOTE: this version does not set pytesseract.pytesseract.tesseract_cmd
    # itself; it is only assigned under the ``__main__`` guard, which worker
    # processes started with the "spawn" method (the Windows default) never
    # execute. Presumably that is why the call fails inside the worker and
    # the parent then reports the TypeError while unpickling the result.
    print(page_num)
    osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
    # The key is spelled "orientation"; the previous "orientaton" could only
    # ever raise KeyError.
    return osd["orientation"]
if __name__ == "__main__":
    # Everything below runs only in the parent process; worker processes that
    # re-import this module skip it.
    pdf_path = r"C:/Users/erik7/Documents/Late Scans for Testing/scans_template2.pdf"
    output_fmt = 'jpeg'
    img_dpi = 300
    pop_path = r"C:\Users\erik7\Downloads\poppler-0.90.1\bin"
    output_path = r"C:\Users\erik7\Downloads"  # currently unused
    # NOTE: this assignment configures pytesseract in the parent process only;
    # it is not applied inside spawned pool workers.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    converted_path = r"C:\Users\erik7\Downloads\converted_images"

    # Render every PDF page to a grayscale image.
    converted = pdf2image.convert_from_path(
        pdf_path=pdf_path,
        fmt=output_fmt,
        dpi=img_dpi,
        poppler_path=pop_path,
        output_folder=converted_path,
        grayscale=True,
        thread_count=2,
    )

    # One (image, page number) argument tuple per page for starmap.
    iterable = [(img, page_num) for page_num, img in enumerate(converted)]

    # The context manager tears the pool down even if starmap raises.
    with multiprocessing.Pool() as pool:
        # starmap already returns one result per page, in page order, so keep
        # that flat list instead of nesting it inside another list.
        results = pool.starmap(extract, iterable)

    print("\n**PROCESS COMPLETED SUCCESSFULLY")
问题解决了。我需要将 pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" 这一行移动到我的 extract 函数中，这样程序就能使用 multiprocessing 成功运行：
import pytesseract
import pdf2image
import multiprocessing
def extract(img, page_num):
    """Return the page orientation that Tesseract OSD reports for one image.

    Intended to be called in a ``multiprocessing.Pool`` worker via
    ``starmap``.

    Args:
        img: A page image, as produced by ``pdf2image.convert_from_path``.
        page_num: Zero-based page index; only printed for progress output.

    Returns:
        The ``"orientation"`` entry of the OSD result dictionary.
    """
    # Configure the Tesseract executable here, inside the worker: with the
    # "spawn" start method (the Windows default) a worker re-imports this
    # module but never runs the ``__main__`` block, so a path assigned there
    # is not visible in this process.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    print(page_num)
    osd = pytesseract.image_to_osd(img, output_type=pytesseract.Output.DICT)
    # The key is spelled "orientation"; the previous "orientaton" raised
    # KeyError.
    return osd["orientation"]
if __name__ == "__main__":
    # Everything below runs only in the parent process; worker processes that
    # re-import this module skip it.
    pdf_path = r"C:/Users/erik7/Documents/Late Scans for Testing/scans_template2.pdf"
    output_fmt = 'jpeg'
    img_dpi = 300
    pop_path = r"C:\Users\erik7\Downloads\poppler-0.90.1\bin"
    output_path = r"C:\Users\erik7\Downloads"  # currently unused
    converted_path = r"C:\Users\erik7\Downloads\converted_images"

    # Render every PDF page to a grayscale image.
    converted = pdf2image.convert_from_path(
        pdf_path=pdf_path,
        fmt=output_fmt,
        dpi=img_dpi,
        poppler_path=pop_path,
        output_folder=converted_path,
        grayscale=True,
        thread_count=2,
    )

    # One (image, page number) argument tuple per page for starmap.
    iterable = [(img, page_num) for page_num, img in enumerate(converted)]

    # The context manager tears the pool down even if starmap raises.
    with multiprocessing.Pool() as pool:
        # starmap already returns one result per page, in page order, so keep
        # that flat list instead of nesting it inside another list.
        results = pool.starmap(extract, iterable)

    print("\n**PROCESS COMPLETED SUCCESSFULLY")