通过边界框从图像中提取选定的文本

Extracting selected text by bounding box from an image

我正在尝试通过图像上的边界框来获取选定的文本。 就像边界框只选择了单词一样,我想获取该文本并将其转换为文本文件。 请查看我的代码并进行一些评论,以便我可以实现该功能。

到目前为止,我已将 PDF 文件转换为图像,文本上带有边框。

import numpy as np
import csv
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi
from pytesseract import Output
import cv2

pdf = wi(filename="samplecompany.pdf", resolution=100)
pdfImg = pdf.convert('jpg')
j = 1
for img in pdfImg.sequence:
    page = wi(image=img)
    page.save(filename=str(j)+".jpg")
    img1 = cv2.imread(str(j)+".jpg")

    d = pytesseract.image_to_data(img1, output_type=Output.DICT)
    n_boxes = len(d['level'])
    print(n_boxes)
    for i in range(n_boxes):
        (x, y, w, h) = (d['left'][i], d['top']
                        [i], d['width'][i], d['height'][i])
        print((x, y, w, h))
        cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imwrite(str(j)+".jpg", img1)

    cv2.waitKey(0)
    j += 1

这段代码工作正常我需要从我有 created.using 边界框位置

的图像中获取所需的文本

让 r 表示目标的边界框 (x, y, w, h)。裁剪可以通过

完成
# Crop image
Croped_image = Image[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]

所以你有了边界框。然后尝试从分割的裁剪图像中检测文本。哪一面都是你想要的

You can use this code to get custom text from a an image and change and modify accordingly and this is also save your text to an text file

import io
import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import Output
from wand.image import Image as wi
import sys


pdf = wi(filename="Resume.pdf", resolution=100)
pdfImg = pdf.convert('jpg')
j = 1
imgBlobs = []
img1= []
for img in pdfImg.sequence:
    page = wi(image=img)
    page.save(filename=str(j)+".jpg")
    img1.append(cv2.imread(str(j)+".jpg"))
    j += 1

extracted_text = []

for img2 in img1:
    d = pytesseract.image_to_data(img2, output_type=Output.DICT)
    n_boxes = len(d['level'])
    print(n_boxes)
    extracted_text.append(d['text'][9])
    (x, y, w, h) = (d['left'][9], d['top'][9], d['width'][9], d['height'][9])
    cv2.rectangle(img2, (x, y), (x + w, y + h), (0, 255, 0), 2)


    cv2.imshow('img', img2)

    print(d)


with open('Prototype.txt', 'w') as filehandle:
        for listitem in extracted_text:
            filehandle.write('%s\n' % listitem)