为 OCR 检测和分割图像
detect and split image for OCR
我正在尝试对标准表格进行 OCR(它们的正面和背面都被扫描了)。
我只想对扫描得到的第二张图像(带有文本信息的那张)进行 OCR——有没有办法检测并拆分这两张图像,然后只处理正确的那张?如果我遗漏了什么重要信息,请见谅,我才刚刚入门。
import pytesseract as tess
import os
from PIL import Image
import pandas as pd
import tesserocr
# Directory (folder) where the scanned images are located.
path = "/Users/oliviervandhuynslager/PycharmProjects/OCR/DC_SCANS_TEST"

# List the directory once and sort it, so that fileName[i] and fullText[i]
# always refer to the same file (os.listdir returns entries in arbitrary
# order, so sorting only one of the two lists would misalign them).
fileName = sorted(os.listdir(path))  # original filenames, sorted
count = len(fileName)  # number of directory entries found
fullText = []  # OCR result per file, in the same order as fileName
#%%
# APPEND (OCR) text from images TO LIST fullText
for imageName in fileName:
    inputPath = os.path.join(path, imageName)
    # The context manager closes the underlying image file after OCR.
    with Image.open(inputPath) as img:
        text = tess.image_to_string(img, lang="eng")
    fullText.append(text)
下面是针对所示图像的可运行示例:
import cv2
import numpy as np
import pytesseract
# Path to the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd=r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the scan as a single-channel grayscale image (flag 0).
img = cv2.imread("BFezy.png", 0)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("BFezy.png")

# Erode twice and dilate once with a large 25x25 kernel, then apply an
# inverted binary threshold at 150 so that dark regions become foreground.
# NOTE(review): presumably this merges each scanned side into a single blob;
# the kernel size and threshold are tuned for this sample image.
kernel = np.ones((25, 25), np.uint8)
eroded = cv2.erode(img, kernel, iterations=2)
dilated = cv2.dilate(eroded, kernel, iterations=1)
thresholded = cv2.threshold(dilated, 150, 255, cv2.THRESH_BINARY_INV)[1]

# Contours must be searched on the thresholded mask (the original code passed
# the undefined name `th`). Index [-2] selects the contour list both on OpenCV
# builds that return (contours, hierarchy) and on those that return
# (image, contours, hierarchy).
countours = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Only proceed when exactly two regions were detected; crop the bounding box
# of the first contour out of the original grayscale image and OCR it.
if len(countours) == 2:
    x, y, w, h = cv2.boundingRect(countours[0])
    crop = img[y:h + y, x:w + x]
    text = pytesseract.image_to_string(crop)
    print(text)
我正在尝试对标准表格进行 OCR(它们的正面和背面都被扫描了)。
我只想对扫描得到的第二张图像(带有文本信息的那张)进行 OCR——有没有办法检测并拆分这两张图像,然后只处理正确的那张?如果我遗漏了什么重要信息,请见谅,我才刚刚入门。
import pytesseract as tess
import os
from PIL import Image
import pandas as pd
import tesserocr
# Directory (folder) where the scanned images are located.
path = "/Users/oliviervandhuynslager/PycharmProjects/OCR/DC_SCANS_TEST"

# List the directory once and sort it, so that fileName[i] and fullText[i]
# always refer to the same file (os.listdir returns entries in arbitrary
# order, so sorting only one of the two lists would misalign them).
fileName = sorted(os.listdir(path))  # original filenames, sorted
count = len(fileName)  # number of directory entries found
fullText = []  # OCR result per file, in the same order as fileName
#%%
# APPEND (OCR) text from images TO LIST fullText
for imageName in fileName:
    inputPath = os.path.join(path, imageName)
    # The context manager closes the underlying image file after OCR.
    with Image.open(inputPath) as img:
        text = tess.image_to_string(img, lang="eng")
    fullText.append(text)
下面是针对所示图像的可运行示例:
import cv2
import numpy as np
import pytesseract
# Path to the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd=r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the scan as a single-channel grayscale image (flag 0).
img = cv2.imread("BFezy.png", 0)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("BFezy.png")

# Erode twice and dilate once with a large 25x25 kernel, then apply an
# inverted binary threshold at 150 so that dark regions become foreground.
# NOTE(review): presumably this merges each scanned side into a single blob;
# the kernel size and threshold are tuned for this sample image.
kernel = np.ones((25, 25), np.uint8)
eroded = cv2.erode(img, kernel, iterations=2)
dilated = cv2.dilate(eroded, kernel, iterations=1)
thresholded = cv2.threshold(dilated, 150, 255, cv2.THRESH_BINARY_INV)[1]

# Contours must be searched on the thresholded mask (the original code passed
# the undefined name `th`). Index [-2] selects the contour list both on OpenCV
# builds that return (contours, hierarchy) and on those that return
# (image, contours, hierarchy).
countours = cv2.findContours(thresholded, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Only proceed when exactly two regions were detected; crop the bounding box
# of the first contour out of the original grayscale image and OCR it.
if len(countours) == 2:
    x, y, w, h = cv2.boundingRect(countours[0])
    crop = img[y:h + y, x:w + x]
    text = pytesseract.image_to_string(crop)
    print(text)