pytesseract 检测到错误的整数值

Question

我正在尝试识别每个方块上的数字，本以为可以使用 pytesseract 库来完成，但不知为何读到的值是错误的。

这是控制台输出：

这里有我所有的照片（它们是分开的，这只是为了展示它们）

import numpy as np 
import cv2 
import re
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
 

# Locate the coloured cubes in the photo, mark each one's centre and run
# Tesseract on the thresholded crop of every cube.
img = cv2.imread('gulRecNum.jpg')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("Could not read image 'gulRecNum.jpg'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
lower = (0, 240, 160)
upper = (30, 255, 255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology: open first, then close. The close has to run on the
# opened image ("clean"); running it on "thresh" again would throw the
# result of the open step away.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)

# get external contours (findContours returns 2 or 3 values depending on
# the OpenCV version, hence the length check)
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result1 = img.copy()
result2 = img.copy()

mask = np.zeros(result2.shape, dtype=np.uint8)
# From here on "thresh" is the inverted Otsu threshold of the grayscale image.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0

for c in contours:
    cv2.drawContours(result1, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
    box = np.intp(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)

    # Only continue if the area is larger than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal <= 1:
        continue

    # get the center of mass
    M = cv2.moments(c)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    center = (cx, cy)
    print("\nx: ", cx, "\ny: ", cy)
    color = (0, 0, 255)

    cv2.circle(result2, center, 3, color, -1)
    cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)

    # LOOK AT THIS PART
    # Crop the upright bounding box of the contour out of the Otsu image;
    # "255 -" undoes the inversion applied by THRESH_BINARY_INV.
    x, y, w, h = cv2.boundingRect(c)
    ROI = 255 - thresh[y:y+h, x:x+w]
    cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)

    # Strip the surrounding whitespace/control characters from the OCR text
    # so that only the recognised characters are printed.
    Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789').strip()
    print("Number ", Number)

    ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png", result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)

cv2.waitKey(0)
cv2.destroyAllWindows()

我以为只要写 Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') 然后 print(Number)，就能从图片中得到数字，但结果并非如此，这是为什么？

编辑新错误

这张图怎么解决？

from PIL import Image
from operator import itemgetter
import numpy as np 
import easyocr
import cv2 
import re
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

# Module-level accumulator: the main loop further down appends one
# [cx, cy, result] entry per processed contour.
Cubes = []

def _find_digit(image):
    """Look for a digit in ``image``, trying all four 90-degree orientations.

    The image is OCR'd as given first and then after each of three
    successive clockwise quarter turns. Uses the module-level EasyOCR
    ``reader``.

    Returns:
        ``(text, oriented_image, turns)`` for the first orientation whose
        first OCR hit consists only of digits, or ``None`` when no
        orientation yields one.
    """
    candidate = image
    for turns in range(4):
        if turns:
            candidate = cv2.rotate(candidate, cv2.ROTATE_90_CLOCKWISE)
        hits = reader.readtext(candidate)
        if turns == 0:
            # debug output: raw OCR hits for the unrotated crop
            print(hits)
        # An empty list means no text was found, so there is nothing to
        # index; indexing it is what raised "IndexError: list index out of
        # range".
        if hits and hits[0][1].isdigit():
            return hits[0][1], candidate, turns
    return None


def getNumber(ROI):
    """Read the digit shown in the image file ``ROI``.

    The image is thresholded, every contour covering more than a tenth of
    the image area is cut out along its minimum-area rectangle, and the
    crop is passed to EasyOCR in up to four orientations. The first crop
    that yields a digit is displayed in a window and its text is returned.

    Args:
        ROI: Path of the image file to analyse (also used in the window
            title).

    Returns:
        The recognised digit text as a string, or ``None`` when no large
        contour exists or no orientation of any crop reads as a digit.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(ROI)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image '{}'".format(ROI))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; [-2] is the contour list
    # in both cases.
    contours = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2]

    ## only look at contours that have big areas
    rows, cols = img.shape[:2]
    lp_area = (rows * cols) / 10

    for cnt in contours:
        if cv2.contourArea(cnt) <= lp_area:
            continue

        # Minimum area rectangle and its corner points
        rect = cv2.minAreaRect(cnt)
        width = int(rect[1][0])
        height = int(rect[1][1])
        if width < 1 or height < 1:
            # A zero-sized target cannot be warped into.
            continue
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = np.intp(cv2.boxPoints(rect))

        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))

        # Run OCR on the cropped image, rotating only while no digit is read
        found = _find_digit(warped)
        if found is None:
            continue
        predicted_digit, shown, turns = found
        cv2.imshow(("warped " if turns == 0 else "Image ") + ROI, shown)
        # Stop at the first digit so a later contour cannot overwrite it.
        return predicted_digit

    return None

def sortNumbers(Cubes):
    """Return a new list of cube entries ordered by their numeric label.

    Args:
        Cubes: Iterable of entries whose third element (index 2) is the
            label, e.g. ``[x, y, "3"]``. The input is not modified.

    Returns:
        A new list sorted ascending by ``int(label)``. Entries whose label
        cannot be converted with ``int()`` (for example ``None`` or
        non-digit OCR text) are placed after all numeric entries, in their
        original relative order, instead of aborting the whole sort.
    """
    def _label_key(entry):
        try:
            return (0, int(entry[2]))
        except (TypeError, ValueError):
            # Unreadable label: rank after every numeric label.
            return (1, 0)

    return sorted(Cubes, key=_label_key)
        

# Locate the coloured cubes in the photo, read the digit on each one with
# getNumber() and print the cubes sorted by that digit.
#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("Could not read image 'webcam7.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# create a binary thresholded image on hue between red and yellow
#Change these if cube colours changes?
lower = (20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology: open first, then close. The close has to run on the
# opened image ("clean"); running it on "thresh" again would throw the
# result of the open step away.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)

# get external contours (findContours returns 2 or 3 values depending on
# the OpenCV version, hence the length check)
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result2 = img.copy()

mask = np.zeros(result2.shape, dtype=np.uint8)
# From here on "thresh" is the inverted Otsu threshold of the grayscale image.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0

for c in contours:
    cv2.drawContours(result2, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
    box = np.intp(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)

    # Only continue if the area is larger than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal <= 1:
        continue

    # get the center of mass
    M = cv2.moments(c)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    center = (cx, cy)
    print("\nx: ", cx, "\ny: ", cy)
    color = (0, 0, 255)

    cv2.circle(result2, center, 3, color, -1)
    cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)

    # Crop the upright bounding box of the contour out of the Otsu image;
    # "255 -" undoes the inversion applied by THRESH_BINARY_INV.
    x, y, w, h = cv2.boundingRect(c)
    ROI = 255 - thresh[y:y+h, x:x+w]
    cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
    roi_path = 'ROI_{}.png'.format(ROI_number)
    cv2.imwrite(roi_path, ROI)

    #Read saved image (number)
    result = getNumber(roi_path)
    print("ROI_number: ", result)
    # sortNumbers() orders the entries by int(label), so only keep cubes
    # whose OCR result really is a number.
    if result is not None and str(result).isdigit():
        Cubes.append([cx, cy, result])
    else:
        print("No digit recognised in", roi_path, "- cube skipped")
    ROI_number += 1

# save result
cv2.imwrite("4cubes_result2.png", result2)

# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()

我收到以下错误（无法检测到号码）

Traceback (most recent call last):
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module>
    result = getNumber('ROI_{}.png'.format(ROI_number))
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber
    predicted_digit = result[0][1]
IndexError: list index out of range

Answer 1

这是我的评论的实现。因为，我没有单独的图像，此代码将适用于给定的网格，如处理过的图像。

对于 OCR，我使用 EasyOCR 而不是 Tesseract。您也可以在每个输出的裁剪图像上尝试 pytesseract。我没有盲目地把图像旋转 4 次（每次 90 度），而是先检查 OCR 结果是否为数字；只有当检测到的不是数字时，才旋转图像并重试。

在 Google Colab 上测试。若要在本地运行，请将 cv2_imshow(...) 替换为 cv2.imshow(...)，并删除 from google.colab.patches import cv2_imshow 这一行导入语句。

这是我在此处对卡片方向校正的回答的修改版本，OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card。之前的所有代码都留作注释。

代码

!pip install easyocr

import easyocr
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory

"""
Based on my answer of rotated card detection,

"""


import cv2
import numpy as np
from google.colab.patches import cv2_imshow


# Load the input image and prepare the binary image, the contour list and
# the area threshold used by the detection loop further down.
img = cv2.imread('1.jpg')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("Could not read image '1.jpg'")


gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

#cv2_imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)



## Get contours
# findContours returns (contours, hierarchy) or (image, contours, hierarchy)
# depending on the OpenCV version; the last two items are the same in both.
contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]


## only draw contour that have big areas
# shape[0] is the number of rows and shape[1] the number of columns; only
# their product (the image area) is used.
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10



#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################

def order_points(pts):
    """Arrange four corner points in a fixed order.

    The corners are identified from the per-point coordinate sum (x + y)
    and coordinate difference (y - x):

    * smallest sum      -> top-left
    * smallest difference -> top-right
    * largest sum       -> bottom-right
    * largest difference  -> bottom-left

    Args:
        pts: NumPy array of points, one ``(x, y)`` pair per row
            (assumed to hold exactly four points -- confirm with callers).

    Returns:
        A ``(4, 2)`` float32 array ordered top-left, top-right,
        bottom-right, bottom-left.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = pts[:, 1] - pts[:, 0]

    top_left = pts[np.argmin(coord_sum)]
    top_right = pts[np.argmin(coord_diff)]
    bottom_right = pts[np.argmax(coord_sum)]
    bottom_left = pts[np.argmax(coord_diff)]

    return np.array([top_left, top_right, bottom_right, bottom_left],
                    dtype="float32")


def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    The corners are first put into top-left, top-right, bottom-right,
    bottom-left order with ``order_points``. The output width is the longer
    of the two horizontal edges (bottom and top), the output height the
    longer of the two vertical edges (right and left), each truncated to an
    integer.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: The four corner points, as accepted by ``order_points``.

    Returns:
        The perspective-corrected ("birds eye view") crop.
    """
    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    out_width = max(int(_edge_length(bottom_right, bottom_left)),
                    int(_edge_length(top_right, top_left)))
    out_height = max(int(_edge_length(top_right, bottom_right)),
                     int(_edge_length(top_left, bottom_left)))

    # Destination corners in the same tl, tr, br, bl order as "corners".
    target = np.array([
        [0, 0],
        [out_width - 1, 0],
        [out_width - 1, out_height - 1],
        [0, out_height - 1]], dtype="float32")

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))


#################################################################
#print(len(contours))





# For every large contour: outline its minimum-area rectangle on tmp_img,
# cut the rectangle out upright and OCR the crop, rotating it in 90-degree
# steps until a digit is read.
tmp_img = img.copy()

for cnt in contours:
    # Only needed by the commented-out vertex-count filter below.
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    ## calculate number of vertices
    #print(len(approx))


    ## Get the largest contours only
    ## Side count cannot be used since contours are not all rectangular
    if cv2.contourArea(cnt) > lp_area:
    #if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:

        # print("\n\n")
        # print("#################################################")
        # print("rectangle")
        # print("#################################################")
        # print("\n\n")


        #tmp_img = img.copy()
        #cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Contour Borders', tmp_img)
        #cv2.waitKey(0)


        # tmp_img = img.copy()
        # cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('Contour Filled', tmp_img)
        # #cv2.waitKey(0)


        # # Make a hull arround the contour and draw it on the original image
        # tmp_img = img.copy()
        # mask = np.zeros((img.shape[:2]), np.uint8)
        # hull = cv2.convexHull(cnt)
        # cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
        # cv2_imshow(mask)
        # #cv2.imshow('Convex Hull Mask', mask)
        # #cv2.waitKey(0)


        # # Draw minimum area rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Minimum Area Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # Draw box corners and minimum area rectangle
        #tmp_img = img.copy()
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = np.intp(box)
        #print(rect)
        #print(box)
        cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
        for corner in box:
            cv2.circle(tmp_img, tuple(corner), 8, (0, 255, 0), -1)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Minimum Area Rectangle', tmp_img)
        #cv2.waitKey(0)



        ## Correct orientation and crop
        # Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        if width < 1 or height < 1:
            # A zero-sized target cannot be warped into.
            continue
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1],
                            [0, 0],
                            [width-1, 0],
                            [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))
        #cv2_imshow(warped)



        # Run OCR on cropped image.
        # Try the crop as it is first; rotate it clockwise by 90 degrees
        # (at most three times) only while no digit has been read.
        print("Detected Text:")
        candidate = warped
        for turn in range(4):
            if turn:
                candidate = cv2.rotate(candidate, cv2.ROTATE_90_CLOCKWISE)
            result = reader.readtext(candidate)
            # An empty list means no text was found in this orientation,
            # so there is no result[0] to look at.
            if not result:
                continue
            predicted_digit = result[0][1]
            if predicted_digit.isdigit():
                print(result)
                print(predicted_digit)
                cv2_imshow(candidate)
                break
              
        


        # # Draw bounding rectangle
        # #tmp_img = img.copy()
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # # Bounding Rectangle and Minimum Area Rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)


        # # determine the most extreme points along the contour
        # # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
        # tmp_img = img.copy()
        # extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
        # extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
        # extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
        # extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
        # cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
        # cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
        # cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
        # cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
        # cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)


        # print("Corner Points: ", extLeft, extRight, extTop, extBot)

        # cv2_imshow(tmp_img)
        # #cv2.imshow('img contour drawn', tmp_img)
        # #cv2.waitKey(0)
        # #cv2.destroyAllWindows()



        # ## Perspective Transform
        # tmp_img = img.copy()
        # pts = np.array([extLeft, extRight, extTop, extBot])
        # warped = four_point_transform(tmp_img, pts)
        # cv2_imshow(tmp_img)
        # #cv2.imshow("Warped", warped)
        # #cv2.waitKey(0)


# Show the copy of the input image on which the loop above drew the
# minimum-area rectangles and their corner points.
cv2_imshow(tmp_img)


#cv2.destroyAllWindows()

输出预测

Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1

Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2

Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4

Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3

带角的白色区域检测

替代方法，

在每个面积超过特定阈值的大轮廓上，尝试使用在 MNIST 等数据集上预训练的数字分类模型。
使用带旋转的多任务目标检测：网络的一个输出用于检测，另一个输出通过角度回归来预测方向。
先使用 EAST 等文本检测器定位文本，再对每个检测到的文本区域运行 OCR。

pytesseract 检测到错误的整数值

pytesseract detects the wrong integer values

python

python-tesseract

cv2

代码