HOG handwritten digit recognition not working

I'm working through the handwritten digit recognition chapter of an OpenCV book, and even though I followed along and believe I handled everything correctly, I'm getting the error message Expected 2D array, got 1D array instead. I tried googling for an answer, and it seems a lot of other people have run into a very similar problem, but no real answer was given.

Can anyone explain why this feature.hog() method doesn't return a 2D array? I've been reading some documentation, and apparently it returns a flat 1D array by default, so I don't know why the model.predict() method complains about needing a 2D array. The book I'm following was, I believe, published in 2015, so maybe something has changed since then?
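Here's a minimal check I put together to confirm the shapes involved (it uses a blank 72x72 placeholder instead of a real digit ROI, with the same HOG parameters as my classify.py below):

import numpy as np
from skimage import feature

# stand-in for a thresholded 72x72 digit ROI
image = np.zeros((72, 72), dtype='uint8')

hist = feature.hog(image, orientations=18, pixels_per_cell=(10, 10),
                   cells_per_block=(1, 1), block_norm='L1')

print(hist.shape)                 # (882,)  -- a flat 1D feature vector
print(hist.reshape(1, -1).shape)  # (1, 882) -- the 2D shape predict() expects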

Here is the file I'm trying to run:

classify.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 13:01:39 2020

@author: User
"""


from __future__ import print_function
from sklearn.externals import joblib  # removed in scikit-learn 0.23+; there, use "import joblib" instead
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
import mahotas
import cv2

ap = argparse.ArgumentParser()
ap.add_argument('-m', '--model', required=True, help='Path to model')
ap.add_argument('-i', '--image', required=True, help='Path to image')
args=vars(ap.parse_args())

model = joblib.load(args['model'])

hog = HOG(orientations=18, pixelsPerCell=(10,10),
          cellsPerBlock=(1,1), normalize=True)

image = cv2.imread(args["image"])
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)
# OpenCV 3.x returns (image, contours, hierarchy) here; on OpenCV 4.x,
# findContours returns only (contours, hierarchy), so unpack two values instead
(_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted([(c, cv2.boundingRect(c)[0]) for c in cnts], key=lambda x: x[1])

for (c, _) in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    
    if w >= 7 and h >= 20:
        roi = gray[y:y+h, x:x+w]
        thresh = roi.copy()
        T = mahotas.thresholding.otsu(roi)
        thresh[thresh > T] = 255
        thresh = cv2.bitwise_not(thresh)
        
        thresh = dataset.deskew(thresh, 72)
        thresh = dataset.center_extent(thresh, (72, 72))
        
        cv2.imshow("thresh", thresh)
        
        hist = hog.describe(thresh)
        digit = model.predict(hist)[0]  # this is where it errors
        print("I think that number is: {}".format(digit))
        
        cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 1)
        cv2.putText(image, str(digit), (x-10, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
        cv2.imshow("image", image)
        cv2.waitKey(0)

Here is the custom hog module written for this:

hog.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:22:38 2020

@author: User
"""

from skimage import feature

class HOG:
    def __init__(self, orientations=9, pixelsPerCell=(14,14), cellsPerBlock=(1,1),
                 normalize=False):
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize
    
    def describe(self, image):
        '''
        (2017-11-28) Update for skimage: In scikit-image==0.12, the
        normalise parameter has been updated to transform_sqrt. The
        transform_sqrt parameter performs the exact same operation, only
        with a different name. If you're using an older version of
        scikit-image (again, before the v0.12 release), then you'll want
        to change transform_sqrt to normalise. In scikit-image==0.15 the
        default value of block_norm="L1" has been deprecated and changed
        to block_norm="L2-Hys". Therefore, for this lesson we'll
        explicitly specify block_norm="L1". Doing this will avoid it
        switching to "L2-Hys" with version updates without us knowing
        (and yielding incorrect car logo identification results). You can
        read about L1 and L2 norms here:
        https://gurus.pyimagesearch.com/lesson-sample-histogram-of-oriented-gradients-and-car-logo-recognition/#tour_modal
        '''
        hist = feature.hog(image,
                           orientations=self.orientations,
                           pixels_per_cell=self.pixelsPerCell,
                           cells_per_block=self.cellsPerBlock,
                           transform_sqrt=self.normalize,
                           block_norm="L1")
        return hist
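
Side note: since that docstring is about parameter renames across scikit-image versions, here is a rough compatibility sketch (my own addition, not from the book) that inspects the installed feature.hog to decide which keyword names to pass:

import inspect
from skimage import feature

def describe_compat(image, orientations=9, pixels_per_cell=(14, 14),
                    cells_per_block=(1, 1), normalize=False):
    # inspect the installed feature.hog to see which keywords it accepts
    params = inspect.signature(feature.hog).parameters

    kwargs = {}
    # scikit-image >= 0.12 renamed `normalise` to `transform_sqrt`
    kwargs['transform_sqrt' if 'transform_sqrt' in params else 'normalise'] = normalize
    # block_norm only exists on newer releases; pin it to "L1" where available
    if 'block_norm' in params:
        kwargs['block_norm'] = 'L1'

    return feature.hog(image, orientations=orientations,
                       pixels_per_cell=pixels_per_cell,
                       cells_per_block=cells_per_block, **kwargs)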

And here is what generates the trained model:

train.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:57:26 2020

@author: User
"""

from sklearn.externals import joblib  # removed in scikit-learn 0.23+; there, use "import joblib" instead
from sklearn.svm import LinearSVC
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse

ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='Path to dataset')
ap.add_argument('-m', '--model', required=True, help='path to where model will be stored')
args=vars(ap.parse_args())

(digits, target) = dataset.load_digits(args['dataset'])

data = []

hog = HOG(orientations=9, pixelsPerCell=(14,14),
          cellsPerBlock=(1,1), normalize=True)

for image in digits:
    image = dataset.deskew(image, 20)
    image = dataset.center_extent(image, (20, 20))
    
    hist = hog.describe(image)
    data.append(hist)

model = LinearSVC(random_state=42)
model.fit(data, target)

joblib.dump(model, args['model'])

dataset.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:35:04 2020

@author: User
"""

from . import imutils
import numpy as np
import mahotas
import cv2

def load_digits(datasetPath):
    data = np.genfromtxt(datasetPath, delimiter=',', dtype='uint8')
    target = data[:, 0]
    data = data[:, 1:].reshape(data.shape[0], 28, 28)
    
    return (data, target)

def deskew(image, width):
    (h, w) = image.shape[:2]
    moments = cv2.moments(image)
    
    skew = moments['mu11'] / moments['mu02']
    M = np.float32([
            [1, skew, -0.5 * w * skew],
            [0, 1, 0]])
    image = cv2.warpAffine(image, M, (w, h),
                   flags = cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
    
    image = imutils.resize(image, width=width)
    
    return image

def center_extent(image, size):
    (eW, eH) = size
    
    if image.shape[1] > image.shape[0]:
        image = imutils.resize(image, width=eW)
    else:
        image = imutils.resize(image, height=eH)
    
    extent = np.zeros((eH, eW), dtype = 'uint8')
    
    offsetX = (eW - image.shape[1]) // 2
    offsetY = (eH - image.shape[0]) // 2
    extent[offsetY:offsetY + image.shape[0], 
           offsetX:offsetX + image.shape[1]] = image
       
    CM = mahotas.center_of_mass(extent)
    (cY, cX) = np.round(CM).astype('int32')
    (dX, dY) = ((size[0] // 2) - cX, (size[1] // 2) - cY)
    M = np.float32([[1, 0, dX], [0, 1, dY]])
    extent = cv2.warpAffine(extent, M, size)
    
    return extent
           

And here is the custom imutils module, in case it's needed:

imutils.py

# -*- coding: utf-8 -*-
"""
Created on Tue Sep 29 16:27:16 2020

@author: User
"""

import numpy as np
import cv2

def translate(image, x, y):
    M = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
    return shifted

def rotate(image, angle, center=None, scale=1.0):
    (h, w) = image.shape[:2]
    if center is None:
        center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    dim = None
    (h, w) = image.shape[:2]
    
    if width is None and height is None:
        return image
    
    if width is None:
        r = height / float(h)
        dim = (int(w*r), height)
    
    else:
        r = width / float(w)
        dim = (width, int(h*r))
    
    resized = cv2.resize(image, dim, interpolation = inter)
    return resized

I'm using the data found here (the train.csv file), which I reduced to 5,000 rows with this script:

import pandas as pd

metadata = pd.read_csv('C:/Users/User/Downloads/digit-recognizer/train.csv', low_memory=False)
smaller_df = metadata.head(5000)
smaller_df.to_csv(path_or_buf='data/digits.csv', index=False)
print('successfully wrote a smaller file!')

I actually figured this out a while ago, but only now got a chance to post, so I want to share my findings.

I think that when I originally tried to "reshape" the array, I was actually reshaping the wrong array, which is why it kept giving me an error.

So, to convert the 1D array I had into a 2D array, I took this line: digit = model.predict(hist)[0]

and changed it to: digit = model.predict(hist.reshape(1,-1))[0]
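
For anyone hitting the same error, here is the fixed prediction step; the commented-out variant is an equivalent idiom (my own note, not from the book), since scikit-learn treats a one-element list as a batch containing a single sample:

hist = hog.describe(thresh)

# reshape the flat (n_features,) vector into a (1, n_features) batch of one sample
digit = model.predict(hist.reshape(1, -1))[0]

# an equivalent alternative:
# digit = model.predict([hist])[0]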