HOG handwritten digit recognition not working

I'm working through the handwritten digit recognition chapter of an OpenCV book, and even though I followed along and believe I handled everything correctly, I'm getting the error message Expected 2D array, got 1D array instead. I tried googling for an answer, and it seems a lot of other people have run into a very similar problem, but no real answer was given.

Can anyone explain why this feature.hog() method doesn't return a 2D array? I've been reading some documentation, and apparently it returns a flat 1D array by default, so I don't know why the model.predict() method complains about needing a 2D array. The book I'm following was, I believe, published in 2015, so maybe something has changed since then?
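Here's a minimal check I put together to confirm the shapes involved (it uses a blank 72x72 placeholder instead of a real digit ROI, with the same HOG parameters as my classify.py below):

import numpy as np
from skimage import feature

# stand-in for a thresholded 72x72 digit ROI
image = np.zeros((72, 72), dtype='uint8')

hist = feature.hog(image, orientations=18, pixels_per_cell=(10, 10),
                   cells_per_block=(1, 1), block_norm='L1')

print(hist.shape)                 # (882,)  -- a flat 1D feature vector
print(hist.reshape(1, -1).shape)  # (1, 882) -- the 2D shape predict() expects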

Here is the file I'm trying to run:

classify.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 13:01:39 2020

@author: User
"""


from __future__ import print_function
from sklearn.externals import joblib  # removed in scikit-learn 0.23+; there, use "import joblib" instead
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
import mahotas
import cv2

ap = argparse.ArgumentParser()
ap.add_argument('-m', '--model', required=True, help='Path to model')
ap.add_argument('-i', '--image', required=True, help='Path to image')
args=vars(ap.parse_args())

model = joblib.load(args['model'])

hog = HOG(orientations=18, pixelsPerCell=(10,10),
          cellsPerBlock=(1,1), normalize=True)

image = cv2.imread(args["image"])
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)
# OpenCV 3.x returns (image, contours, hierarchy) here; on OpenCV 4.x,
# findContours returns only (contours, hierarchy), so unpack two values instead
(_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted([(c, cv2.boundingRect(c)[0]) for c in cnts], key=lambda x: x[1])

for (c, _) in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    
    if w >= 7 and h >= 20:
        roi = gray[y:y+h, x:x+w]
        thresh = roi.copy()
        T = mahotas.thresholding.otsu(roi)
        thresh[thresh > T] = 255
        thresh = cv2.bitwise_not(thresh)
        
        thresh = dataset.deskew(thresh, 72)
        thresh = dataset.center_extent(thresh, (72, 72))
        
        cv2.imshow("thresh", thresh)
        
        hist = hog.describe(thresh)
        digit = model.predict(hist)[0]  # this is where it errors
        print("I think that number is: {}".format(digit))
        
        cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 1)
        cv2.putText(image, str(digit), (x-10, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
        cv2.imshow("image", image)
        cv2.waitKey(0)

Here is the custom hog module written for this:

hog.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:22:38 2020

@author: User
"""

from skimage import feature

class HOG:
    def __init__(self, orientations=9, pixelsPerCell=(14,14), cellsPerBlock=(1,1),
                 normalize=False):
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize
    
    def describe(self, image):
        '''
        (2017-11-28) Update for skimage: In scikit-image==0.12, the
        normalise parameter has been updated to transform_sqrt. The
        transform_sqrt parameter performs the exact same operation, only
        with a different name. If you're using an older version of
        scikit-image (again, before the v0.12 release), then you'll want
        to change transform_sqrt to normalise. In scikit-image==0.15 the
        default value of block_norm="L1" has been deprecated and changed
        to block_norm="L2-Hys". Therefore, for this lesson we'll
        explicitly specify block_norm="L1". Doing this will avoid it
        switching to "L2-Hys" with version updates without us knowing
        (and yielding incorrect car logo identification results). You can
        read about L1 and L2 norms here:
        https://gurus.pyimagesearch.com/lesson-sample-histogram-of-oriented-gradients-and-car-logo-recognition/#tour_modal
        '''
        hist = feature.hog(image,
                           orientations=self.orientations,
                           pixels_per_cell=self.pixelsPerCell,
                           cells_per_block=self.cellsPerBlock,
                           transform_sqrt=self.normalize,
                           block_norm="L1")
        return hist
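
Side note: since that docstring is about parameter renames across scikit-image versions, here is a rough compatibility sketch (my own addition, not from the book) that inspects the installed feature.hog to decide which keyword names to pass:

import inspect
from skimage import feature

def describe_compat(image, orientations=9, pixels_per_cell=(14, 14),
                    cells_per_block=(1, 1), normalize=False):
    # inspect the installed feature.hog to see which keywords it accepts
    params = inspect.signature(feature.hog).parameters

    kwargs = {}
    # scikit-image >= 0.12 renamed `normalise` to `transform_sqrt`
    kwargs['transform_sqrt' if 'transform_sqrt' in params else 'normalise'] = normalize
    # block_norm only exists on newer releases; pin it to "L1" where available
    if 'block_norm' in params:
        kwargs['block_norm'] = 'L1'

    return feature.hog(image, orientations=orientations,
                       pixels_per_cell=pixels_per_cell,
                       cells_per_block=cells_per_block, **kwargs)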

And here is what generates the trained model:

train.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:57:26 2020

@author: User
"""

from sklearn.externals import joblib  # removed in scikit-learn 0.23+; there, use "import joblib" instead
from sklearn.svm import LinearSVC
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse

ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='Path to dataset')
ap.add_argument('-m', '--model', required=True, help='path to where model will be stored')
args=vars(ap.parse_args())

(digits, target) = dataset.load_digits(args['dataset'])

data = []

hog = HOG(orientations=9, pixelsPerCell=(14,14),
          cellsPerBlock=(1,1), normalize=True)

for image in digits:
    image = dataset.deskew(image, 20)
    image = dataset.center_extent(image, (20, 20))
    
    hist = hog.describe(image)
    data.append(hist)

model = LinearSVC(random_state=42)
model.fit(data, target)

joblib.dump(model, args['model'])

dataset.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:35:04 2020

@author: User
"""

from . import imutils
import numpy as np
import mahotas
import cv2

def load_digits(datasetPath):
    data = np.genfromtxt(datasetPath, delimiter=',', dtype='uint8')
    target = data[:, 0]
    data = data[:, 1:].reshape(data.shape[0], 28, 28)
    
    return (data, target)

def deskew(image, width):
    (h, w) = image.shape[:2]
    moments = cv2.moments(image)
    
    skew = moments['mu11'] / moments['mu02']
    M = np.float32([
            [1, skew, -0.5 * w * skew],
            [0, 1, 0]])
    image = cv2.warpAffine(image, M, (w, h),
                   flags = cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
    
    image = imutils.resize(image, width=width)
    
    return image

def center_extent(image, size):
    (eW, eH) = size
    
    if image.shape[1] > image.shape[0]:
        image = imutils.resize(image, width=eW)
    else:
        image = imutils.resize(image, height=eH)
    
    extent = np.zeros((eH, eW), dtype = 'uint8')
    
    offsetX = (eW - image.shape[1]) // 2
    offsetY = (eH - image.shape[0]) // 2
    extent[offsetY:offsetY + image.shape[0], 
           offsetX:offsetX + image.shape[1]] = image
       
    CM = mahotas.center_of_mass(extent)
    (cY, cX) = np.round(CM).astype('int32')
    (dX, dY) = ((size[0] // 2) - cX, (size[1] // 2) - cY)
    M = np.float32([[1, 0, dX], [0, 1, dY]])
    extent = cv2.warpAffine(extent, M, size)
    
    return extent
           

And here is the custom imutils module, in case it's needed:

imutils.py

# -*- coding: utf-8 -*-
"""
Created on Tue Sep 29 16:27:16 2020

@author: User
"""

import numpy as np
import cv2

def translate(image, x, y):
    M = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
    return shifted

def rotate(image, angle, center=None, scale=1.0):
    (h, w) = image.shape[:2]
    if center is None:
        center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    dim = None
    (h, w) = image.shape[:2]
    
    if width is None and height is None:
        return image
    
    if width is None:
        r = height / float(h)
        dim = (int(w*r), height)
    
    else:
        r = width / float(w)
        dim = (width, int(h*r))
    
    resized = cv2.resize(image, dim, interpolation = inter)
    return resized

I'm using the data found here (the train.csv file), which I reduced to 5,000 rows with this script:

import pandas as pd

metadata = pd.read_csv('C:/Users/User/Downloads/digit-recognizer/train.csv', low_memory=False)
smaller_df = metadata.head(5000)
smaller_df.to_csv(path_or_buf='data/digits.csv', index=False)
print('successfully wrote a smaller file!')

I actually figured this out a while ago, but only now got a chance to post, so I want to share my findings.

I think that when I originally tried to "reshape" the array, I was actually reshaping the wrong array, which is why it kept giving me an error.

So, to convert the 1D array I had into a 2D array, I took this line: digit = model.predict(hist)[0]

and changed it to: digit = model.predict(hist.reshape(1,-1))[0]
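
For anyone hitting the same error, here is the fixed prediction step; the commented-out variant is an equivalent idiom (my own note, not from the book), since scikit-learn treats a one-element list as a batch containing a single sample:

hist = hog.describe(thresh)

# reshape the flat (n_features,) vector into a (1, n_features) batch of one sample
digit = model.predict(hist.reshape(1, -1))[0]

# an equivalent alternative:
# digit = model.predict([hist])[0]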