HOG handwritten digit recognition not working
I'm working through the handwritten digit recognition chapter of an OpenCV book, and although I've gone through it and I believe everything is handled correctly, I'm getting this error message: Expected 2D array, got 1D array instead. I've tried googling for an answer, and it seems a lot of other people have run into a very similar problem, but no real answer is given.
Can anyone explain why the feature.hog() method doesn't return a 2D array? I've been reading some documentation, and apparently it returns a flat 1D array by default, so I don't understand why the model.predict() method complains about needing a 2D array. The book I'm following was released in 2015, I believe, so maybe something has changed since then?
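A quick check against a blank 72x72 patch (the size classify.py below feeds into describe(), using the same parameters) does confirm the flat shape; the 882 follows from 72 // 10 = 7 cells per axis:

import numpy as np
from skimage import feature

# feature.hog defaults to feature_vector=True, which flattens the block
# histograms into a single 1D vector.
patch = np.zeros((72, 72), dtype='uint8')
hist = feature.hog(patch, orientations=18, pixels_per_cell=(10, 10),
                   cells_per_block=(1, 1), block_norm='L1')
print(hist.shape)  # (882,) -- 7 * 7 cells * 18 orientations, one dimension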
Here is the file I'm trying to run:
classify.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 13:01:39 2020
@author: User
"""
from __future__ import print_function
from sklearn.externals import joblib
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
import mahotas
import cv2

ap = argparse.ArgumentParser()
ap.add_argument('-m', '--model', required=True, help='Path to model')
ap.add_argument('-i', '--image', required=True, help='Path to image')
args = vars(ap.parse_args())

model = joblib.load(args['model'])
hog = HOG(orientations=18, pixelsPerCell=(10, 10),
          cellsPerBlock=(1, 1), normalize=True)

image = cv2.imread(args["image"])
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)

(_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted([(c, cv2.boundingRect(c)[0]) for c in cnts], key=lambda x: x[1])

for (c, _) in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    if w >= 7 and h >= 20:
        roi = gray[y:y + h, x:x + w]
        thresh = roi.copy()
        T = mahotas.thresholding.otsu(roi)
        thresh[thresh > T] = 255
        thresh = cv2.bitwise_not(thresh)
        thresh = dataset.deskew(thresh, 72)
        thresh = dataset.center_extent(thresh, (72, 72))
        cv2.imshow("thresh", thresh)
        hist = hog.describe(thresh)
        digit = model.predict(hist)[0]  # this is where it errors
        print("I think that number is: {}".format(digit))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.putText(image, str(digit), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
        cv2.imshow("image", image)
        cv2.waitKey(0)
Here is the custom hog module written for this:
hog.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:22:38 2020
@author: User
"""
from skimage import feature

class HOG:
    def __init__(self, orientations=9, pixelsPerCell=(14, 14),
                 cellsPerBlock=(1, 1), normalize=False):
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize

    def describe(self, image):
        '''
        (2017-11-28) Update for skimage: In scikit-image==0.12, the
        normalise parameter has been updated to transform_sqrt. The
        transform_sqrt performs the exact same operation, only with a
        different name. If you're using an older version of scikit-image
        (again, before the v0.12 release), then you'll want to change
        transform_sqrt to normalise. In scikit-image==0.15 the default
        value of block_norm="L1" has been deprecated and changed to
        block_norm="L2-Hys". Therefore, for this lesson we'll explicitly
        specify block_norm="L1". Doing this will avoid it switching to
        "L2-Hys" with version updates without us knowing (and yielding
        incorrect car logo identification results). You can read about L1 and
        L2 norms here:
        https://gurus.pyimagesearch.com/lesson-sample-histogram-of-oriented-gradients-and-car-logo-recognition/#tour_modal
        '''
        hist = feature.hog(image,
                           orientations=self.orientations,
                           pixels_per_cell=self.pixelsPerCell,
                           cells_per_block=self.cellsPerBlock,
                           transform_sqrt=self.normalize,
                           block_norm="L1")
        return hist
And here is what generates the "trained model" for it:
train.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:57:26 2020
@author: User
"""
from sklearn.externals import joblib
from sklearn.svm import LinearSVC
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse

ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='Path to dataset')
ap.add_argument('-m', '--model', required=True, help='Path to where model will be stored')
args = vars(ap.parse_args())

(digits, target) = dataset.load_digits(args['dataset'])
data = []
hog = HOG(orientations=9, pixelsPerCell=(14, 14),
          cellsPerBlock=(1, 1), normalize=True)

for image in digits:
    image = dataset.deskew(image, 20)
    image = dataset.center_extent(image, (20, 20))
    hist = hog.describe(image)
    data.append(hist)

model = LinearSVC(random_state=42)
model.fit(data, target)
joblib.dump(model, args['model'])
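Note that train.py itself runs without complaint, because model.fit() receives a whole list of 1D histograms and stacks them into a single (n_samples, n_features) matrix; it's only predict() on a single bare histogram that trips the 2D check. I can reproduce the same complaint with a toy model (the 882 feature length below is made up purely for illustration):

import numpy as np
from sklearn.svm import LinearSVC

X = [np.random.rand(882) for _ in range(20)]  # twenty flat histograms
y = [i % 10 for i in range(20)]               # fake digit labels
clf = LinearSVC().fit(X, y)  # fine: the list is stacked to shape (20, 882)
clf.predict(X[0])  # ValueError: Expected 2D array, got 1D array instead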
dataset.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:35:04 2020
@author: User
"""
from . import imutils
import numpy as np
import mahotas
import cv2

def load_digits(datasetPath):
    data = np.genfromtxt(datasetPath, delimiter=',', dtype='uint8')
    target = data[:, 0]
    data = data[:, 1:].reshape(data.shape[0], 28, 28)
    return (data, target)

def deskew(image, width):
    (h, w) = image.shape[:2]
    moments = cv2.moments(image)
    skew = moments['mu11'] / moments['mu02']
    M = np.float32([
        [1, skew, -0.5 * w * skew],
        [0, 1, 0]])
    image = cv2.warpAffine(image, M, (w, h),
                           flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
    image = imutils.resize(image, width=width)
    return image

def center_extent(image, size):
    (eW, eH) = size
    if image.shape[1] > image.shape[0]:
        image = imutils.resize(image, width=eW)
    else:
        image = imutils.resize(image, height=eH)
    extent = np.zeros((eH, eW), dtype='uint8')
    offsetX = (eW - image.shape[1]) // 2
    offsetY = (eH - image.shape[0]) // 2
    extent[offsetY:offsetY + image.shape[0],
           offsetX:offsetX + image.shape[1]] = image
    CM = mahotas.center_of_mass(extent)
    (cY, cX) = np.round(CM).astype('int32')
    (dX, dY) = ((size[0] // 2) - cX, (size[1] // 2) - cY)
    M = np.float32([[1, 0, dX], [0, 1, dY]])
    extent = cv2.warpAffine(extent, M, size)
    return extent
And here is the custom imutils module, in case it's needed:
imutils.py
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 29 16:27:16 2020
@author: User
"""
import numpy as np
import cv2

def translate(image, x, y):
    M = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
    return shifted

def rotate(image, angle, center=None, scale=1.0):
    (h, w) = image.shape[:2]
    if not center:
        center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    dim = None
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        r = width / float(w)
        dim = (width, int(h * r))
    resized = cv2.resize(image, dim, interpolation=inter)
    return resized
I'm using the data found here (the train.csv file), which I reduced to 5,000 rows with this script:
import pandas as pd
metadata = pd.read_csv('C:/Users/User/Downloads/digit-recognizer/train.csv', low_memory=False)
smaller_df = metadata.head(5000)
smaller_df.to_csv(path_or_buf='data/digits.csv', index=False)
print('successfully wrote a smaller file!')
I figured this out a while ago, but I've only now had a chance to post, so I wanted to share my findings.
It turns out that when I first tried to "reshape" the array, I was actually reshaping the wrong array, which is why it kept giving me an error.
So, to turn the 1D array I had into the 2D array that predict() expects, I took this line:
digit = model.predict(hist)[0]
and changed it to:
digit = model.predict(hist.reshape(1,-1))[0]
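As a quick illustration of what the reshape does: it turns the flat histogram into a batch of one sample, which is the (n_samples, n_features) layout scikit-learn expects (the 882 below assumes the classify.py parameters, i.e. 7 x 7 cells x 18 orientations):

import numpy as np

hist = np.zeros(882)          # what hog.describe() returns: shape (882,)
row = hist.reshape(1, -1)     # one sample with 882 features: shape (1, 882)
print(hist.shape, row.shape)  # (882,) (1, 882)
# model.predict(row)[0] now receives the 2D input scikit-learn expects.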