尝试使用 Caffe 分类器导致 "sequence argument must have length equal to input rank " 错误
Trying to to use Caffe classifier causes "sequence argument must have length equal to input rank "error
我正在尝试使用 Caffe.Classifier
class 及其 predict()
方法来训练 Imagenet
caffemodel
。
图像大小调整为 256x256
并裁剪 227x227
用于训练网络。
一切都简单明了,但我不断收到如下奇怪的错误:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
17 image_dims=(256, 256))
18
---> 19 out = net.predict([image_caffe], oversample=True)
20 print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
21 plabel = int(labels[out[0].argmax()].strip())
<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
65 for ix, in_ in enumerate(inputs):
66 print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67 input_[ix] = caffe.io.resize_image(in_, self.image_dims)
68
69 if oversample:
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
335 # ndimage interpolates anything but more slowly.
336 scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337 resized_im = zoom(im, scale + (1,), order=interp_order)
338 return resized_im.astype(np.float32)
339
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
588 else:
589 filtered = input
--> 590 zoom = _ni_support._normalize_sequence(zoom, input.ndim)
591 output_shape = tuple(
592 [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
63 if len(normalized) != rank:
64 err = "sequence argument must have length equal to input rank"
---> 65 raise RuntimeError(err)
66 else:
67 normalized = [input] * rank
RuntimeError: sequence argument must have length equal to input rank
这是我正在使用的代码片段:
import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools
def flat_shape(x):
"Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
return x.reshape(x.shape)
def db_reader(fpath, type='lmdb'):
if type == 'lmdb':
return lmdb_reader(fpath)
else:
return leveldb_reader(fpath)
def lmdb_reader(fpath):
import lmdb
lmdb_env = lmdb.open(fpath)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
def leveldb_reader(fpath):
import leveldb
db = leveldb.LevelDB(fpath)
for key, value in db.RangeIter():
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
Classifier
class(复制形式 Caffe
的 python 目录):
import numpy as np
import caffe
class Classifier(caffe.Net):
"""
Classifier extends Net for image class prediction
by scaling, center cropping, or oversampling.
Parameters
----------
image_dims : dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
# configure pre-processing
in_ = self.inputs[0]
print('inputs[0]',self.inputs[0])
self.transformer = caffe.io.Transformer(
{in_: self.blobs[in_].data.shape})
self.transformer.set_transpose(in_, (2, 0, 1))
if mean is not None:
self.transformer.set_mean(in_, mean)
if input_scale is not None:
self.transformer.set_input_scale(in_, input_scale)
if raw_scale is not None:
self.transformer.set_raw_scale(in_, raw_scale)
if channel_swap is not None:
self.transformer.set_channel_swap(in_, channel_swap)
print('crops: ',self.blobs[in_].data.shape[2:])
self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
if not image_dims:
image_dims = self.crop_dims
self.image_dims = image_dims
def predict(self, inputs, oversample=True):
"""
Predict classification probabilities of inputs.
Parameters
----------
inputs : iterable of (H x W x K) input ndarrays.
oversample : boolean
average predictions across center, corners, and mirrors
when True (default). Center-only prediction when False.
Returns
-------
predictions: (N x C) ndarray of class probabilities for N images and C
classes.
"""
# Scale to standardize input dimensions.
input_ = np.zeros((len(inputs),
self.image_dims[0],
self.image_dims[1],
inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
input_[ix] = caffe.io.resize_image(in_, self.image_dims)
if oversample:
# Generate center, corner, and mirrored crops.
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]
# Classify
caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]]
# For oversampling, average predictions across crops.
if oversample:
predictions = predictions.reshape((len(predictions) / 10, 10, -1))
predictions = predictions.mean(1)
return predictions
主要部分:
proto ='deploy.prototxt'
model='snap1.caffemodel'
mean='imagenet_mean.binaryproto'
db_path='G:/imagenet/ilsvrc12_val_lmdb'
# Extract mean from the mean image file
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu()
reader = lmdb_reader(db_path)
i = 0
for i, image, label in reader:
image_caffe = image.reshape(1, *image.shape)
print(image_caffe.shape, mu.shape)
net = Classifier(proto, model,
mean= mu,
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(256, 256))
out = net.predict([image_caffe], oversample=True)
print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
i+=1
这里有什么问题?
我找到了原因,我必须以 3D 张量而不是 4D 张量的形式提供图像!
所以我们的 4d 张量:
image_caffe = image.reshape(1, *image.shape)
需要更改为 3D:
image_caffe = image.transpose(2,1,0)
作为旁注,尝试对 运行 任何与咖啡相关的东西使用 python2。 python3 一开始可能有用,但肯定会引起很多麻烦。例如,将 oversample
设置为 True 的 predict
方法将在 python3 下崩溃,但在 python2!
下工作正常
我正在尝试使用 Caffe.Classifier
class 及其 predict()
方法来训练 Imagenet
caffemodel
。
图像大小调整为 256x256
并裁剪 227x227
用于训练网络。
一切都简单明了,但我不断收到如下奇怪的错误:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
17 image_dims=(256, 256))
18
---> 19 out = net.predict([image_caffe], oversample=True)
20 print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
21 plabel = int(labels[out[0].argmax()].strip())
<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
65 for ix, in_ in enumerate(inputs):
66 print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67 input_[ix] = caffe.io.resize_image(in_, self.image_dims)
68
69 if oversample:
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
335 # ndimage interpolates anything but more slowly.
336 scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337 resized_im = zoom(im, scale + (1,), order=interp_order)
338 return resized_im.astype(np.float32)
339
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
588 else:
589 filtered = input
--> 590 zoom = _ni_support._normalize_sequence(zoom, input.ndim)
591 output_shape = tuple(
592 [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
63 if len(normalized) != rank:
64 err = "sequence argument must have length equal to input rank"
---> 65 raise RuntimeError(err)
66 else:
67 normalized = [input] * rank
RuntimeError: sequence argument must have length equal to input rank
这是我正在使用的代码片段:
import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools
def flat_shape(x):
"Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
return x.reshape(x.shape)
def db_reader(fpath, type='lmdb'):
if type == 'lmdb':
return lmdb_reader(fpath)
else:
return leveldb_reader(fpath)
def lmdb_reader(fpath):
import lmdb
lmdb_env = lmdb.open(fpath)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
def leveldb_reader(fpath):
import leveldb
db = leveldb.LevelDB(fpath)
for key, value in db.RangeIter():
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
Classifier
class(复制形式 Caffe
的 python 目录):
import numpy as np
import caffe
class Classifier(caffe.Net):
"""
Classifier extends Net for image class prediction
by scaling, center cropping, or oversampling.
Parameters
----------
image_dims : dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
# configure pre-processing
in_ = self.inputs[0]
print('inputs[0]',self.inputs[0])
self.transformer = caffe.io.Transformer(
{in_: self.blobs[in_].data.shape})
self.transformer.set_transpose(in_, (2, 0, 1))
if mean is not None:
self.transformer.set_mean(in_, mean)
if input_scale is not None:
self.transformer.set_input_scale(in_, input_scale)
if raw_scale is not None:
self.transformer.set_raw_scale(in_, raw_scale)
if channel_swap is not None:
self.transformer.set_channel_swap(in_, channel_swap)
print('crops: ',self.blobs[in_].data.shape[2:])
self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
if not image_dims:
image_dims = self.crop_dims
self.image_dims = image_dims
def predict(self, inputs, oversample=True):
"""
Predict classification probabilities of inputs.
Parameters
----------
inputs : iterable of (H x W x K) input ndarrays.
oversample : boolean
average predictions across center, corners, and mirrors
when True (default). Center-only prediction when False.
Returns
-------
predictions: (N x C) ndarray of class probabilities for N images and C
classes.
"""
# Scale to standardize input dimensions.
input_ = np.zeros((len(inputs),
self.image_dims[0],
self.image_dims[1],
inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
input_[ix] = caffe.io.resize_image(in_, self.image_dims)
if oversample:
# Generate center, corner, and mirrored crops.
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]
# Classify
caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]]
# For oversampling, average predictions across crops.
if oversample:
predictions = predictions.reshape((len(predictions) / 10, 10, -1))
predictions = predictions.mean(1)
return predictions
主要部分:
proto ='deploy.prototxt'
model='snap1.caffemodel'
mean='imagenet_mean.binaryproto'
db_path='G:/imagenet/ilsvrc12_val_lmdb'
# Extract mean from the mean image file
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu()
reader = lmdb_reader(db_path)
i = 0
for i, image, label in reader:
image_caffe = image.reshape(1, *image.shape)
print(image_caffe.shape, mu.shape)
net = Classifier(proto, model,
mean= mu,
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(256, 256))
out = net.predict([image_caffe], oversample=True)
print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
i+=1
这里有什么问题?
我找到了原因,我必须以 3D 张量而不是 4D 张量的形式提供图像! 所以我们的 4d 张量:
image_caffe = image.reshape(1, *image.shape)
需要更改为 3D:
image_caffe = image.transpose(2,1,0)
作为旁注,尝试对 运行 任何与咖啡相关的东西使用 python2。 python3 一开始可能有用,但肯定会引起很多麻烦。例如,将 oversample
设置为 True 的 predict
方法将在 python3 下崩溃,但在 python2!