尝试使用 Caffe 分类器导致 "sequence argument must have length equal to input rank " 错误

Question

我正在尝试使用 Caffe.Classifier class 及其 predict() 方法来测试在 Imagenet 上训练的 caffemodel。
图像大小调整为 256x256 并裁剪 227x227 用于训练网络。
一切都简单明了，但我不断收到如下奇怪的错误：

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
     17                        image_dims=(256, 256))
     18 
---> 19     out = net.predict([image_caffe], oversample=True)
     20     print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
     21     plabel = int(labels[out[0].argmax()].strip())

<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
     65         for ix, in_ in enumerate(inputs):
     66             print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67             input_[ix] = caffe.io.resize_image(in_, self.image_dims)
     68 
     69         if oversample:

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
    335         # ndimage interpolates anything but more slowly.
    336         scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337         resized_im = zoom(im, scale + (1,), order=interp_order)
    338     return resized_im.astype(np.float32)
    339 

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
    588     else:
    589         filtered = input
--> 590     zoom = _ni_support._normalize_sequence(zoom, input.ndim)
    591     output_shape = tuple(
    592             [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
     63         if len(normalized) != rank:
     64             err = "sequence argument must have length equal to input rank"
---> 65             raise RuntimeError(err)
     66     else:
     67         normalized = [input] * rank

RuntimeError: sequence argument must have length equal to input rank

这是我正在使用的代码片段：

import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools

def flat_shape(x):
    """Return `x` reshaped so that every length-1 axis is removed.

    Example: an array of shape (1, 28, 28) comes back as (28, 28).

    Parameters
    ----------
    x : array-like object exposing ``shape`` and ``reshape``
        (e.g. a numpy.ndarray).

    Returns
    -------
    The result of ``x.reshape`` with all singleton axes dropped; arrays
    without singleton axes keep their shape.
    """
    # Filter on "!= 1" rather than "> 1" so zero-length axes are kept and
    # the element count of the target shape still matches the input.
    return x.reshape(tuple(dim for dim in x.shape if dim != 1))

def db_reader(fpath, type='lmdb'):
    """Return the record generator for the database stored at `fpath`.

    `type` selects the backend: 'lmdb' dispatches to lmdb_reader, any
    other value dispatches to leveldb_reader.
    """
    # Only the selected reader name is looked up, exactly one call is made.
    open_reader = lmdb_reader if type == 'lmdb' else leveldb_reader
    return open_reader(fpath)

def lmdb_reader(fpath):
    """Yield ``(key, image, label)`` for every record in the LMDB at `fpath`.

    Each stored value is parsed as a Caffe ``Datum``. The image is the
    datum converted to an array, cast to uint8 and passed through
    flat_shape; the label is the datum's label converted to int.

    The environment and its transaction are held in ``with`` blocks so
    they are released when iteration finishes, when the generator is
    closed, or when an error propagates out of the loop.
    """
    import lmdb

    with lmdb.open(fpath) as lmdb_env, lmdb_env.begin() as lmdb_txn:
        for key, value in lmdb_txn.cursor():
            datum = caffe.proto.caffe_pb2.Datum()
            datum.ParseFromString(value)
            label = int(datum.label)
            image = caffe.io.datum_to_array(datum).astype(np.uint8)
            yield (key, flat_shape(image), label)

def leveldb_reader(fpath):
    """Yield ``(key, image, label)`` for every record in the LevelDB at `fpath`.

    Each stored value is parsed as a Caffe ``Datum``; the image is the
    datum converted to an array, cast to uint8 and passed through
    flat_shape, and the label is the datum's label converted to int.
    """
    import leveldb

    for key, raw in leveldb.LevelDB(fpath).RangeIter():
        record = caffe.proto.caffe_pb2.Datum()
        record.ParseFromString(raw)
        class_id = int(record.label)
        pixels = caffe.io.datum_to_array(record).astype(np.uint8)
        yield (key, flat_shape(pixels), class_id)

Classifier class（复制自 Caffe 的 python 目录）：

    import numpy as np
    import caffe

    class Classifier(caffe.Net):
        """
        Classifier extends Net for image class prediction
        by scaling, center cropping, or oversampling.

        Parameters
        ----------
        model_file, pretrained_file : forwarded to caffe.Net together
            with caffe.TEST.
        image_dims : dimensions to scale input for cropping/sampling.
            Default is to scale to net input size for whole-image crop.
        mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options. Each one is applied to the transformer
            only when it is not None.
        """
        def __init__(self, model_file, pretrained_file, image_dims=None,
                     mean=None, input_scale=None, raw_scale=None,
                     channel_swap=None):
            caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

            # configure pre-processing
            in_ = self.inputs[0]
            print('inputs[0]',self.inputs[0])
            self.transformer = caffe.io.Transformer(
                {in_: self.blobs[in_].data.shape})
            # Move the last axis of each (H x W x K) image to the front.
            self.transformer.set_transpose(in_, (2, 0, 1))

            if mean is not None:
                self.transformer.set_mean(in_, mean)
            if input_scale is not None:
                self.transformer.set_input_scale(in_, input_scale)
            if raw_scale is not None:
                self.transformer.set_raw_scale(in_, raw_scale)
            if channel_swap is not None:
                self.transformer.set_channel_swap(in_, channel_swap)

            # Axes 2 onward of the input blob shape are used as the crop size.
            print('crops: ',self.blobs[in_].data.shape[2:]) 
            self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
            if not image_dims:
                image_dims = self.crop_dims
            self.image_dims = image_dims

        def predict(self, inputs, oversample=True):
            """
            Predict classification probabilities of inputs.

            Parameters
            ----------
            inputs : iterable of (H x W x K) input ndarrays. Each element
                must be a single 3-D image: ``shape[2]`` of the first
                element is used as the channel count, and every element
                is resized on its first two axes only.
            oversample : boolean
                average predictions across center, corners, and mirrors
                when True (default). Center-only prediction when False.

            Returns
            -------
            predictions: (N x C) ndarray of class probabilities for N images and C
                classes.
            """
            # Scale to standardize input dimensions.
            input_ = np.zeros((len(inputs),
                               self.image_dims[0],
                               self.image_dims[1],
                               inputs[0].shape[2]),
                              dtype=np.float32)
            for ix, in_ in enumerate(inputs):
                print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
                input_[ix] = caffe.io.resize_image(in_, self.image_dims)

            if oversample:
                # Generate center, corner, and mirrored crops.
                input_ = caffe.io.oversample(input_, self.crop_dims)
            else:
                # Take center crop.
                center = np.array(self.image_dims) / 2.0
                crop = np.tile(center, (1, 2))[0] + np.concatenate([
                    -self.crop_dims / 2.0,
                    self.crop_dims / 2.0
                ])
                # The arithmetic above is floating point, but slice bounds
                # must be integers.
                crop = crop.astype(int)
                input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

            # Classify
            # Reorder the batch shape from (N, H, W, K) to (N, K, H, W).
            caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
                                dtype=np.float32)
            for ix, in_ in enumerate(input_):
                caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
            out = self.forward_all(**{self.inputs[0]: caffe_in})
            predictions = out[self.outputs[0]]

            # For oversampling, average predictions across crops.
            if oversample:
                # The reshape assumes 10 crops per image. Floor division
                # keeps the leading dimension an int; "/" would produce a
                # float on Python 3 and reshape would reject it.
                predictions = predictions.reshape((len(predictions) // 10, 10, -1))
                predictions = predictions.mean(1)

            return predictions

主要部分：

# Driver script: build a Classifier for each record read from the LMDB and
# print the arg-max prediction for that record.
proto ='deploy.prototxt'
model='snap1.caffemodel'
mean='imagenet_mean.binaryproto'
db_path='G:/imagenet/ilsvrc12_val_lmdb'

# Extract mean from the mean image file
# (disabled: the mean is loaded from mean.npy below instead)
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
# Averaging over axis 1 twice collapses the original axes 1 and 2, leaving
# one value per entry of axis 0 -- presumably a per-channel mean; confirm
# against the layout of mean.npy.
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu() 
reader = lmdb_reader(db_path)

# NOTE(review): the loop below rebinds `i` to the first element of every
# tuple yielded by lmdb_reader (the record key), so this counter value is
# overwritten on the first iteration.
i = 0
for i, image, label in reader:
    # Prepends a length-1 axis, so image_caffe has one more dimension than
    # image. Classifier.predict documents its inputs as (H x W x K) arrays.
    image_caffe = image.reshape(1, *image.shape)
    print(image_caffe.shape, mu.shape)

    # A new Classifier is constructed on every iteration.
    net = Classifier(proto, model,
                     mean= mu,
                     channel_swap=(2,1,0),
                     raw_scale=255,
                     image_dims=(256, 256))

    out = net.predict([image_caffe], oversample=True)
    # NOTE(review): `labels` is not defined anywhere in the visible code.
    print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
    i+=1

这里有什么问题？

Answer 1

我找到了原因，我必须以 3D 张量而不是 4D 张量的形式提供图像！所以我们的 4d 张量：

image_caffe = image.reshape(1, *image.shape)

需要更改为 3D：

image_caffe = image.transpose(2,1,0)

顺便提一下，运行任何与 Caffe 相关的东西时请尽量使用 python2。python3 一开始可能可用，但肯定会引起很多麻烦。例如，将 oversample 设置为 True 的 predict 方法在 python3 下会崩溃，但在 python2 下工作正常！

尝试使用 Caffe 分类器导致 "sequence argument must have length equal to input rank " 错误

Trying to use Caffe classifier causes "sequence argument must have length equal to input rank" error

python-3.x

caffe

pycaffe