尝试使用 Caffe 分类器导致 "sequence argument must have length equal to input rank " 错误

Trying to use Caffe classifier causes "sequence argument must have length equal to input rank" error

我正在尝试使用 Caffe.Classifier class 及其 predict() 方法来训练 Imagenet caffemodel
图像大小调整为 256x256 并裁剪 227x227 用于训练网络。
一切都简单明了,但我不断收到如下奇怪的错误:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
     17                        image_dims=(256, 256))
     18 
---> 19     out = net.predict([image_caffe], oversample=True)
     20     print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
     21     plabel = int(labels[out[0].argmax()].strip())

<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
     65         for ix, in_ in enumerate(inputs):
     66             print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67             input_[ix] = caffe.io.resize_image(in_, self.image_dims)
     68 
     69         if oversample:

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
    335         # ndimage interpolates anything but more slowly.
    336         scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337         resized_im = zoom(im, scale + (1,), order=interp_order)
    338     return resized_im.astype(np.float32)
    339 

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
    588     else:
    589         filtered = input
--> 590     zoom = _ni_support._normalize_sequence(zoom, input.ndim)
    591     output_shape = tuple(
    592             [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])

C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
     63         if len(normalized) != rank:
     64             err = "sequence argument must have length equal to input rank"
---> 65             raise RuntimeError(err)
     66     else:
     67         normalized = [input] * rank

RuntimeError: sequence argument must have length equal to input rank

这是我正在使用的代码片段:

import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools

def flat_shape(x):
    """Return x without singleton dimensions, e.g. (1, 28, 28) -> (28, 28).

    Arrays that have no axis of length 1 are returned with their shape
    unchanged.
    """
    # np.squeeze drops every axis of length 1, which is what the docstring
    # promises; reshaping to x.shape would leave the array exactly as it was.
    return np.squeeze(x)

def db_reader(fpath, type='lmdb'):
    """Return a record reader for the database stored at fpath.

    type='lmdb' selects lmdb_reader; any other value selects
    leveldb_reader. The chosen reader is called with fpath and its
    result is returned unchanged.
    """
    open_reader = lmdb_reader if type == 'lmdb' else leveldb_reader
    return open_reader(fpath)

def lmdb_reader(fpath):
    """Yield (key, image, label) for every record in the LMDB at fpath.

    Each stored value is parsed as a Caffe Datum. The image is the datum
    converted to an array, cast to uint8 and passed through flat_shape;
    the label is the datum's label as an int.

    The LMDB environment and transaction are released when the generator
    is exhausted, closed, or aborted by an exception.
    """
    import lmdb

    # Context managers guarantee the environment and the transaction are
    # released instead of staying open for the lifetime of the process.
    with lmdb.open(fpath) as lmdb_env, lmdb_env.begin() as lmdb_txn:
        for key, value in lmdb_txn.cursor():
            datum = caffe.proto.caffe_pb2.Datum()
            datum.ParseFromString(value)
            label = int(datum.label)
            image = caffe.io.datum_to_array(datum).astype(np.uint8)
            yield (key, flat_shape(image), label)

def leveldb_reader(fpath):
    """Yield one (key, image, label) tuple per record of the LevelDB at fpath.

    Every value is parsed as a Caffe Datum; the image is the datum
    converted to a uint8 array and passed through flat_shape, and the
    label is the datum's label as an int.
    """
    import leveldb

    store = leveldb.LevelDB(fpath)
    for key, raw in store.RangeIter():
        record = caffe.proto.caffe_pb2.Datum()
        record.ParseFromString(raw)
        class_id = int(record.label)
        pixels = caffe.io.datum_to_array(record).astype(np.uint8)
        yield (key, flat_shape(pixels), class_id)

Classifier 类(复制自 Caffe 的 python 目录):

    import numpy as np
    import caffe

    class Classifier(caffe.Net):
        """
        Classifier extends Net for image class prediction
        by scaling, center cropping, or oversampling.

        Parameters
        ----------
        model_file, pretrained_file : forwarded unchanged to
            caffe.Net.__init__ together with caffe.TEST.
        image_dims : dimensions to scale input for cropping/sampling.
            Default is to scale to net input size for whole-image crop.
        mean, input_scale, raw_scale, channel_swap: params for
            preprocessing options.
        """
        def __init__(self, model_file, pretrained_file, image_dims=None,
                     mean=None, input_scale=None, raw_scale=None,
                     channel_swap=None):
            caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

            # configure pre-processing
            in_ = self.inputs[0]
            print('inputs[0]',self.inputs[0])
            self.transformer = caffe.io.Transformer(
                {in_: self.blobs[in_].data.shape})
            # Move the last axis of each input image to the front.
            self.transformer.set_transpose(in_, (2, 0, 1))

            # Each optional preprocessing step is only configured when the
            # caller supplied a value for it.
            if mean is not None:
                self.transformer.set_mean(in_, mean)
            if input_scale is not None:
                self.transformer.set_input_scale(in_, input_scale)
            if raw_scale is not None:
                self.transformer.set_raw_scale(in_, raw_scale)
            if channel_swap is not None:
                self.transformer.set_channel_swap(in_, channel_swap)

            print('crops: ',self.blobs[in_].data.shape[2:])
            # The crop size is taken from the trailing axes of the input blob.
            self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
            if not image_dims:
                image_dims = self.crop_dims
            self.image_dims = image_dims

        def predict(self, inputs, oversample=True):
            """
            Predict classification probabilities of inputs.

            Parameters
            ----------
            inputs : iterable of (H x W x K) input ndarrays.
            oversample : boolean
                average predictions across center, corners, and mirrors
                when True (default). Center-only prediction when False.

            Returns
            -------
            predictions: (N x C) ndarray of class probabilities for N images and C
                classes.
            """
            # Scale to standardize input dimensions.
            input_ = np.zeros((len(inputs),
                               self.image_dims[0],
                               self.image_dims[1],
                               inputs[0].shape[2]),
                              dtype=np.float32)
            for ix, in_ in enumerate(inputs):
                print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
                input_[ix] = caffe.io.resize_image(in_, self.image_dims)

            if oversample:
                # Generate center, corner, and mirrored crops.
                input_ = caffe.io.oversample(input_, self.crop_dims)
            else:
                # Take center crop.
                center = np.array(self.image_dims) / 2.0
                crop = np.tile(center, (1, 2))[0] + np.concatenate([
                    -self.crop_dims / 2.0,
                    self.crop_dims / 2.0
                ])
                # The bounds above are floats; slice indices must be integers.
                crop = crop.astype(int)
                input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

            # Classify
            caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
                                dtype=np.float32)
            for ix, in_ in enumerate(input_):
                caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
            out = self.forward_all(**{self.inputs[0]: caffe_in})
            predictions = out[self.outputs[0]]

            # For oversampling, average predictions across crops.
            if oversample:
                # Floor division keeps the group count an int under Python 3;
                # true division would hand reshape() a float dimension.
                predictions = predictions.reshape((len(predictions) // 10, 10, -1))
                predictions = predictions.mean(1)

            return predictions

主要部分:

proto = 'deploy.prototxt'
model = 'snap1.caffemodel'
mean = 'imagenet_mean.binaryproto'
db_path = 'G:/imagenet/ilsvrc12_val_lmdb'

# Extract mean from the mean image file
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
# Average the stored mean array over axes 1 and 2 (presumably leaving one
# value per channel -- confirm against the layout of mean.npy).
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu()
reader = lmdb_reader(db_path)

# Build the network once: none of its arguments change between images, so
# reloading the model inside the loop only repeats the same work.
net = Classifier(proto, model,
                 mean=mu,
                 channel_swap=(2, 1, 0),
                 raw_scale=255,
                 image_dims=(256, 256))

# enumerate() provides the running counter. Unpacking the record key into `i`
# and then executing `i += 1` would overwrite the counter with the key.
for i, (key, image, label) in enumerate(reader):
    # Prepends a batch axis, which yields a 4-D array, whereas predict()
    # documents its inputs as (H x W x K) arrays.
    image_caffe = image.reshape(1, *image.shape)
    print(image_caffe.shape, mu.shape)

    out = net.predict([image_caffe], oversample=True)
    # NOTE(review): `labels` is not defined in this snippet; it is expected to
    # be loaded elsewhere before this loop runs.
    print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')

这里有什么问题?

我找到了原因,我必须以 3D 张量而不是 4D 张量的形式提供图像! 所以我们的 4d 张量:

image_caffe = image.reshape(1, *image.shape) 

需要更改为 3D:

# The reader yields (C, H, W) arrays; predict() expects (H, W, K), so the
# channel axis moves last while height and width keep their order.
image_caffe = image.transpose(1, 2, 0)

顺便说一句,运行任何与 Caffe 相关的东西时,请尽量使用 python2。python3 一开始可能能用,但肯定会带来很多麻烦。例如,将 oversample 设置为 True 的 predict 方法在 python3 下会崩溃,但在 python2 下工作正常!