如何在 Keras/Tensorflow 中模仿 Caffe 的最大池化行为?
How to mimic Caffe's max pooling behavior in Keras/Tensorflow?
如果在 Keras 中将 pool_size=(2,2)、strides=(2,2) 的 MaxPooling2D 层应用于 3x3 的输入特征图,得到的空间输出大小为 1x1。
而 Caffe 中的相同操作(pool: MAX; kernel_size: 2; stride: 2)得到的输出大小为 2x2。
众所周知,Caffe 和 Tensorflow/Keras 在这方面的行为不同。
二维卷积有一种解决方法:为避免不对称填充,可以在卷积层之前加上显式的填充层,并将填充类型从 same 更改为 valid。
是否有类似的解决方法可以改变 Keras 中 MaxPooling2D 的行为,使其与 Caffe 的表现一致?更准确地说,我正在寻找一个 MaxPooling2D 的包装器,使其等价于 Caffe 中 2x2 的二维最大池化。
也许,在 MaxPooling2D
输入的左侧和顶部填充一个像素?
我正在使用来自 TensorFlow 的tf.keras
。
好的,我找到了答案,把它记录在这里。必须在输入的底部和右侧(bottom/right)用零填充。下面是最小的可运行示例:
import os
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, MaxPool2D
from tensorflow.python.keras import backend as K
import caffe
from caffe.model_libs import P
from caffe import layers as L
from caffe.proto import caffe_pb2
def MaxPooling2DWrapper(pool_size=(2, 2), strides=None, padding='valid', data_format=None,
                        pad_value=0, **kwargs):
    """Build a max-pooling callable whose output size follows Caffe's rule.

    Caffe sizes a pooling output with a ceiling,
    ``ceil((dim - kernel) / stride) + 1``, while Keras ``'valid'`` pooling
    uses a floor. The returned callable appends just enough rows (bottom)
    and columns (right) to its input so that Keras produces the Caffe-sized
    output; no padding layer is inserted when none is needed.

    Args:
        pool_size: int or pair of ints, forwarded to ``MaxPool2D`` and used
            to compute the padding amount.
        strides: int, pair of ints, or ``None`` (treated as ``pool_size``),
            forwarded to ``MaxPool2D`` and used to compute the padding.
        padding: forwarded to ``MaxPool2D``. The padding amount is computed
            for ``'valid'`` pooling.
        data_format: forwarded to ``MaxPool2D``. The extra padding is always
            applied to axes 1 and 2, i.e. a channels-last layout is assumed.
        pad_value: constant written into the appended cells. The default 0
            matches Caffe whenever every border window holds a non-negative
            value (for example after a ReLU). For signed activations pass a
            value below any real activation so that padding never wins the
            max.
        **kwargs: forwarded to ``MaxPool2D``.

    Returns:
        A function taking a 4-D Keras tensor with statically known height
        and width and returning the pooled tensor.
    """

    def _as_pair(value):
        # Keras accepts a single int as shorthand for (int, int).
        return (value, value) if isinstance(value, int) else tuple(value)

    def _extra_cells(dim, kernel, stride):
        # Caffe output length: ceil((dim - kernel) / stride) + 1, written
        # with integer floor division to avoid float rounding.
        out = -((kernel - dim) // stride) + 1
        # Length the input must have for floor-mode pooling to emit `out`
        # windows, minus what it already has (never negative).
        return (out - 1) * stride + kernel - dim

    def padded_pooling(inputs):
        # Static shape is required: the padding amount is decided while the
        # graph is being built.
        _, h, w, _ = K.int_shape(inputs)
        kernel_h, kernel_w = _as_pair(pool_size)
        stride_h, stride_w = _as_pair(pool_size if strides is None else strides)
        pad_h = _extra_cells(h, kernel_h, stride_h)
        pad_w = _extra_cells(w, kernel_w, stride_w)

        interm_input = inputs
        if pad_h or pad_w:
            paddings = [[0, 0], [0, pad_h], [0, pad_w], [0, 0]]
            # The lambda must operate on its own argument `x`; capturing the
            # outer `inputs` tensor would tie the layer to this one tensor.
            # NOTE(review): the fixed layer name may clash if the wrapper is
            # applied more than once in the same model - confirm.
            interm_input = tf.keras.layers.Lambda(
                lambda x: tf.pad(x, paddings, constant_values=pad_value),
                name='input_pad')(inputs)
        return MaxPool2D(pool_size, strides, padding, data_format, **kwargs)(interm_input)

    return padded_pooling
def build_caffe_model(h, w):
    """Create and load a Caffe net holding a single 2x2, stride-2 max pooling.

    The network definition is written to ``deploy.prototxt`` in the current
    working directory and then loaded in TEST phase.

    Args:
        h: input height.
        w: input width.

    Returns:
        The loaded ``caffe.Net`` with blobs named ``input`` and ``max_pool``.
    """
    spec = caffe.NetSpec()
    # Single-sample, single-channel input blob of shape (1, 1, h, w).
    spec['input'] = L.Input(shape=caffe_pb2.BlobShape(dim=(1, 1, h, w)))
    spec['max_pool'] = L.Pooling(
        spec['input'],
        pool=P.Pooling.MAX,
        kernel_size=2,
        stride=2,
    )

    prototxt_path = 'deploy.prototxt'
    with open(prototxt_path, 'w') as prototxt:
        prototxt.write(str(spec.to_proto()))

    return caffe.Net(prototxt_path, caffe.TEST)
def build_keras_model(h, w):
    """Build a Keras model that applies only the Caffe-style pooling wrapper.

    Args:
        h: input height.
        w: input width.

    Returns:
        A ``Model`` mapping an ``(h, w, 1)`` input to its pooled output.
    """
    image = Input(shape=(h, w, 1))
    pool = MaxPooling2DWrapper()
    return Model(image, pool(image))
def main():
    """Pool the same 3x3 ramp with Caffe and Keras and print both results."""
    caffe.set_mode_cpu()
    # NOTE(review): caffe is already imported when this runs; confirm the
    # log-level variable still takes effect when set this late.
    os.environ['GLOG_minloglevel'] = '2'

    h, w = 3, 3
    ramp = np.arange(h * w)  # values 0..8, reshaped per framework below

    caffe_net = build_caffe_model(h, w)
    keras_model = build_keras_model(h, w)
    keras_model.summary()

    # Keras takes channels-last (N, H, W, C) ...
    keras_out = keras_model.predict(ramp.reshape(1, h, w, 1))
    # ... while the Caffe blob is channels-first (N, C, H, W).
    caffe_net.blobs['input'].data[...] = ramp.reshape(1, 1, h, w)
    caffe_out = caffe_net.forward()['max_pool']

    report = (
        ('Input:', ramp.reshape(h, w)),
        ('Caffe result:', np.squeeze(caffe_out)),
        ('Keras result:', np.squeeze(keras_out)),
    )
    for title, values in report:
        print(title)
        print(values)


if __name__ == '__main__':
    main()
包装器只会在需要时添加填充。此代码的输出:
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 3, 3, 1) 0
_________________________________________________________________
input_pad (Lambda) (None, 4, 4, 1) 0
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 2, 2, 1) 0
=================================================================
Input:
[[0 1 2]
[3 4 5]
[6 7 8]]
Caffe result:
[[4. 5.]
[7. 8.]]
Keras result:
[[4. 5.]
[7. 8.]]
如果在 Keras 中将 pool_size=(2,2)、strides=(2,2) 的 MaxPooling2D 层应用于 3x3 的输入特征图,得到的空间输出大小为 1x1。
而 Caffe 中的相同操作(pool: MAX; kernel_size: 2; stride: 2)得到的输出大小为 2x2。
众所周知,Caffe 和 Tensorflow/Keras 在这方面的行为不同。
二维卷积有一种解决方法:为避免不对称填充,可以在卷积层之前加上显式的填充层,并将填充类型从 same 更改为 valid。
是否有类似的解决方法可以改变 Keras 中 MaxPooling2D 的行为,使其与 Caffe 的表现一致?更准确地说,我正在寻找一个 MaxPooling2D 的包装器,使其等价于 Caffe 中 2x2 的二维最大池化。
也许,在 MaxPooling2D
输入的左侧和顶部填充一个像素?
我正在使用来自 TensorFlow 的tf.keras
。
好的,我找到了答案,把它记录在这里。必须在输入的底部和右侧(bottom/right)用零填充。下面是最小的可运行示例:
import os
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, MaxPool2D
from tensorflow.python.keras import backend as K
import caffe
from caffe.model_libs import P
from caffe import layers as L
from caffe.proto import caffe_pb2
def MaxPooling2DWrapper(pool_size=(2, 2), strides=None, padding='valid', data_format=None,
                        pad_value=0, **kwargs):
    """Build a max-pooling callable whose output size follows Caffe's rule.

    Caffe sizes a pooling output with a ceiling,
    ``ceil((dim - kernel) / stride) + 1``, while Keras ``'valid'`` pooling
    uses a floor. The returned callable appends just enough rows (bottom)
    and columns (right) to its input so that Keras produces the Caffe-sized
    output; no padding layer is inserted when none is needed.

    Args:
        pool_size: int or pair of ints, forwarded to ``MaxPool2D`` and used
            to compute the padding amount.
        strides: int, pair of ints, or ``None`` (treated as ``pool_size``),
            forwarded to ``MaxPool2D`` and used to compute the padding.
        padding: forwarded to ``MaxPool2D``. The padding amount is computed
            for ``'valid'`` pooling.
        data_format: forwarded to ``MaxPool2D``. The extra padding is always
            applied to axes 1 and 2, i.e. a channels-last layout is assumed.
        pad_value: constant written into the appended cells. The default 0
            matches Caffe whenever every border window holds a non-negative
            value (for example after a ReLU). For signed activations pass a
            value below any real activation so that padding never wins the
            max.
        **kwargs: forwarded to ``MaxPool2D``.

    Returns:
        A function taking a 4-D Keras tensor with statically known height
        and width and returning the pooled tensor.
    """

    def _as_pair(value):
        # Keras accepts a single int as shorthand for (int, int).
        return (value, value) if isinstance(value, int) else tuple(value)

    def _extra_cells(dim, kernel, stride):
        # Caffe output length: ceil((dim - kernel) / stride) + 1, written
        # with integer floor division to avoid float rounding.
        out = -((kernel - dim) // stride) + 1
        # Length the input must have for floor-mode pooling to emit `out`
        # windows, minus what it already has (never negative).
        return (out - 1) * stride + kernel - dim

    def padded_pooling(inputs):
        # Static shape is required: the padding amount is decided while the
        # graph is being built.
        _, h, w, _ = K.int_shape(inputs)
        kernel_h, kernel_w = _as_pair(pool_size)
        stride_h, stride_w = _as_pair(pool_size if strides is None else strides)
        pad_h = _extra_cells(h, kernel_h, stride_h)
        pad_w = _extra_cells(w, kernel_w, stride_w)

        interm_input = inputs
        if pad_h or pad_w:
            paddings = [[0, 0], [0, pad_h], [0, pad_w], [0, 0]]
            # The lambda must operate on its own argument `x`; capturing the
            # outer `inputs` tensor would tie the layer to this one tensor.
            # NOTE(review): the fixed layer name may clash if the wrapper is
            # applied more than once in the same model - confirm.
            interm_input = tf.keras.layers.Lambda(
                lambda x: tf.pad(x, paddings, constant_values=pad_value),
                name='input_pad')(inputs)
        return MaxPool2D(pool_size, strides, padding, data_format, **kwargs)(interm_input)

    return padded_pooling
def build_caffe_model(h, w):
    """Create and load a Caffe net holding a single 2x2, stride-2 max pooling.

    The network definition is written to ``deploy.prototxt`` in the current
    working directory and then loaded in TEST phase.

    Args:
        h: input height.
        w: input width.

    Returns:
        The loaded ``caffe.Net`` with blobs named ``input`` and ``max_pool``.
    """
    spec = caffe.NetSpec()
    # Single-sample, single-channel input blob of shape (1, 1, h, w).
    spec['input'] = L.Input(shape=caffe_pb2.BlobShape(dim=(1, 1, h, w)))
    spec['max_pool'] = L.Pooling(
        spec['input'],
        pool=P.Pooling.MAX,
        kernel_size=2,
        stride=2,
    )

    prototxt_path = 'deploy.prototxt'
    with open(prototxt_path, 'w') as prototxt:
        prototxt.write(str(spec.to_proto()))

    return caffe.Net(prototxt_path, caffe.TEST)
def build_keras_model(h, w):
    """Build a Keras model that applies only the Caffe-style pooling wrapper.

    Args:
        h: input height.
        w: input width.

    Returns:
        A ``Model`` mapping an ``(h, w, 1)`` input to its pooled output.
    """
    image = Input(shape=(h, w, 1))
    pool = MaxPooling2DWrapper()
    return Model(image, pool(image))
def main():
    """Pool the same 3x3 ramp with Caffe and Keras and print both results."""
    caffe.set_mode_cpu()
    # NOTE(review): caffe is already imported when this runs; confirm the
    # log-level variable still takes effect when set this late.
    os.environ['GLOG_minloglevel'] = '2'

    h, w = 3, 3
    ramp = np.arange(h * w)  # values 0..8, reshaped per framework below

    caffe_net = build_caffe_model(h, w)
    keras_model = build_keras_model(h, w)
    keras_model.summary()

    # Keras takes channels-last (N, H, W, C) ...
    keras_out = keras_model.predict(ramp.reshape(1, h, w, 1))
    # ... while the Caffe blob is channels-first (N, C, H, W).
    caffe_net.blobs['input'].data[...] = ramp.reshape(1, 1, h, w)
    caffe_out = caffe_net.forward()['max_pool']

    report = (
        ('Input:', ramp.reshape(h, w)),
        ('Caffe result:', np.squeeze(caffe_out)),
        ('Keras result:', np.squeeze(keras_out)),
    )
    for title, values in report:
        print(title)
        print(values)


if __name__ == '__main__':
    main()
包装器只会在需要时添加填充。此代码的输出:
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 3, 3, 1) 0
_________________________________________________________________
input_pad (Lambda) (None, 4, 4, 1) 0
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 2, 2, 1) 0
=================================================================
Input:
[[0 1 2]
[3 4 5]
[6 7 8]]
Caffe result:
[[4. 5.]
[7. 8.]]
Keras result:
[[4. 5.]
[7. 8.]]