如何在 keras/tensorflow 中将图像拆分为 patches/sub-images?
How to split image into patches/sub-images in keras/tensorflow?
我正在尝试重新创建 this paper 中的逻辑。逻辑可以总结为下图:
强调我的问题:
- 我有一个 256x256 的输入图像。它通过 densenet(下面的工作示例)
- 同一张图像被切分成 4 个大小相等、互不重叠的 128x128 图像块(patch)。这些图像块同样逐一通过 DenseNet,然后对输出取平均值。
工作代码:
from keras.applications.densenet import DenseNet201
from keras.layers import Dense, Flatten, Concatenate
from keras.activations import relu
# Baseline from the question: every input and every CNN call is written by hand.
# NOTE(review): `tf` is used below but not imported in the visible snippet —
# presumably `import tensorflow as tf` ran earlier; confirm.
# Full-resolution 256x256 RGB input.
in1 = tf.keras.Input(shape=(256,256,3))
# Four separate 128x128 RGB inputs, one per sub-patch of the main image.
# They are independent Inputs, so the caller has to crop and feed the patches.
patch1 = tf.keras.Input(shape=(128,128,3))
patch2 = tf.keras.Input(shape=(128,128,3))
patch3 = tf.keras.Input(shape=(128,128,3))
patch4 = tf.keras.Input(shape=(128,128,3))
# Backbone: DenseNet201 without its classification head, with average pooling.
cnn = DenseNet201(include_top=False, pooling='avg')
# Features of the full 256x256 image.
out1 = cnn(in1)
# Features of each 128x128 patch, computed with the same `cnn` object.
path_out1 = cnn(patch1)
path_out2 = cnn(patch2)
path_out3 = cnn(patch3)
path_out4 = cnn(patch4)
# Average the four patch feature tensors.
patch_out_average = tf.keras.layers.Average()([path_out1, path_out2, path_out3, path_out4])
# Stack full-image and averaged-patch features along a new leading axis
# (tf.stack defaults to axis=0).
out_combined = tf.stack([out1, patch_out_average])
我的问题:有没有办法让这段代码更优雅、减少手工重复?例如要切成 16 个 64x64 的图像块时,我不想手动写 16 行输入。有没有办法把图像自动切分成多个图像块(patch)并返回它们的平均张量,或者至少让代码更简短?
谢谢。
更新(使用下面答案中的代码):
from keras.applications.densenet import DenseNet201
from keras.layers import Dense, Flatten, Concatenate
from keras.activations import relu
class CreatePatches(tf.keras.layers.Layer):
    """Tile an image batch into non-overlapping square patches and run a CNN on each.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        cnn: Callable (e.g. a Keras model) applied to every patch.
        **kwargs: Standard `tf.keras.layers.Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, cnn, **kwargs):
        # Forward Layer kwargs (name, dtype, ...) instead of silently rejecting them.
        super(CreatePatches, self).__init__(**kwargs)
        self.patch_size = patch_size
        self.cnn = cnn

    def call(self, inputs):
        """Return a list holding `cnn(patch)` for every patch, in row-major order.

        `inputs` is indexed as (batch, height, width, channels). Height and
        width must be statically known. If either one is not a multiple of
        `patch_size`, the trailing patches are smaller than `patch_size`.
        """
        # Read height and width separately so rectangular images are tiled
        # completely, rather than reusing the height for both axes.
        height, width = inputs.shape[1], inputs.shape[2]
        step = self.patch_size
        patches = []
        for top in range(0, height, step):
            for left in range(0, width, step):
                patch = inputs[:, top:top + step, left:left + step, :]
                patches.append(self.cnn(patch))
        return patches
# Main image: a single 256x256 RGB input; every patch below is derived from it.
in1 = tf.keras.Input(shape=(256,256,3))
# Backbone: DenseNet201 without its classification head, with average pooling.
cnn = DenseNet201(include_top=False, pooling='avg')
# Features of the full 256x256 image.
out256 = cnn(in1)
# Per-patch features for the 128x128 tiling (256 / 128 = 2 per axis -> 4 patches).
out128 = CreatePatches(patch_size=128, cnn = cnn)(in1)
# Per-patch features for the 64x64 tiling (256 / 64 = 4 per axis -> 16 patches).
out64 = CreatePatches(patch_size=64, cnn = cnn)(in1)
# Average the per-patch features within each scale.
out128 = tf.keras.layers.Average()(out128)
out64 = tf.keras.layers.Average()(out64)
# Stack the three per-scale feature tensors along a new axis 1.
out_combined = tf.stack([out256, out128, out64], axis = 1)
# Average across the stacked scales.
out_averaged = tf.keras.layers.GlobalAveragePooling1D()(out_combined)
# Bare expression — presumably left so a notebook cell displays the tensor.
out_averaged
更新(2021 年 7 月 16 日)
我在 Keras 的 Vision Transformer 教程中找到了下面这段代码,它实现了一个自定义 Keras 层,使用 tf.image.extract_patches 函数从图像中提取图像块(patch)。
class Patches(layers.Layer):
    """Split a batch of images into flattened, non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard Keras `Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, **kwargs):
        # Forward Layer kwargs so `from_config` can restore name/dtype/etc.
        super(Patches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the patches reshaped to (batch, num_patches, patch_dims)."""
        # Read the batch size dynamically so the layer also works when it is
        # unknown at graph-construction time.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            # Stride equal to the patch size, so patches do not overlap.
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            # "VALID" keeps only patches that fit entirely inside the image.
            padding="VALID",
        )
        # extract_patches flattens each patch into the last axis.
        patch_dims = patches.shape[-1]
        # Collapse the patch-grid axes into a single sequence axis.
        return tf.reshape(patches, [batch_size, -1, patch_dims])

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(Patches, self).get_config()
        config.update({"patch_size": self.patch_size})
        return config
现有解决方案
您可以创建一个自定义 Keras 层(custom Keras Layer),将给定的正方形图像(宽 = 高)分割成小块,如下所示:
class CreatePatches(tf.keras.layers.Layer):
    """Tile an image batch into non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard `tf.keras.layers.Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, **kwargs):
        # Forward Layer kwargs so `from_config` can restore name/dtype/etc.
        super(CreatePatches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, inputs):
        """Return the list of patches in row-major (top-to-bottom, left-to-right) order.

        `inputs` is indexed as (batch, height, width, channels). Height and
        width must be statically known. If either one is not a multiple of
        `patch_size`, the trailing patches are smaller than `patch_size`.
        """
        # Read height and width separately so rectangular images are tiled
        # completely, rather than reusing the height for both axes.
        height, width = inputs.shape[1], inputs.shape[2]
        step = self.patch_size
        patches = []
        for top in range(0, height, step):
            for left in range(0, width, step):
                patches.append(inputs[:, top:top + step, left:left + step, :])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CreatePatches, self).get_config()
        config.update({"patch_size": self.patch_size})
        return config
# Smoke test: one random image of shape (1, 256, 256, 3).
# NOTE(review): `np` is not imported in the visible snippet — presumably
# `import numpy as np` ran earlier; confirm.
sample_image = np.random.rand( 1 , 256 , 256 , 3 )
# 256 / 128 = 2 steps per axis, so the call below yields 4 patches.
layer = CreatePatches( 128 )
layer( sample_image )
只需确保 inputs.shape[ 1 ] 能被 patch_size 整除。
您还可以将该层放入 Model 中,例如:
# Wrap the layer in a functional Model: one 256x256x3 input, one output per patch.
inputs = tf.keras.layers.Input( shape=( 256 , 256 , 3 ) )
patches = CreatePatches( patch_size=128 )( inputs )
model = tf.keras.models.Model( inputs , patches )
# Print the layer-by-layer summary.
model.summary()
以上片段的输出,
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 256, 256, 3)] 0
_________________________________________________________________
create_patches_5 (CreatePatc [(None, 128, 128, 3), (No 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
有关模型输出的更多详细信息,
>> model.outputs
[<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>]
我正在尝试重新创建 this paper 中的逻辑。逻辑可以总结为下图:
强调我的问题:
- 我有一个 256x256 的输入图像。它通过 densenet(下面的工作示例)
- 同一张图像被切分成 4 个大小相等、互不重叠的 128x128 图像块(patch)。这些图像块同样逐一通过 DenseNet,然后对输出取平均值。
工作代码:
from keras.applications.densenet import DenseNet201
from keras.layers import Dense, Flatten, Concatenate
from keras.activations import relu
# Baseline from the question: every input and every CNN call is written by hand.
# NOTE(review): `tf` is used below but not imported in the visible snippet —
# presumably `import tensorflow as tf` ran earlier; confirm.
# Full-resolution 256x256 RGB input.
in1 = tf.keras.Input(shape=(256,256,3))
# Four separate 128x128 RGB inputs, one per sub-patch of the main image.
# They are independent Inputs, so the caller has to crop and feed the patches.
patch1 = tf.keras.Input(shape=(128,128,3))
patch2 = tf.keras.Input(shape=(128,128,3))
patch3 = tf.keras.Input(shape=(128,128,3))
patch4 = tf.keras.Input(shape=(128,128,3))
# Backbone: DenseNet201 without its classification head, with average pooling.
cnn = DenseNet201(include_top=False, pooling='avg')
# Features of the full 256x256 image.
out1 = cnn(in1)
# Features of each 128x128 patch, computed with the same `cnn` object.
path_out1 = cnn(patch1)
path_out2 = cnn(patch2)
path_out3 = cnn(patch3)
path_out4 = cnn(patch4)
# Average the four patch feature tensors.
patch_out_average = tf.keras.layers.Average()([path_out1, path_out2, path_out3, path_out4])
# Stack full-image and averaged-patch features along a new leading axis
# (tf.stack defaults to axis=0).
out_combined = tf.stack([out1, patch_out_average])
我的问题:有没有办法让这段代码更优雅、减少手工重复?例如要切成 16 个 64x64 的图像块时,我不想手动写 16 行输入。有没有办法把图像自动切分成多个图像块(patch)并返回它们的平均张量,或者至少让代码更简短?
谢谢。
更新(使用下面答案中的代码):
from keras.applications.densenet import DenseNet201
from keras.layers import Dense, Flatten, Concatenate
from keras.activations import relu
class CreatePatches(tf.keras.layers.Layer):
    """Tile an image batch into non-overlapping square patches and run a CNN on each.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        cnn: Callable (e.g. a Keras model) applied to every patch.
        **kwargs: Standard `tf.keras.layers.Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, cnn, **kwargs):
        # Forward Layer kwargs (name, dtype, ...) instead of silently rejecting them.
        super(CreatePatches, self).__init__(**kwargs)
        self.patch_size = patch_size
        self.cnn = cnn

    def call(self, inputs):
        """Return a list holding `cnn(patch)` for every patch, in row-major order.

        `inputs` is indexed as (batch, height, width, channels). Height and
        width must be statically known. If either one is not a multiple of
        `patch_size`, the trailing patches are smaller than `patch_size`.
        """
        # Read height and width separately so rectangular images are tiled
        # completely, rather than reusing the height for both axes.
        height, width = inputs.shape[1], inputs.shape[2]
        step = self.patch_size
        patches = []
        for top in range(0, height, step):
            for left in range(0, width, step):
                patch = inputs[:, top:top + step, left:left + step, :]
                patches.append(self.cnn(patch))
        return patches
# Main image: a single 256x256 RGB input; every patch below is derived from it.
in1 = tf.keras.Input(shape=(256,256,3))
# Backbone: DenseNet201 without its classification head, with average pooling.
cnn = DenseNet201(include_top=False, pooling='avg')
# Features of the full 256x256 image.
out256 = cnn(in1)
# Per-patch features for the 128x128 tiling (256 / 128 = 2 per axis -> 4 patches).
out128 = CreatePatches(patch_size=128, cnn = cnn)(in1)
# Per-patch features for the 64x64 tiling (256 / 64 = 4 per axis -> 16 patches).
out64 = CreatePatches(patch_size=64, cnn = cnn)(in1)
# Average the per-patch features within each scale.
out128 = tf.keras.layers.Average()(out128)
out64 = tf.keras.layers.Average()(out64)
# Stack the three per-scale feature tensors along a new axis 1.
out_combined = tf.stack([out256, out128, out64], axis = 1)
# Average across the stacked scales.
out_averaged = tf.keras.layers.GlobalAveragePooling1D()(out_combined)
# Bare expression — presumably left so a notebook cell displays the tensor.
out_averaged
更新(2021 年 7 月 16 日)
我在 Keras 的 Vision Transformer 教程中找到了下面这段代码,它实现了一个自定义 Keras 层,使用 tf.image.extract_patches 函数从图像中提取图像块(patch)。
class Patches(layers.Layer):
    """Split a batch of images into flattened, non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard Keras `Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, **kwargs):
        # Forward Layer kwargs so `from_config` can restore name/dtype/etc.
        super(Patches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the patches reshaped to (batch, num_patches, patch_dims)."""
        # Read the batch size dynamically so the layer also works when it is
        # unknown at graph-construction time.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            # Stride equal to the patch size, so patches do not overlap.
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            # "VALID" keeps only patches that fit entirely inside the image.
            padding="VALID",
        )
        # extract_patches flattens each patch into the last axis.
        patch_dims = patches.shape[-1]
        # Collapse the patch-grid axes into a single sequence axis.
        return tf.reshape(patches, [batch_size, -1, patch_dims])

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(Patches, self).get_config()
        config.update({"patch_size": self.patch_size})
        return config
现有解决方案
您可以创建一个自定义 Keras 层(custom Keras Layer),将给定的正方形图像(宽 = 高)分割成小块,如下所示:
class CreatePatches(tf.keras.layers.Layer):
    """Tile an image batch into non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard `tf.keras.layers.Layer` arguments such as `name`.
    """

    def __init__(self, patch_size, **kwargs):
        # Forward Layer kwargs so `from_config` can restore name/dtype/etc.
        super(CreatePatches, self).__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, inputs):
        """Return the list of patches in row-major (top-to-bottom, left-to-right) order.

        `inputs` is indexed as (batch, height, width, channels). Height and
        width must be statically known. If either one is not a multiple of
        `patch_size`, the trailing patches are smaller than `patch_size`.
        """
        # Read height and width separately so rectangular images are tiled
        # completely, rather than reusing the height for both axes.
        height, width = inputs.shape[1], inputs.shape[2]
        step = self.patch_size
        patches = []
        for top in range(0, height, step):
            for left in range(0, width, step):
                patches.append(inputs[:, top:top + step, left:left + step, :])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CreatePatches, self).get_config()
        config.update({"patch_size": self.patch_size})
        return config
# Smoke test: one random image of shape (1, 256, 256, 3).
# NOTE(review): `np` is not imported in the visible snippet — presumably
# `import numpy as np` ran earlier; confirm.
sample_image = np.random.rand( 1 , 256 , 256 , 3 )
# 256 / 128 = 2 steps per axis, so the call below yields 4 patches.
layer = CreatePatches( 128 )
layer( sample_image )
只需确保 inputs.shape[ 1 ] 能被 patch_size 整除。
您还可以将该层放入 Model 中,例如:
# Wrap the layer in a functional Model: one 256x256x3 input, one output per patch.
inputs = tf.keras.layers.Input( shape=( 256 , 256 , 3 ) )
patches = CreatePatches( patch_size=128 )( inputs )
model = tf.keras.models.Model( inputs , patches )
# Print the layer-by-layer summary.
model.summary()
以上片段的输出,
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 256, 256, 3)] 0
_________________________________________________________________
create_patches_5 (CreatePatc [(None, 128, 128, 3), (No 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
有关模型输出的更多详细信息,
>> model.outputs
[<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>,
<KerasTensor: shape=(None, 128, 128, 3) dtype=float32 (created by layer 'create_patches_5')>]