keras中的多图像输入
Multi image input in keras
我尝试使用 keras 实现 pilotnet 模型。使用顺序模型我能够实现 1 个图像 CNN 但我们如何将 3 个图像输入到 keras 中的 CNN 网络。
'''
def createModel():
    """Build the single-view PilotNet CNN (center-camera image only).

    Input: one (66, 200, 3) image; output: a single regression value
    (the steering angle). Compiled with Adam and MSE loss.
    """
    # Fix: the machine-translated line "模型=顺序()" left `model` undefined
    # (NameError on the next line); it must be `model = Sequential()`.
    model = Sequential()
    model.add(Convolution2D(24, (5, 5), (2, 2), input_shape=(66, 200, 3), activation='relu'))
    model.add(Convolution2D(36, (5, 5), (2, 2), activation='relu'))
    model.add(Convolution2D(48, (5, 5), (2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    # `lr` is deprecated in TF2 Keras optimizers; use `learning_rate`.
    model.compile(Adam(learning_rate=0.0001), loss='mse')
    return model
'''
此实现仅适用于中心摄像头图像,但我如何将左侧和右侧摄像头图像输入模型,以便我只获得 1 个输出,即我的转向角。Model I'm trying to implement
可以直接把 `input_shape` 改成 `(N, 66, 200, 3)`。这样,训练和测试时输入批次的形状就应该是 `(B, N, 66, 200, 3)`,其中 `B` 是批次大小,`N` 是视角数(num_views),在你的情况下是三个(中心、左、右)。
下面是 `input_shape = (3, 66, 200, 3)` 时的模型摘要。
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_15 (Conv2D) (None, 3, 31, 98, 24) 1824
_________________________________________________________________
conv2d_16 (Conv2D) (None, 3, 14, 47, 36) 21636
_________________________________________________________________
conv2d_17 (Conv2D) (None, 3, 5, 22, 48) 43248
_________________________________________________________________
conv2d_18 (Conv2D) (None, 3, 3, 20, 64) 27712
_________________________________________________________________
conv2d_19 (Conv2D) (None, 3, 1, 18, 64) 36928
_________________________________________________________________
flatten_3 (Flatten) (None, 3456) 0
_________________________________________________________________
dense_12 (Dense) (None, 100) 345700
_________________________________________________________________
dense_13 (Dense) (None, 50) 5050
_________________________________________________________________
dense_14 (Dense) (None, 10) 510
_________________________________________________________________
dense_15 (Dense) (None, 1) 11
=================================================================
Total params: 482,619
Trainable params: 482,619
Non-trainable params: 0
_________________________________________________________________
在我看来,解决你的问题的正确方法是使用 Keras 函数式 API:它很方便,专门适用于设计复杂模型以及多输入或多输出的模型,而你的情况正是需要多个输入。除了模型设计之外,在 Keras 中还需要处理如何把多张图片喂给模型(数据输入部分),不过我会跳过这一点,因为我认为你已经了解了。
我已经为您的问题提供了一个使用 Keras 函数 API 的建模示例。我指的是你附上的图片,你可以自己修改模型结构。从图片中我了解到所有 3 个 CNN 都不共享权重。
我已经展示了这两种方法的示例,第一种方法声明所有 3 个 CNN 同时合并到一个模型中。
第二种方法是 3 个结构相同的 CNN,既可以单独搭建,也可以方便地组合成新的模型。
第一种方法
import tensorflow as tf


def createModel():
    """Build a three-input PilotNet-style model with the Keras functional API.

    Three independent (non-weight-sharing) CNN branches encode the center,
    right and left camera images; their flattened features are concatenated
    and passed through dense layers to one steering-angle output.
    """
    image_shape = (66, 200, 3)
    input_img_center = tf.keras.Input(image_shape)
    input_img_right = tf.keras.Input(image_shape)
    input_img_left = tf.keras.Input(image_shape)

    # CNN branch for the center view.
    f1 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_center)
    f1 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f1)
    f1 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f1)
    f1 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f1)
    f1 = tf.keras.layers.Flatten()(f1)

    # CNN branch for the right view.
    f2 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_right)
    f2 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f2)
    f2 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f2)
    f2 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f2)
    f2 = tf.keras.layers.Flatten()(f2)

    # CNN branch for the left view.
    f3 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_left)
    f3 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f3)
    f3 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f3)
    f3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f3)
    f3 = tf.keras.layers.Flatten()(f3)

    # Concatenate the feature vectors from the 3 views.
    f = tf.keras.layers.concatenate([f1, f2, f3])
    # Head: fully connected layers down to the single regression output.
    # NOTE(review): the original PilotNet uses a second Conv2D(64, (3, 3))
    # per branch; add it here if an exact match is desired.
    f = tf.keras.layers.Dense(100, activation='relu')(f)
    f = tf.keras.layers.Dense(50, activation='relu')(f)
    f = tf.keras.layers.Dense(10, activation='relu')(f)
    output = tf.keras.layers.Dense(1)(f)

    model = tf.keras.Model([input_img_center, input_img_right, input_img_left], [output])
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=opt, loss='mse')
    model.summary()
    return model


model = createModel()
第二种方法
def createModel():
    """Build one single-view CNN feature extractor.

    Maps a (66, 200, 3) image to a flattened feature vector; three such
    models (center/right/left) are later combined by createCombineModel.
    """
    image_shape = (66, 200, 3)
    input_img = tf.keras.Input(image_shape)
    f = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img)
    f = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f)
    f = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f)
    f = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f)
    f = tf.keras.layers.Flatten()(f)
    model = tf.keras.Model([input_img], [f])
    model.summary()
    return model
def createCombineModel(center_model, right_model, left_model):
    """Combine three single-view feature extractors into one model.

    Each sub-model consumes one camera image; their feature vectors are
    concatenated and fed through dense layers to a single steering-angle
    output. Returns the compiled (Adam, MSE) combined model.
    """
    image_shape = (66, 200, 3)
    input_img_center = tf.keras.Input(image_shape)
    input_img_right = tf.keras.Input(image_shape)
    input_img_left = tf.keras.Input(image_shape)

    # Run each view through its own (non-weight-sharing) feature extractor.
    f1 = center_model(input_img_center)
    f2 = right_model(input_img_right)
    f3 = left_model(input_img_left)

    # Concatenate the feature vectors from the 3 views.
    f = tf.keras.layers.concatenate([f1, f2, f3])
    # Head: fully connected layers down to the single regression output.
    f = tf.keras.layers.Dense(100, activation='relu')(f)
    f = tf.keras.layers.Dense(50, activation='relu')(f)
    f = tf.keras.layers.Dense(10, activation='relu')(f)
    output = tf.keras.layers.Dense(1)(f)

    model = tf.keras.Model([input_img_center, input_img_right, input_img_left], [output])
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=opt, loss='mse')
    model.summary()
    return model
# Build one feature extractor per camera view, then combine them.
center_model = createModel()
right_model = createModel()
left_model = createModel()
# Fix: keep a reference to the combined model (the original discarded
# the return value, making the result unusable for training/inference).
combined_model = createCombineModel(center_model, right_model, left_model)
我尝试使用 keras 实现 pilotnet 模型。使用顺序模型我能够实现 1 个图像 CNN 但我们如何将 3 个图像输入到 keras 中的 CNN 网络。
'''
def createModel():
    """Build the single-view PilotNet CNN (center-camera image only).

    Input: one (66, 200, 3) image; output: a single regression value
    (the steering angle). Compiled with Adam and MSE loss.
    """
    # Fix: the machine-translated fragment "模型=顺序()" left `model`
    # undefined (NameError); it must be `model = Sequential()`.
    model = Sequential()
    model.add(Convolution2D(24, (5, 5), (2, 2), input_shape=(66, 200, 3), activation='relu'))
    model.add(Convolution2D(36, (5, 5), (2, 2), activation='relu'))
    model.add(Convolution2D(48, (5, 5), (2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    # `lr` is deprecated in TF2 Keras optimizers; use `learning_rate`.
    model.compile(Adam(learning_rate=0.0001), loss='mse')
    return model
'''
此实现仅适用于中心摄像头图像,但我如何将左侧和右侧摄像头图像输入模型,以便我只获得 1 个输出,即我的转向角。Model I'm trying to implement
可以直接把 `input_shape` 改成 `(N, 66, 200, 3)`。这样,训练和测试时输入批次的形状就应该是 `(B, N, 66, 200, 3)`,其中 `B` 是批次大小,`N` 是视角数(num_views),在你的情况下是三个(中心、左、右)。
下面是 `input_shape = (3, 66, 200, 3)` 时的模型摘要。
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_15 (Conv2D) (None, 3, 31, 98, 24) 1824
_________________________________________________________________
conv2d_16 (Conv2D) (None, 3, 14, 47, 36) 21636
_________________________________________________________________
conv2d_17 (Conv2D) (None, 3, 5, 22, 48) 43248
_________________________________________________________________
conv2d_18 (Conv2D) (None, 3, 3, 20, 64) 27712
_________________________________________________________________
conv2d_19 (Conv2D) (None, 3, 1, 18, 64) 36928
_________________________________________________________________
flatten_3 (Flatten) (None, 3456) 0
_________________________________________________________________
dense_12 (Dense) (None, 100) 345700
_________________________________________________________________
dense_13 (Dense) (None, 50) 5050
_________________________________________________________________
dense_14 (Dense) (None, 10) 510
_________________________________________________________________
dense_15 (Dense) (None, 1) 11
=================================================================
Total params: 482,619
Trainable params: 482,619
Non-trainable params: 0
_________________________________________________________________
在我看来,解决你的问题的正确方法是使用 Keras 函数式 API:它很方便,专门适用于设计复杂模型以及多输入或多输出的模型,而你的情况正是需要多个输入。除了模型设计之外,在 Keras 中还需要处理如何把多张图片喂给模型(数据输入部分),不过我会跳过这一点,因为我认为你已经了解了。
我已经为您的问题提供了一个使用 Keras 函数 API 的建模示例。我指的是你附上的图片,你可以自己修改模型结构。从图片中我了解到所有 3 个 CNN 都不共享权重。
我已经展示了这两种方法的示例,第一种方法声明所有 3 个 CNN 同时合并到一个模型中。
第二种方法是 3 个结构相同的 CNN,既可以单独搭建,也可以方便地组合成新的模型。
第一种方法
import tensorflow as tf


def createModel():
    """Build a three-input PilotNet-style model with the Keras functional API.

    Three independent (non-weight-sharing) CNN branches encode the center,
    right and left camera images; their flattened features are concatenated
    and passed through dense layers to one steering-angle output.
    """
    image_shape = (66, 200, 3)
    input_img_center = tf.keras.Input(image_shape)
    input_img_right = tf.keras.Input(image_shape)
    input_img_left = tf.keras.Input(image_shape)

    # CNN branch for the center view.
    f1 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_center)
    f1 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f1)
    f1 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f1)
    f1 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f1)
    f1 = tf.keras.layers.Flatten()(f1)

    # CNN branch for the right view.
    f2 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_right)
    f2 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f2)
    f2 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f2)
    f2 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f2)
    f2 = tf.keras.layers.Flatten()(f2)

    # CNN branch for the left view.
    f3 = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img_left)
    f3 = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f3)
    f3 = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f3)
    f3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f3)
    f3 = tf.keras.layers.Flatten()(f3)

    # Concatenate the feature vectors from the 3 views.
    f = tf.keras.layers.concatenate([f1, f2, f3])
    # Head: fully connected layers down to the single regression output.
    # NOTE(review): the original PilotNet uses a second Conv2D(64, (3, 3))
    # per branch; add it here if an exact match is desired.
    f = tf.keras.layers.Dense(100, activation='relu')(f)
    f = tf.keras.layers.Dense(50, activation='relu')(f)
    f = tf.keras.layers.Dense(10, activation='relu')(f)
    output = tf.keras.layers.Dense(1)(f)

    model = tf.keras.Model([input_img_center, input_img_right, input_img_left], [output])
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=opt, loss='mse')
    model.summary()
    return model


model = createModel()
第二种方法
def createModel():
    """Build one single-view CNN feature extractor.

    Maps a (66, 200, 3) image to a flattened feature vector; three such
    models (center/right/left) are later combined by createCombineModel.
    """
    image_shape = (66, 200, 3)
    input_img = tf.keras.Input(image_shape)
    f = tf.keras.layers.Conv2D(24, (5, 5), (2, 2), activation='relu')(input_img)
    f = tf.keras.layers.Conv2D(36, (5, 5), (2, 2), activation='relu')(f)
    f = tf.keras.layers.Conv2D(48, (5, 5), (2, 2), activation='relu')(f)
    f = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(f)
    f = tf.keras.layers.Flatten()(f)
    model = tf.keras.Model([input_img], [f])
    model.summary()
    return model
def createCombineModel(center_model, right_model, left_model):
    """Combine three single-view feature extractors into one model.

    Each sub-model consumes one camera image; their feature vectors are
    concatenated and fed through dense layers to a single steering-angle
    output. Returns the compiled (Adam, MSE) combined model.
    """
    image_shape = (66, 200, 3)
    input_img_center = tf.keras.Input(image_shape)
    input_img_right = tf.keras.Input(image_shape)
    input_img_left = tf.keras.Input(image_shape)

    # Run each view through its own (non-weight-sharing) feature extractor.
    f1 = center_model(input_img_center)
    f2 = right_model(input_img_right)
    f3 = left_model(input_img_left)

    # Concatenate the feature vectors from the 3 views.
    f = tf.keras.layers.concatenate([f1, f2, f3])
    # Head: fully connected layers down to the single regression output.
    f = tf.keras.layers.Dense(100, activation='relu')(f)
    f = tf.keras.layers.Dense(50, activation='relu')(f)
    f = tf.keras.layers.Dense(10, activation='relu')(f)
    output = tf.keras.layers.Dense(1)(f)

    model = tf.keras.Model([input_img_center, input_img_right, input_img_left], [output])
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=opt, loss='mse')
    model.summary()
    return model
# Build one feature extractor per camera view, then combine them.
center_model = createModel()
right_model = createModel()
left_model = createModel()
# Fix: keep a reference to the combined model (the original discarded
# the return value, making the result unusable for training/inference).
combined_model = createCombineModel(center_model, right_model, left_model)