InvalidArgumentError: In[0].dim(0) and In[1].dim(0) must be the same: [1,125,150] vs [32,150,125]
I am trying to create a custom layer that merges 2 sources. I am getting the error "InvalidArgumentError: In[0].dim(0) and In[1].dim(0) must be the same: [1,125,150] vs [32,150,125]". The code runs if I set the batch_size to 1, so the shapes become [1,125,150] vs [1,150,125]; however, the loss then never changes, so that is still not the root cause. I think I need to use the batch size instead of just expanding dims.
# assumed imports for this snippet: tf, K, L, and INIT as used below
import tensorflow as tf
import keras.backend as K
import keras.layers as L
import keras.initializers as INIT

class mergeLayer(L.Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(mergeLayer, self).__init__()
        self.kernel_initializer = INIT.get('uniform')

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=input_shape[1:],
                                      initializer=self.kernel_initializer,
                                      trainable=True)
        super(mergeLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        temp = K.batch_dot(tf.expand_dims(self.kernel, 0), tf.transpose(x, perm=[0, 2, 1])) + 1
        return temp

    def compute_output_shape(self, input_shape):
        return input_shape
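For reference, the shape constraint can be checked in isolation: K.batch_dot requires both arguments to have the same first (batch) dimension, so a kernel expanded to a batch dimension of 1 cannot be dotted against a batch of 32. A minimal sketch of the mismatch (my own illustration, assuming keras.backend and NumPy; the shapes mirror the error message):

import numpy as np
import keras.backend as K

kernel = K.constant(np.ones((1, 125, 150)))  # kernel after expand_dims: batch dim 1
x = K.constant(np.ones((32, 125, 150)))      # a real batch: batch dim 32
xT = K.permute_dimensions(x, (0, 2, 1))      # (32, 150, 125), as in the error message

# K.batch_dot(kernel, xT)  # fails: In[0].dim(0)=1 vs In[1].dim(0)=32 must match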
Below is the code that fits the model. Again, if I change batch_size to 1 here, the code runs, but the loss stays the same.
modelMerge.fit(x=[train1,train2],y=cats,epochs=100,batch_size=32,shuffle='batch')
score = modelMerge.evaluate(x=[test1,test2],y=cats,batch_size=32)
Output when batch_size is 1:
Epoch 1/100
3903/3903 [=========================] - 45s - loss: 15.7062 - acc: 0.0254
Epoch 2/100
3903/3903 [=========================] - 43s - loss: 15.7050 - acc: 0.0254
Epoch 3/100
277/3903 [=>.......................] - ETA: 42s - loss: 15.8272 - acc: 0.0181
Thank you very much for your time and help.
Update: here is the Keras model structure that calls mergeLayer.
# assumed imports for this snippet: keras.layers as L, keras.models as M, keras.optimizers as O
import keras.layers as L
import keras.models as M
import keras.optimizers as O

def buildModel_merge(numClasses):
    source = L.Input(shape=(64, 25, 1))
    x = L.Conv2D(150, (3, 3), activation='relu', name='conv1a')(source)
    x = L.MaxPooling2D((2, 2))(x)
    x = L.BatchNormalization()(x)
    x = L.Conv2D(150, (3, 3), activation='relu', name='conv2a')(x)
    x = L.Conv2D(150, (5, 5), activation='relu', name='conv3a')(x)
    x = L.Dropout(0.5)(x)
    # reshape into a dxN matrix: 25*5 = 125 spatial positions, 150 channels
    x = L.Reshape((125, 150))(x)
    x = mergeLayer(100)(x)

    source2 = L.Input(shape=(30, 30, 30, 1))
    x2 = L.Conv3D(32, (5, 5, 5), strides=(2, 2, 2), activation='relu', name='conv1b')(source2)
    x2 = L.Dropout(0.2)(x2)
    x2 = L.Conv3D(32, (3, 3, 3), activation='relu', name='conv2b')(x2)
    x2 = L.MaxPooling3D(pool_size=(2, 2, 2), name='pool2b')(x2)
    x2 = L.Dropout(0.3)(x2)
    # reshape into a dxM matrix: 5*5*5 = 125 spatial positions, 32 channels
    x2 = L.Reshape((125, 32))(x2)
    x2 = mergeLayer(100)(x2)

    #x = L.Multiply(x, x2)(x)
    x = L.Multiply()([x, x2])
    x = L.Flatten()(x)
    x = L.Dense(400, activation='relu', name='dense1')(x)  # Is relu used here?
    x = L.Dropout(0.5)(x)
    classify = L.Dense(numClasses, activation='softmax', name='dense2')(x)

    model = M.Model(inputs=[source, source2], outputs=classify)
    optimizer = O.SGD(momentum=0.02)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
    return model
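To confirm that the two branches line up before the Multiply, the model can be instantiated and inspected; a quick sketch, assuming a placeholder class count of 10:

model = buildModel_merge(numClasses=10)
model.summary()  # both branches reshape to 125 rows ((125, 150) and (125, 32)) before merging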
Here are some corrections to your code:

- You don't need the output_dim and **kwargs arguments.
- Instead of using expand_dims on the kernel, I defined it with the extra dimension (however, your Keras version seems to behave differently from mine, so use the #alternative lines of code in that case).
- Main problem: batch_dot expects two tensors with the same batch size (meaning: the first dimension must be the same). Solved this by repeating the kernel to match the batch size of x, as shown in the corrected class below.
- Swapped all tf functions for Keras backend functions (import keras.backend as K). This was not the problem, but it lets you port this solution to other supported backends.
# assumed imports: the answer uses keras.backend as K; Layer and INIT as in the question
import keras.backend as K
import keras.initializers as INIT
from keras.layers import Layer

class mergeLayer(Layer):
    # your init doesn't need output_dim and **kwargs
    def __init__(self):
        super(mergeLayer, self).__init__()
        self.kernel_initializer = INIT.get('uniform')

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(
            name='kernel',
            # corrected shape to avoid expand_dims
            shape=(1,) + input_shape[1:],
            # alternative:
            # shape=input_shape[1:],
            initializer=self.kernel_initializer,
            trainable=True)
        super(mergeLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        # take a tensor of ones with the same shape as x
        form = K.ones_like(x)
        # multiplies the kernel to match the batch size of x
        kernel = form * self.kernel
        # alternative:
        # kernel = form * K.expand_dims(self.kernel, 0)
        # used K.permute_dimensions instead of tf
        temp = K.batch_dot(kernel, K.permute_dimensions(x, (0, 2, 1))) + 1
        return temp

    def compute_output_shape(self, input_shape):
        # batch_dot of (batch, d, N) with (batch, N, d) yields (batch, d, d),
        # so the declared output shape must be square, not input_shape
        return (input_shape[0], input_shape[1], input_shape[1])
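As a quick sanity check (a sketch of my own, assuming the corrected class above and NumPy for dummy data; note that the call sites in buildModel_merge become mergeLayer() with no argument):

import numpy as np
import keras.layers as L
import keras.models as M

inp = L.Input(shape=(125, 150))
out = mergeLayer()(inp)  # no output_dim argument anymore
model = M.Model(inputs=inp, outputs=out)

dummy = np.ones((32, 125, 150), dtype='float32')
print(model.predict(dummy, batch_size=32).shape)  # (32, 125, 125)

Since the kernel is tiled with K.ones_like(x) inside call, it always matches whatever batch size arrives, so training with batch_size=32 no longer raises the InvalidArgumentError.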