How to fix "ValueError: Operands could not be broadcast together with shapes (2592,) (4,)" in Tensorflow?
I am currently implementing a NoisyNet layer in TensorFlow, as proposed in "Noisy Networks for Exploration", and I get the dimensionality error from the title when multiplying two tensors element-wise in the line filtered_output = keras.layers.merge.Multiply()([output, actions_input]).
In principle, the two tensors involved, output and actions_input, should be compatible: according to the print-outs of their dimensions, both appear to have shape=(1, 4).
I am using TensorFlow 1.12.0 with Python 3.
The relevant code is as follows:
import numpy as np
import tensorflow as tf
import keras
class NoisyLayer(keras.layers.Layer):
    def __init__(self, in_shape=(1,2592), out_units=256, activation=tf.identity):
        super(NoisyLayer, self).__init__()
        self.in_shape = in_shape
        self.out_units = out_units
        self.mu_interval = 1.0/np.sqrt(float(self.out_units))
        self.sig_0 = 0.5
        self.activation = activation
        self.assign_resampling()

    def build(self, input_shape):
        # Initializers
        self.mu_initializer = tf.initializers.random_uniform(minval=-self.mu_interval, maxval=self.mu_interval)  # Mu-initializer
        self.si_initializer = tf.initializers.constant(self.sig_0/np.sqrt(float(self.out_units)))                # Sigma-initializer

        # Weights
        self.w_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True)  # (1,2592)x(2592,4) = (1,4)
        self.w_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True)

        # Biases
        self.b_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)
        self.b_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)

    def call(self, inputs, resample_noise_flag):
        if resample_noise_flag:
            self.assign_resampling()

        # Putting it all together
        self.w = tf.math.add(self.w_mu, tf.math.multiply(self.w_si, self.w_eps))
        self.b = tf.math.add(self.b_mu, tf.math.multiply(self.b_si, self.q_eps))

        return self.activation(tf.linalg.matmul(inputs, self.w) + self.b)

    def assign_resampling(self):
        self.p_eps = self.f(self.resample_noise([self.in_shape[-1], 1]))
        self.q_eps = self.f(self.resample_noise([1, self.out_units]))
        self.w_eps = self.p_eps * self.q_eps  # Outer product of input_noise and output_noise (factorised noise)

    def resample_noise(self, shape):
        return tf.random.normal(shape, mean=0.0, stddev=1.0, seed=None, name=None)

    def f(self, x):
        return tf.math.multiply(tf.math.sign(x), tf.math.sqrt(tf.math.abs(x)))
frames_input = tf.ones((1, 84, 84, 4)) # Toy input
conv1 = keras.layers.Conv2D(16, (8, 8), strides=(4, 4), activation="relu")(frames_input)
conv2 = keras.layers.Conv2D(32, (4, 4), strides=(2, 2), activation="relu")(conv1)
flattened = keras.layers.Flatten()(conv2)
actionspace_size = 4
# NoisyNet
hidden = NoisyLayer(activation=tf.nn.relu)(inputs=flattened, resample_noise_flag=True)
output = NoisyLayer(in_shape=(1,256), out_units=actionspace_size)(inputs=hidden, resample_noise_flag=True)
actions_input = tf.ones((1,actionspace_size))
print('hidden:\n', hidden)
print('output:\n', output)
print('actions_input:\n', actions_input)
filtered_output = keras.layers.merge.Multiply()([output, actions_input])
When I run the code, the output looks like this:
hidden:
Tensor("noisy_layer_5/Relu:0", shape=(1, 256), dtype=float32)
output:
Tensor("noisy_layer_6/Identity:0", shape=(1, 4), dtype=float32)
actions_input:
Tensor("ones_5:0", shape=(1, 4), dtype=float32)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-f6df621eacab> in <module>()
68 print('actions_input:\n', actions_input)
69
---> 70 filtered_output = keras.layers.merge.Multiply()([output, actions_input])
2 frames
/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py in _compute_elemwise_op_output_shape(self, shape1, shape2)
59 raise ValueError('Operands could not be broadcast '
60 'together with shapes ' +
---> 61 str(shape1) + ' ' + str(shape2))
62 output_shape.append(i)
63 return tuple(output_shape)
ValueError: Operands could not be broadcast together with shapes (2592,) (4,)
In particular, I would like to know where the number 2592 in Operands could not be broadcast together with shapes (2592,) (4,) comes from: that number matches the length of the flattened input tensor flattened fed into the first noisy layer, but, as far as I can see, it should no longer be part of the output dimension of the second noisy layer output, which in turn is the input to the failing line indicated above.
Does anyone know what is going wrong?
Thanks in advance, Daniel
As described in the custom layer documentation, you need to implement the compute_output_shape(input_shape) method:
compute_output_shape(input_shape): in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
Without this method, Keras cannot infer the output shape without actually running the computation; the base Layer implementation simply returns the input shape, which is why both of your noisy layers are recorded with the shape of the flattened input:
print(keras.backend.int_shape(hidden))
print(keras.backend.int_shape(output))
(1, 2592)
(1, 2592)
So you need to add the following to NoisyLayer:
def compute_output_shape(self, input_shape):
    return (input_shape[0], self.out_units)
In addition, the build() method must set self.built = True at the end; according to the documentation, calling super(NoisyLayer, self).build(input_shape) at the end of build() takes care of this.
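For completeness, here is a minimal sketch (not a tested drop-in, just the answer's two changes applied to the layer from the question) of how build() and compute_output_shape() could look; __init__, call, assign_resampling, resample_noise and f stay exactly as in the question:

class NoisyLayer(keras.layers.Layer):
    # ... __init__ as in the question ...

    def build(self, input_shape):
        # Initializers (unchanged)
        self.mu_initializer = tf.initializers.random_uniform(minval=-self.mu_interval, maxval=self.mu_interval)
        self.si_initializer = tf.initializers.constant(self.sig_0/np.sqrt(float(self.out_units)))
        # Weights (unchanged)
        self.w_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True)
        self.w_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[-1], self.out_units), dtype='float32'), trainable=True)
        # Biases (unchanged)
        self.b_mu = tf.Variable(initial_value=self.mu_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)
        self.b_si = tf.Variable(initial_value=self.si_initializer(shape=(self.in_shape[0], self.out_units), dtype='float32'), trainable=True)
        # New: lets the base class mark the layer as built (sets self.built = True)
        super(NoisyLayer, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        # New: tells Keras that (batch, in_features) maps to (batch, out_units)
        return (input_shape[0], self.out_units)

    # ... call, assign_resampling, resample_noise, f as in the question ...

With these two methods in place, keras.backend.int_shape(output) should report (1, 4) rather than (1, 2592), and keras.layers.merge.Multiply()([output, actions_input]) should no longer raise the broadcasting error.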