TensorFlow 1.x: TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
TensorFlow 1.x: TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
我是 TensorFlow 的新手。我在 TensorFlow 1.x
中制作了以下神经网络
import tensorflow as tf
import numpy as np
import tflearn
class ActorNetwork(object):
    """
    Deterministic-policy actor network (DDPG-style) for TF 1.x.

    Input to the network is the state; output is the action under a
    deterministic policy.  The output layer activation is tanh, scaled so
    the action stays between -action_bound and action_bound.

    Bug fix: the original collected parameters with tf.trainable_variables(),
    which returns *every* trainable variable in the default graph.  Creating
    a second ActorNetwork in the same graph therefore mixed the first
    instance's variables into the second one's parameter lists; tf.gradients
    then returned None for the unrelated variables, and
    tf.math.divide(None, batch_size) raised
    "TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'".
    Each instance now builds its layers inside its own auto-uniquified
    variable scope and collects only the variables of that scope, so any
    number of instances can share one graph/session.
    """

    def __init__(self, sess, state_dim, action_dim, action_bound,
                 learning_rate, tau, batch_size):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Actor network.  tf.variable_scope(None, default_name=...) yields a
        # unique scope per instance ("actor", "actor_1", ...), so the
        # variable collection below sees only *this* instance's parameters.
        with tf.variable_scope(None, default_name="actor") as scope:
            self.inputs, self.out, self.scaled_out = self.create_actor_network()
        # The trailing "/" matters: the scope argument is a regex prefix, and
        # without it "actor" would also match "actor_1/...".
        self.network_params = tf.trainable_variables(scope=scope.name + "/")

        # Target network, in its own unique scope.
        with tf.variable_scope(None, default_name="target_actor") as target_scope:
            (self.target_inputs, self.target_out,
             self.target_scaled_out) = self.create_actor_network()
        self.target_network_params = tf.trainable_variables(
            scope=target_scope.name + "/")

        # Op for periodically soft-updating the target network:
        # theta_target <- tau * theta + (1 - tau) * theta_target
        self.update_target_network_params = [
            self.target_network_params[i].assign(
                tf.multiply(self.network_params[i], self.tau)
                + tf.multiply(self.target_network_params[i], 1. - self.tau))
            for i in range(len(self.target_network_params))]

        # dQ/da, provided by the critic network at train() time.
        self.action_gradient = tf.placeholder(tf.float32, [None, self.a_dim])

        # Policy gradient: d(scaled_out)/d(theta) weighted by -dQ/da,
        # averaged over the batch.  With correctly scoped params, no entry
        # of the gradient list is None.
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)
        self.actor_gradients = [
            tf.math.divide(g, self.batch_size)
            for g in self.unnormalized_actor_gradients]

        # Optimization op (ascent: the sign was already flipped via
        # -self.action_gradient above).
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))

        self.num_trainable_vars = (len(self.network_params)
                                   + len(self.target_network_params))

    def create_actor_network(self):
        """Build state -> action layers; return (inputs, out, scaled_out)."""
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        net = tflearn.fully_connected(inputs, 400)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        net = tflearn.fully_connected(net, 300)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        # Final layer weights are init to Uniform[-3e-3, 3e-3]
        w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
        out = tflearn.fully_connected(
            net, self.a_dim, activation='tanh', weights_init=w_init)
        # Scale output to -action_bound to action_bound
        scaled_out = tf.multiply(out, self.action_bound)
        return inputs, out, scaled_out

    def train(self, inputs, a_gradient):
        """Run one optimizer step; a_gradient is dQ/da from the critic."""
        self.sess.run(self.optimize, feed_dict={
            self.inputs: inputs,
            self.action_gradient: a_gradient
        })

    def predict(self, inputs):
        """Return actions from the online network for a batch of states."""
        return self.sess.run(self.scaled_out, feed_dict={
            self.inputs: inputs
        })

    def predict_target(self, inputs):
        """Return actions from the target network for a batch of states."""
        return self.sess.run(self.target_scaled_out, feed_dict={
            self.target_inputs: inputs
        })

    def update_target_network(self):
        """Execute the soft target-update ops built in __init__."""
        self.sess.run(self.update_target_network_params)

    def get_num_trainable_vars(self):
        """Total variable count across online and target networks."""
        return self.num_trainable_vars
当我调用它时,它没有给出任何错误,但是在第二次它给出了错误。例如
with tf.Session() as sess:
actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
我只收到 actor2 的以下错误:
TypeError:/ 的操作数类型不受支持:'NoneType' 和 'int'
与lambda函数中的None值有关。但是,为什么第一次没有报错呢?
编辑:堆栈跟踪:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-2323bc1d5028> in <module>()
1 with tf.Session() as sess:
2 actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
----> 3 actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
3 frames
<ipython-input-1-895268594a81> in __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
<ipython-input-1-895268594a81> in <lambda>(x)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/math_ops.py in divide(x, y, name)
323 return DivideDelegateWithName(x, name) / y
324 else:
--> 325 return x / y
326
327
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
EDIT-2:根据建议,我在 TF 2.x 中写道。这实际上消除了错误。但是这两个网络是一样的吗?
class ActorNetwork(object):
    """
    TF 2.x / Keras port of the DDPG-style actor network.

    State in, action out; the tanh output layer is multiplied by
    action_bound so actions stay within [-action_bound, action_bound].

    NOTE(review): this code assumes `keras`, `Input` and `layers` were
    imported (presumably from `tensorflow.keras`) at module level — confirm
    the imports.
    """

    def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)
        # actor network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.actor_model = keras.Model(inputs=self.inputs, outputs=self.scaled_out, name='actor_network')
        # Per-model variable lists replace the TF1 code's graph-global
        # tf.trainable_variables(), so multiple instances cannot pick up
        # each other's variables — this is what removed the NoneType error.
        self.network_params = self.actor_model.trainable_variables
        # target actor network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_actor_model = keras.Model(inputs=self.target_inputs, outputs=self.target_scaled_out, name='target_actor_network')
        self.target_network_params = self.target_actor_model.trainable_variables

    def create_actor_network(self):
        """Build the functional graph; return (inputs, out, scaled_out) tensors."""
        inputs = Input(shape = (self.state_dim,), batch_size = None, name = "actor_input_state")
        net = layers.Dense(400, name = 'actor_dense_1a')(inputs)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        net = layers.Dense(300, name = 'actor_dense_1b')(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(20, name = 'actor_dense_1c')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(10, name = 'actor_dense_1d')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # Final layer weights initialized to Uniform[-3e-3, 3e-3], as in TF1.
        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003, seed=None)
        out = layers.Dense(self.action_dim, activation='tanh', name = 'actor_dense_2', kernel_initializer = w_init)(net)
        # Scale tanh output into the action range.
        scaled_out = tf.multiply(out, self.action_bound, name = "actions_scaling")
        return inputs, out, scaled_out

    def update_target_network(self):
        # Soft update: theta_target <- tau*theta + (1-tau)*theta_target.
        # NOTE(review): in eager mode each .assign() executes immediately
        # while the comprehension runs — unlike the TF1 version, which only
        # built ops here; the stored list is just the assign results.
        self.update_target_network_params = [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) + tf.multiply(self.target_network_params[i], 1-self.tau)) for i in range(len(self.target_network_params))]

    def train(self, inputs, a_gradient):
        # tape.gradient(..., output_gradients=-a_gradient) gives the
        # unnormalized DDPG policy gradient; dividing by batch_size averages
        # it over the batch, matching the TF1 implementation.
        with tf.GradientTape() as self.tape:
            self.prediction = self.actor_model(inputs)
        self.unnormalized_actor_gradients = self.tape.gradient(self.prediction, self.network_params, output_gradients = -a_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
        self.optimizer.apply_gradients(zip(self.actor_gradients, self.network_params))

    def predict(self, inputs):
        # Action from the online network for a batch of states.
        return self.actor_model(inputs)

    def predict_target(self, inputs):
        # Action from the target network for a batch of states.
        return self.target_actor_model(inputs)
你的问题在这里:
self.network_params = tf.trainable_variables()
这也绝对会给您带来麻烦:
self.target_network_params = tf.trainable_variables()[
len(self.network_params):]
问题是您在同一个 TensorFlow 图中创建了两个模型。调用 tf.trainable_variables()
时,您得到的是图中所有可训练的变量。第一次调用时,图中只有您刚刚通过 self.create_actor_network()
创建的模型的变量,所以没有问题。但第二次调用时,它会同时包含第二个 ActorNetwork
的变量和第一个 ActorNetwork 的变量。显然,第一个网络的变量与第二个网络的输出之间不存在梯度,因此 tf.gradients
会对这些变量产生 None
结果,进而导致该错误。最简单的解决方案是将每个网络放在不同的图中,例如:
with tf.Graph().as_default() as graph1, tf.Session() as sess1:
actor1 = ActorNetwork(sess1, 1, 2, 1, 0.01, 0.003, 200)
with tf.Graph().as_default() as graph2, tf.Session() as sess2:
actor2 = ActorNetwork(sess2, 1, 2, 1, 0.01, 0.003, 200)
您也可以从 class 内部执行此操作,这会更安全,但您无法事先创建会话:
def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
self.graph = tf.Graph()
with self.graph.as_default(): # Add at the beginning of all methods
self.sess = tf.Session()
# ...
但是,如果您希望在同一个图中同时使用两个模型,则需要进一步更改代码以避免使用 `tf.trainable_variables()`。例如,您可以自己跟踪变量:
def create_actor_network(self):
all_vars = [] # list of model variables
inputs = tflearn.input_data(shape=[None, self.s_dim])
net = tflearn.fully_connected(inputs, 400)
# Save layer variables
all_vars.append(net.W)
all_vars.append(net.b)
# ...
return inputs, out, scaled_out, all_vars # Return variable list
模块 tflearn.variables
提供了一些帮助程序来简化它,尽管没有太复杂。无论如何,如果可以避免的话,我建议不要使用 TFLearn,因为它无人维护并且已被 Keras 取代(您只需要使用 .weights
/ .trainable_weights
)。
我是 TensorFlow 的新手。我在 TensorFlow 1.x
中制作了以下神经网络import tensorflow as tf
import numpy as np
import tflearn
class ActorNetwork(object):
    """
    Input to the network is the state, output is the action
    under a deterministic policy.
    The output layer activation is a tanh to keep the action
    between -action_bound and action_bound
    """

    def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        # sess: tf.Session used for every graph execution below.
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size
        # Actor Network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        # NOTE(review): tf.trainable_variables() returns *all* trainable
        # variables in the default graph, not just those created above.
        # A second ActorNetwork built in the same graph will therefore pick
        # up the first instance's variables here as well.
        self.network_params = tf.trainable_variables()
        # Target Network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_network_params = tf.trainable_variables()[
            len(self.network_params):]
        # Op for periodically updating target network with online network
        # weights
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) +
                                                  tf.multiply(self.target_network_params[i], 1. - self.tau))
             for i in range(len(self.target_network_params))]
        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None, self.a_dim])
        # Combine the gradients here
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)
        # NOTE(review): if network_params contains variables unrelated to
        # scaled_out (see note above), tf.gradients yields None entries and
        # the divide below raises "TypeError: unsupported operand type(s)
        # for /: 'NoneType' and 'int'".
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))
        self.num_trainable_vars = len(
            self.network_params) + len(self.target_network_params)

    def create_actor_network(self):
        """Build state -> action layers; return (inputs, out, scaled_out)."""
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        net = tflearn.fully_connected(inputs, 400)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        net = tflearn.fully_connected(net, 300)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        # Final layer weights are init to Uniform[-3e-3, 3e-3]
        w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
        out = tflearn.fully_connected(
            net, self.a_dim, activation='tanh', weights_init=w_init)
        # Scale output to -action_bound to action_bound
        scaled_out = tf.multiply(out, self.action_bound)
        return inputs, out, scaled_out

    def train(self, inputs, a_gradient):
        # One optimizer step; a_gradient is dQ/da supplied by the critic.
        self.sess.run(self.optimize, feed_dict={
            self.inputs: inputs,
            self.action_gradient: a_gradient
        })

    def predict(self, inputs):
        # Actions from the online network for a batch of states.
        return self.sess.run(self.scaled_out, feed_dict={
            self.inputs: inputs
        })

    def predict_target(self, inputs):
        # Actions from the target network for a batch of states.
        return self.sess.run(self.target_scaled_out, feed_dict={
            self.target_inputs: inputs
        })

    def update_target_network(self):
        # Run the soft-update ops built in __init__.
        self.sess.run(self.update_target_network_params)

    def get_num_trainable_vars(self):
        # Combined variable count of online + target networks.
        return self.num_trainable_vars
当我调用它时,它没有给出任何错误,但是在第二次它给出了错误。例如
with tf.Session() as sess:
actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
我只收到 actor2 的以下错误:
TypeError:/ 的操作数类型不受支持:'NoneType' 和 'int'
与lambda函数中的None值有关。但是,为什么第一次没有报错呢?
编辑:堆栈跟踪:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-2323bc1d5028> in <module>()
1 with tf.Session() as sess:
2 actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
----> 3 actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
3 frames
<ipython-input-1-895268594a81> in __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
<ipython-input-1-895268594a81> in <lambda>(x)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/math_ops.py in divide(x, y, name)
323 return DivideDelegateWithName(x, name) / y
324 else:
--> 325 return x / y
326
327
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
EDIT-2:根据建议,我在 TF 2.x 中写道。这实际上消除了错误。但是这两个网络是一样的吗?
class ActorNetwork(object):
    """
    TF 2.x / Keras port of the DDPG-style actor network.

    State in, action out; the tanh output layer is multiplied by
    action_bound so actions stay within [-action_bound, action_bound].

    NOTE(review): this code assumes `keras`, `Input` and `layers` were
    imported (presumably from `tensorflow.keras`) at module level — confirm
    the imports.
    """

    def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)
        # actor network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.actor_model = keras.Model(inputs=self.inputs, outputs=self.scaled_out, name='actor_network')
        # Per-model variable lists replace the TF1 code's graph-global
        # tf.trainable_variables(), so multiple instances cannot pick up
        # each other's variables — this is what removed the NoneType error.
        self.network_params = self.actor_model.trainable_variables
        # target actor network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_actor_model = keras.Model(inputs=self.target_inputs, outputs=self.target_scaled_out, name='target_actor_network')
        self.target_network_params = self.target_actor_model.trainable_variables

    def create_actor_network(self):
        """Build the functional graph; return (inputs, out, scaled_out) tensors."""
        inputs = Input(shape = (self.state_dim,), batch_size = None, name = "actor_input_state")
        net = layers.Dense(400, name = 'actor_dense_1a')(inputs)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        net = layers.Dense(300, name = 'actor_dense_1b')(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(20, name = 'actor_dense_1c')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(10, name = 'actor_dense_1d')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # Final layer weights initialized to Uniform[-3e-3, 3e-3], as in TF1.
        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003, seed=None)
        out = layers.Dense(self.action_dim, activation='tanh', name = 'actor_dense_2', kernel_initializer = w_init)(net)
        # Scale tanh output into the action range.
        scaled_out = tf.multiply(out, self.action_bound, name = "actions_scaling")
        return inputs, out, scaled_out

    def update_target_network(self):
        # Soft update: theta_target <- tau*theta + (1-tau)*theta_target.
        # NOTE(review): in eager mode each .assign() executes immediately
        # while the comprehension runs — unlike the TF1 version, which only
        # built ops here; the stored list is just the assign results.
        self.update_target_network_params = [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) + tf.multiply(self.target_network_params[i], 1-self.tau)) for i in range(len(self.target_network_params))]

    def train(self, inputs, a_gradient):
        # tape.gradient(..., output_gradients=-a_gradient) gives the
        # unnormalized DDPG policy gradient; dividing by batch_size averages
        # it over the batch, matching the TF1 implementation.
        with tf.GradientTape() as self.tape:
            self.prediction = self.actor_model(inputs)
        self.unnormalized_actor_gradients = self.tape.gradient(self.prediction, self.network_params, output_gradients = -a_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
        self.optimizer.apply_gradients(zip(self.actor_gradients, self.network_params))

    def predict(self, inputs):
        # Action from the online network for a batch of states.
        return self.actor_model(inputs)

    def predict_target(self, inputs):
        # Action from the target network for a batch of states.
        return self.target_actor_model(inputs)
你的问题在这里:
self.network_params = tf.trainable_variables()
这也绝对会给您带来麻烦:
self.target_network_params = tf.trainable_variables()[
len(self.network_params):]
问题是您在同一个 TensorFlow 图中创建了两个模型。调用 tf.trainable_variables()
时,您得到的是图中所有可训练的变量。第一次调用时,图中只有您刚刚通过 self.create_actor_network()
创建的模型的变量,所以没有问题。但第二次调用时,它会同时包含第二个 ActorNetwork
的变量和第一个 ActorNetwork 的变量。显然,第一个网络的变量与第二个网络的输出之间不存在梯度,因此 tf.gradients
会对这些变量产生 None
结果,进而导致该错误。最简单的解决方案是将每个网络放在不同的图中,例如:
with tf.Graph().as_default() as graph1, tf.Session() as sess1:
actor1 = ActorNetwork(sess1, 1, 2, 1, 0.01, 0.003, 200)
with tf.Graph().as_default() as graph2, tf.Session() as sess2:
actor2 = ActorNetwork(sess2, 1, 2, 1, 0.01, 0.003, 200)
您也可以从 class 内部执行此操作,这会更安全,但您无法事先创建会话:
def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
self.graph = tf.Graph()
with self.graph.as_default(): # Add at the beginning of all methods
self.sess = tf.Session()
# ...
但是,如果您希望在同一个图中同时使用两个模型,则需要进一步更改代码以避免使用 `tf.trainable_variables()`。例如,您可以自己跟踪变量:
def create_actor_network(self):
all_vars = [] # list of model variables
inputs = tflearn.input_data(shape=[None, self.s_dim])
net = tflearn.fully_connected(inputs, 400)
# Save layer variables
all_vars.append(net.W)
all_vars.append(net.b)
# ...
return inputs, out, scaled_out, all_vars # Return variable list
模块 tflearn.variables
提供了一些帮助程序来简化它,尽管没有太复杂。无论如何,如果可以避免的话,我建议不要使用 TFLearn,因为它无人维护并且已被 Keras 取代(您只需要使用 .weights
/ .trainable_weights
)。