How to solve "ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"
How to solve "ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"
我正在学习 TensorFlow Federated 的教程 custom_federated_algorithms_2。当我只是复制并运行教程的代码时,一切正常。于是我想自己修改代码,以便更熟悉 TFF,结果出现了 bug。
我的运行环境:
python: 3.8.12
tensorflow: 2.5.0
tensorflow_federated: 0.19.0
以下代码为教程中测试模型的原代码:
# Signature of the tutorial's model: a 784x10 float32 weight matrix and a
# float32 bias vector of length 10.
MODEL_SPEC = collections.OrderedDict([
    ('weights', tf.TensorSpec(shape=[784, 10], dtype=tf.float32)),
    ('bias', tf.TensorSpec(shape=[10], dtype=tf.float32)),
])
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE)  # <weights=float32[784,10],bias=float32[10]>

# Signature of one batch: float32 features `x` with 784 columns and int32
# labels `y`; the leading dimension of both is left unspecified.
BATCH_SPEC = collections.OrderedDict([
    ('x', tf.TensorSpec(shape=[None, 784], dtype=tf.float32)),
    ('y', tf.TensorSpec(shape=[None], dtype=tf.int32)),
])
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE)  # <x=float32[?,784],y=int32[?]>
然后我将 MODEL_TYPE 更改为:
# Replacement model signature: three weight matrices (`fc1`..`fc3`) with
# matching bias vectors (`b1`..`b3`), chaining 784 -> 256 -> 128 -> 10.
MODEL_SPEC = collections.OrderedDict([
    ('fc1', tf.TensorSpec(shape=[784, 256], dtype=tf.float32)),
    ('b1', tf.TensorSpec(shape=[256], dtype=tf.float32)),
    ('fc2', tf.TensorSpec(shape=[256, 128], dtype=tf.float32)),
    ('b2', tf.TensorSpec(shape=[128], dtype=tf.float32)),
    ('fc3', tf.TensorSpec(shape=[128, 10], dtype=tf.float32)),
    ('b3', tf.TensorSpec(shape=[10], dtype=tf.float32)),
])
MODEL_TYPE = tff.to_type(MODEL_SPEC)
由于模型结构改变,forward pass的过程也需要改变:
# original
@tf.function
def forward_pass(model, batch):
    """Return the mean cross-entropy loss of the single-layer model on `batch`.

    Args:
      model: Mapping with `'weights'` and `'bias'` entries.
      batch: Mapping with features under `'x'` and integer labels under `'y'`.

    Returns:
      The negated mean, over the batch, of the log-probability that the
      softmax output assigns to each example's label.
    """
    predicted_y = tf.nn.softmax(
        tf.matmul(batch['x'], model['weights']) + model['bias'])
    # The one-hot labels (10 classes) pick out the log-probability of the
    # labelled class for every example.
    return -tf.reduce_mean(
        tf.reduce_sum(
            tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    """Expose `forward_pass` as a TFF TensorFlow computation on (model, batch)."""
    return forward_pass(model, batch)
# new
@tf.function
def forward(model, batch):
    """Return the mean cross-entropy loss of the three-layer model on `batch`.

    Args:
      model: Mapping with entries `fc1`, `b1`, `fc2`, `b2`, `fc3`, `b3`.
      batch: Mapping with features under `"x"` and integer labels under `"y"`.

    Returns:
      The negated mean, over the batch, of the log-probability that the
      softmax output assigns to each example's label.
    """
    # Three affine layers applied back to back; note that no activation
    # function is applied between them.
    logits = batch["x"] @ model["fc1"] + model["b1"]
    logits = logits @ model["fc2"] + model["b2"]
    logits = logits @ model["fc3"] + model["b3"]
    # From here on `logits` holds class probabilities, not raw scores.
    logits = tf.nn.softmax(logits, axis=-1)
    one_hot_y = tf.one_hot(batch["y"], depth=10)
    return -tf.reduce_mean(
        tf.reduce_sum(tf.math.log(logits) * one_hot_y, axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    """Expose `forward` as a TFF TensorFlow computation on (model, batch)."""
    return forward(model, batch)
我没有更改 batch_train() 的代码。
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    """Run one SGD step on `batch`, starting from `initial_model`.

    Args:
      initial_model: Mapping of parameter name to initial value.
      batch: The batch passed on to the loss function.
      learning_rate: Learning rate handed to the SGD optimizer.

    Returns:
      The mapping of model variables after the gradient step was applied.
    """
    # Define a group of model variables and set them to `initial_model`. Must
    # be defined outside the @tf.function.
    model_vars = collections.OrderedDict([
        (name, tf.Variable(name=name, initial_value=value))
        for name, value in initial_model.items()
    ])
    optimizer = tf.keras.optimizers.SGD(learning_rate)

    @tf.function
    def _train_on_batch(model_vars, batch):
        # Perform one step of gradient descent using the loss from
        # `forward_pass`.
        # NOTE(review): `forward_pass` reads model['weights'] / model['bias'];
        # with the six-tensor MODEL_SPEC (fc1..b3) those keys do not exist, so
        # `forward` is presumably what should be called here -- confirm.
        with tf.GradientTape() as tape:
            loss = forward_pass(model_vars, batch)
        grads = tape.gradient(loss, model_vars)
        optimizer.apply_gradients(
            zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
        return model_vars

    return _train_on_batch(model_vars, batch)
到目前为止一切正常。但是在执行 local_train() 部分时,即使我只是使用原始代码,也会出现错误。
# All-zero starting parameters, one tensor per entry of the six-tensor model
# (fc1/b1, fc2/b2, fc3/b3).
initial_model = collections.OrderedDict([
    ('fc1', tf.zeros([784, 256])),
    ('b1', tf.zeros([256])),
    ('fc2', tf.zeros([256, 128])),
    ('b2', tf.zeros([128])),
    ('fc3', tf.zeros([128, 10])),
    ('b3', tf.zeros([10])),
])

# Local data is typed as a sequence whose elements are batches.
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
    """Fold `batch_train` over `all_batches`, starting from `initial_model`.

    Args:
      initial_model: Model value used as the starting point of the reduction.
      learning_rate: Scalar learning rate forwarded to every `batch_train` call.
      all_batches: Sequence of batches to train on.

    Returns:
      The result of `tff.sequence_reduce` over the batches.
    """

    @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
    def _insert_learning_rate_to_sequence(dataset, learning_rate):
        # Pair every batch with the learning rate so that the reduction
        # operator below receives both in a single sequence element.
        return dataset.map(lambda x: (x, learning_rate))

    batches_with_learning_rate = _insert_learning_rate_to_sequence(
        all_batches, learning_rate)

    # Mapping function to apply to each batch.
    @tff.federated_computation(
        MODEL_TYPE, batches_with_learning_rate.type_signature.element)
    def batch_fn(model, batch_with_lr):
        batch, lr = batch_with_lr
        return batch_train(model, batch, lr)

    return tff.sequence_reduce(
        batches_with_learning_rate, initial_model, batch_fn)
# Run local training on element 5 of `mnist_train_dataset` with a learning
# rate of 0.1; this is the call that raises the error shown below.
locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef
我快速浏览了一下(没有仔细检查所有粘贴的代码),注意到的一个问题是这一行:
return batch_train(model, batch, lr)
要在 tff.federated_computation 的上下文中调用 tff.tf_computation,您需要使用 tff.federated_map 运算符。所以它看起来应该像这样:
return tff.federated_map(batch_train, (model, batch, lr))
我终于发现自己犯了一个低级错误 🤦‍♂️
这段代码是我在自己的 Jupyter notebook 里写的,但我忘了添加教程开头的以下关键代码:
# Create a local executor factory with sequence ops enabled, wrap it in an
# execution context, and install that context as TFF's default.
executor_factory = tff.framework.local_executor_factory(support_sequence_ops=True)
execution_context = tff.framework.ExecutionContext(executor_fn=executor_factory)
tff.framework.set_default_context(execution_context)
我正在学习 TensorFlow Federated 的教程 custom_federated_algorithms_2。当我只是复制并运行教程的代码时,一切正常。于是我想自己修改代码,以便更熟悉 TFF,结果出现了 bug。
我的运行环境:
python: 3.8.12
tensorflow: 2.5.0
tensorflow_federated: 0.19.0
以下代码为教程中测试模型的原代码:
# Signature of the tutorial's model: a 784x10 float32 weight matrix and a
# float32 bias vector of length 10.
MODEL_SPEC = collections.OrderedDict([
    ('weights', tf.TensorSpec(shape=[784, 10], dtype=tf.float32)),
    ('bias', tf.TensorSpec(shape=[10], dtype=tf.float32)),
])
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE)  # <weights=float32[784,10],bias=float32[10]>

# Signature of one batch: float32 features `x` with 784 columns and int32
# labels `y`; the leading dimension of both is left unspecified.
BATCH_SPEC = collections.OrderedDict([
    ('x', tf.TensorSpec(shape=[None, 784], dtype=tf.float32)),
    ('y', tf.TensorSpec(shape=[None], dtype=tf.int32)),
])
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE)  # <x=float32[?,784],y=int32[?]>
然后我将 MODEL_TYPE 更改为:
# Replacement model signature: three weight matrices (`fc1`..`fc3`) with
# matching bias vectors (`b1`..`b3`), chaining 784 -> 256 -> 128 -> 10.
MODEL_SPEC = collections.OrderedDict([
    ('fc1', tf.TensorSpec(shape=[784, 256], dtype=tf.float32)),
    ('b1', tf.TensorSpec(shape=[256], dtype=tf.float32)),
    ('fc2', tf.TensorSpec(shape=[256, 128], dtype=tf.float32)),
    ('b2', tf.TensorSpec(shape=[128], dtype=tf.float32)),
    ('fc3', tf.TensorSpec(shape=[128, 10], dtype=tf.float32)),
    ('b3', tf.TensorSpec(shape=[10], dtype=tf.float32)),
])
MODEL_TYPE = tff.to_type(MODEL_SPEC)
由于模型结构改变,forward pass的过程也需要改变:
# original
@tf.function
def forward_pass(model, batch):
    """Return the mean cross-entropy loss of the single-layer model on `batch`.

    Args:
      model: Mapping with `'weights'` and `'bias'` entries.
      batch: Mapping with features under `'x'` and integer labels under `'y'`.

    Returns:
      The negated mean, over the batch, of the log-probability that the
      softmax output assigns to each example's label.
    """
    predicted_y = tf.nn.softmax(
        tf.matmul(batch['x'], model['weights']) + model['bias'])
    # The one-hot labels (10 classes) pick out the log-probability of the
    # labelled class for every example.
    return -tf.reduce_mean(
        tf.reduce_sum(
            tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    """Expose `forward_pass` as a TFF TensorFlow computation on (model, batch)."""
    return forward_pass(model, batch)
# new
@tf.function
def forward(model, batch):
    """Return the mean cross-entropy loss of the three-layer model on `batch`.

    Args:
      model: Mapping with entries `fc1`, `b1`, `fc2`, `b2`, `fc3`, `b3`.
      batch: Mapping with features under `"x"` and integer labels under `"y"`.

    Returns:
      The negated mean, over the batch, of the log-probability that the
      softmax output assigns to each example's label.
    """
    # Three affine layers applied back to back; note that no activation
    # function is applied between them.
    logits = batch["x"] @ model["fc1"] + model["b1"]
    logits = logits @ model["fc2"] + model["b2"]
    logits = logits @ model["fc3"] + model["b3"]
    # From here on `logits` holds class probabilities, not raw scores.
    logits = tf.nn.softmax(logits, axis=-1)
    one_hot_y = tf.one_hot(batch["y"], depth=10)
    return -tf.reduce_mean(
        tf.reduce_sum(tf.math.log(logits) * one_hot_y, axis=[1]))
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    """Expose `forward` as a TFF TensorFlow computation on (model, batch)."""
    return forward(model, batch)
我没有更改 batch_train() 的代码。
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
    """Run one SGD step on `batch`, starting from `initial_model`.

    Args:
      initial_model: Mapping of parameter name to initial value.
      batch: The batch passed on to the loss function.
      learning_rate: Learning rate handed to the SGD optimizer.

    Returns:
      The mapping of model variables after the gradient step was applied.
    """
    # Define a group of model variables and set them to `initial_model`. Must
    # be defined outside the @tf.function.
    model_vars = collections.OrderedDict([
        (name, tf.Variable(name=name, initial_value=value))
        for name, value in initial_model.items()
    ])
    optimizer = tf.keras.optimizers.SGD(learning_rate)

    @tf.function
    def _train_on_batch(model_vars, batch):
        # Perform one step of gradient descent using the loss from
        # `forward_pass`.
        # NOTE(review): `forward_pass` reads model['weights'] / model['bias'];
        # with the six-tensor MODEL_SPEC (fc1..b3) those keys do not exist, so
        # `forward` is presumably what should be called here -- confirm.
        with tf.GradientTape() as tape:
            loss = forward_pass(model_vars, batch)
        grads = tape.gradient(loss, model_vars)
        optimizer.apply_gradients(
            zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
        return model_vars

    return _train_on_batch(model_vars, batch)
到目前为止一切正常。但是在执行 local_train() 部分时,即使我只是使用原始代码,也会出现错误。
# All-zero starting parameters, one tensor per entry of the six-tensor model
# (fc1/b1, fc2/b2, fc3/b3).
initial_model = collections.OrderedDict([
    ('fc1', tf.zeros([784, 256])),
    ('b1', tf.zeros([256])),
    ('fc2', tf.zeros([256, 128])),
    ('b2', tf.zeros([128])),
    ('fc3', tf.zeros([128, 10])),
    ('b3', tf.zeros([10])),
])

# Local data is typed as a sequence whose elements are batches.
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
    """Fold `batch_train` over `all_batches`, starting from `initial_model`.

    Args:
      initial_model: Model value used as the starting point of the reduction.
      learning_rate: Scalar learning rate forwarded to every `batch_train` call.
      all_batches: Sequence of batches to train on.

    Returns:
      The result of `tff.sequence_reduce` over the batches.
    """

    @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
    def _insert_learning_rate_to_sequence(dataset, learning_rate):
        # Pair every batch with the learning rate so that the reduction
        # operator below receives both in a single sequence element.
        return dataset.map(lambda x: (x, learning_rate))

    batches_with_learning_rate = _insert_learning_rate_to_sequence(
        all_batches, learning_rate)

    # Mapping function to apply to each batch.
    @tff.federated_computation(
        MODEL_TYPE, batches_with_learning_rate.type_signature.element)
    def batch_fn(model, batch_with_lr):
        batch, lr = batch_with_lr
        return batch_train(model, batch, lr)

    return tff.sequence_reduce(
        batches_with_learning_rate, initial_model, batch_fn)
# Run local training on element 5 of `mnist_train_dataset` with a learning
# rate of 0.1; this is the call that raises the error shown below.
locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef
我快速浏览了一下(没有仔细检查所有粘贴的代码),注意到的一个问题是这一行:
return batch_train(model, batch, lr)
要在 tff.federated_computation 的上下文中调用 tff.tf_computation,您需要使用 tff.federated_map 运算符。所以它看起来应该像这样:
return tff.federated_map(batch_train, (model, batch, lr))
我终于发现自己犯了一个低级错误 🤦‍♂️ 这段代码是我在自己的 Jupyter notebook 里写的,但我忘了添加教程开头的以下关键代码:
# Create a local executor factory with sequence ops enabled, wrap it in an
# execution context, and install that context as TFF's default.
executor_factory = tff.framework.local_executor_factory(support_sequence_ops=True)
execution_context = tff.framework.ExecutionContext(executor_fn=executor_factory)
tff.framework.set_default_context(execution_context)