How to solve "ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"

Question

我正在学习 TensorFlow_Federated 的教程：custom_federated_algorithms_2。当我只复制和运行教程的代码时一切正常。所以我想自己改代码以便更熟悉tff。然后bug出现了。

我的运行时间环境：

python: 3.8.12

张量流：2.5.0

tensorflow_federated: 0.19.0

以下代码为教程中测试模型的原代码：

MODEL_SPEC = collections.OrderedDict(
    weights=tf.TensorSpec(shape=[784, 10], dtype=tf.float32),
    bias=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE) # <weights=float32[784,10],bias=float32[10]>


BATCH_SPEC = collections.OrderedDict(
    x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
    y=tf.TensorSpec(shape=[None], dtype=tf.int32)
)
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE) # <x=float32[?,784],y=int32[?]>

然后我将 MODEL_TYPE 更改为：

MODEL_SPEC = collections.OrderedDict(
    fc1=tf.TensorSpec(shape=[784, 256], dtype=tf.float32),
    b1=tf.TensorSpec(shape=[256], dtype=tf.float32),
    fc2=tf.TensorSpec(shape=[256, 128], dtype=tf.float32),
    b2=tf.TensorSpec(shape=[128], dtype=tf.float32),
    fc3=tf.TensorSpec(shape=[128, 10], dtype=tf.float32),
    b3=tf.TensorSpec(shape=[10], dtype=tf.float32)
)
MODEL_TYPE = tff.to_type(MODEL_SPEC)

由于模型结构改变，forward pass的过程也需要改变：

# original
@tf.function
def forward_pass(model, batch):
  predicted_y = tf.nn.softmax(
      tf.matmul(batch['x'], model['weights']) + model['bias'])
  return -tf.reduce_mean(
      tf.reduce_sum(
          tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
  return forward_pass(model, batch)

# new 
@tf.function
def forward(model, batch):
    logits = batch["x"] @ model["fc1"] + model["b1"]

    logits = logits @ model["fc2"] + model["b2"]

    logits = logits @ model["fc3"] + model["b3"]

    logits = tf.nn.softmax(logits, axis=-1,)
    
    one_hot_y = tf.one_hot(batch["y"], depth=10)
    return -tf.reduce_mean(tf.reduce_sum(tf.math.log(logits) * one_hot_y, axis=[1]))


@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
    return forward(model, batch)

我没有更改 batch_train() 代码。

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
  # Define a group of model variables and set them to `initial_model`. Must
  # be defined outside the @tf.function.
  model_vars = collections.OrderedDict([
      (name, tf.Variable(name=name, initial_value=value))
      for name, value in initial_model.items()
  ])
  optimizer = tf.keras.optimizers.SGD(learning_rate)

  @tf.function
  def _train_on_batch(model_vars, batch):
    # Perform one step of gradient descent using loss from `batch_loss`.
    with tf.GradientTape() as tape:
      loss = forward_pass(model_vars, batch)
    grads = tape.gradient(loss, model_vars)
    optimizer.apply_gradients(
        zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
    return model_vars

  return _train_on_batch(model_vars, batch)

目前为止一切正常。但是在执行 local_train() 部分时，即使我只是使用原始代码也会出现错误。

initial_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]),
    b1=tf.zeros([256]),
    fc2=tf.zeros([256,128]),
    b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]),
    b3=tf.zeros([10])
)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):

  @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
  def _insert_learning_rate_to_sequence(dataset, learning_rate):
    return dataset.map(lambda x: (x, learning_rate))

  batches_with_learning_rate = _insert_learning_rate_to_sequence(all_batches, learning_rate)

  # Mapping function to apply to each batch.
  @tff.federated_computation(MODEL_TYPE, batches_with_learning_rate.type_signature.element)
  def batch_fn(model, batch_with_lr):
    batch, lr = batch_with_lr
    return batch_train(model, batch, lr)

  return tff.sequence_reduce(batches_with_learning_rate, initial_model, batch_fn)

locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef

Answer 1

我在快速浏览（没有筛选所有粘贴的代码）时注意到的一个问题是这一行：

  return batch_train(model, batch, lr)

要从 tff.federated_computation 的上下文中调用 tff.tf_computation，您需要使用 tff.federated_map 运算符。所以它看起来像

  return tff.federated_map(batch_train, (model, batch, lr))

Answer 2

终于发现我犯了low-level个错误‍♂️ 这是我在我的自定义 jupyter notebook 上编码的，但忘记在开始的教程中添加以下关键代码：

executor_factory = tff.framework.local_executor_factory(
    support_sequence_ops=True

)
execution_context = tff.framework.ExecutionContext(
    executor_fn=executor_factory
)
tff.framework.set_default_context(execution_context)

How to solve "ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"

How to solve "ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"

python

tensorflow

tensorflow-federated