How to save a model in TensorFlow Federated
How do I save the model in the code below?
To run the code, visit https://github.com/tensorflow/federated and download federated_learning_for_image_classification.ipynb.
I would appreciate it if you could tell me how to save the federated learning model from the federated_learning_for_image_classification.ipynb tutorial.
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
from matplotlib import pyplot as plt
import tensorflow as tf
import six
import numpy as np
from six.moves import range
import warnings
import collections
import nest_asyncio
import h5py_character
from tensorflow.keras import layers
nest_asyncio.apply()
warnings.simplefilter('ignore')
tf.compat.v1.enable_v2_behavior()
np.random.seed(0)
NUM_CLIENTS = 1
NUM_EPOCHS = 1
BATCH_SIZE = 20
SHUFFLE_BUFFER = 500
num_classes = 3755
if six.PY3:
    tff.framework.set_default_executor(
        tff.framework.create_local_executor(NUM_CLIENTS))
data_train = h5py_character.load_characters_data()
print(len(data_train.client_ids))
example_dataset = data_train.create_tf_dataset_for_client(
    data_train.client_ids[0])
def preprocess(dataset):
    def element_fn(element):
        # element['data'] = tf.expand_dims(element['data'], axis=-1)
        return collections.OrderedDict([
            # ('x', tf.reshape(element['data'], [-1])),
            ('x', tf.reshape(element['data'], [64, 64, 1])),
            ('y', tf.reshape(element['label'], [1])),
        ])
    return dataset.repeat(NUM_EPOCHS).map(element_fn).shuffle(
        SHUFFLE_BUFFER).batch(BATCH_SIZE)
preprocessed_example_dataset = preprocess(example_dataset)
print(iter(preprocessed_example_dataset).next())
sample_batch = tf.nest.map_structure(
    lambda x: x.numpy(), iter(preprocessed_example_dataset).next())
def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x))
            for x in client_ids]
sample_clients = data_train.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(data_train, sample_clients)
def create_compiled_keras_model():
    model = tf.keras.Sequential([
        layers.Conv2D(input_shape=(64, 64, 1), filters=64, kernel_size=(3, 3),
                      strides=(1, 1), padding='same', activation='relu'),
        layers.MaxPool2D(pool_size=(2, 2), padding='same'),
        layers.Conv2D(filters=128, kernel_size=(3, 3), padding='same'),
        layers.MaxPool2D(pool_size=(2, 2), padding='same'),
        layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same'),
        layers.MaxPool2D(pool_size=(2, 2), padding='same'),
        layers.Flatten(),
        layers.Dense(1024, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        # metrics=['accuracy'])
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model
def model_fn():
    keras_model = create_compiled_keras_model()
    global model_to_save
    model_to_save = keras_model
    keras_model.summary()  # summary() prints directly; wrapping it in print() just prints None
    return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
for round_num in range(2, 110):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))
Roughly speaking, we will use the FileCheckpointManager object and its save_checkpoint / load_checkpoint methods. In particular, you can instantiate a FileCheckpointManager and ask it to save state (almost) directly.
state in your example is an instance of tff.python.common_libs.anonymous_tuple.AnonymousTuple (IIRC), which is not compatible with tf.convert_to_tensor, as save_checkpoint requires and declares in its docstring. The general solution often used in TFF research code is to introduce a Python attrs class to convert away from the anonymous tuple as soon as the state is returned; see the TFF research code for an example.
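For illustration, a minimal sketch of such an attrs class, assuming the server state carries model and optimizer_state fields (the exact field names depend on your TFF version, and nested fields may themselves need further conversion; see the research code for the real definition):
import attr

@attr.s(frozen=True)
class ServerState(object):
    # Sketch only: field names are assumptions based on the usual
    # FedAvg server state; adjust to match your TFF version.
    model = attr.ib()
    optimizer_state = attr.ib()

    @classmethod
    def from_anon_tuple(cls, anon_tuple):
        # AnonymousTuple exposes its named elements as attributes,
        # so this copies them into a plain attrs object.
        return cls(model=anon_tuple.model,
                   optimizer_state=anon_tuple.optimizer_state)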
Assuming the above, the following sketch should work:
# state assumed an anonymous tuple, previously created
# N some integer
ckpt_manager = FileCheckpointManager(...)
ckpt_manager.save_checkpoint(ServerState.from_anon_tuple(state), round_num=N)
To restore from this checkpoint, at any time you can call:
state = iterative_process.initialize()
ckpt_manager = FileCheckpointManager(...)
# load_latest_checkpoint uses the freshly initialized state as a
# template for the structure of the tensors to restore.
restored_state = ckpt_manager.load_latest_checkpoint(
    ServerState.from_anon_tuple(state))
One thing to note: the code pointers linked above generally live under tff.python.research..., which is not included in the pip package, so the preferred way to get at them is to fork the code into your own project, or to pull down the repo and build it from source.
Thanks for your interest in TFF!
Wouldn't model.save_weights address this? I know FileCheckpointManager does a more complete job (capturing the weights every round), but I would think that, as far as the final federated-averaged model is concerned, the parameters should be available via save_weights.
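For illustration, a minimal sketch of that idea, assuming the tff.learning.assign_weights_to_keras_model helper that shipped alongside from_compiled_keras_model in the TFF version used above ('final_weights.h5' is a hypothetical path):
# Copy the trained server weights into a fresh Keras model, then save
# them with plain Keras tooling.
keras_model = create_compiled_keras_model()
tff.learning.assign_weights_to_keras_model(keras_model, state.model)
keras_model.save_weights('final_weights.h5')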
You can use the FileCheckpointManager class from the TFF repository. However, the released version of TFF (v0.18.0) does not include this class, so you should copy that file into your project directory so that you can import FileCheckpointManager.
'''
# PASTE YOUR CODE BEFORE HERE
# Required:
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
'''
from checkpoint_manager import FileCheckpointManager
fcm = FileCheckpointManager('checkpoint/')
# Save model
round_num = 110  # depends on how many rounds you have trained
fcm.save_checkpoint(state, round_num)
# Load model
state, round_num = fcm.load_latest_checkpoint(state)
state, metrics = iterative_process.next(state, federated_train_data)
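If you want the more complete behavior mentioned in the comment above (checkpoints captured periodically rather than only at the end), a minimal sketch is to call save_checkpoint inside the training loop; the interval of 10 rounds here is arbitrary:
# Save a checkpoint every 10 rounds while training.
for round_num in range(1, 111):
    state, metrics = iterative_process.next(state, federated_train_data)
    if round_num % 10 == 0:
        fcm.save_checkpoint(state, round_num)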