Google 云 TPU:混合不同 tf.distribute.Strategy
Google cloud TPU: mixing different tf.distribute.Strategy
我正在使用 Talos 和 Google Colab TPU 对 Keras 模型运行超参数调优。请注意,我使用的是 TensorFlow 2.0.0 和 Keras 2.2.4-tf。
# pip install --upgrade tensorflow
# pip install --upgrade --force-reinstall tensorflow-gpu
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Switch TensorFlow to graph (non-eager) execution before any model is built.
tf.compat.v1.disable_eager_execution()
def iris_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a small dense classifier under a TPUStrategy.

    Written as a Talos model function: ``params`` supplies the
    hyperparameters read below ('activation', 'optimizer', 'losses',
    'batch_size' and 'epochs').

    Returns:
        The pair ``(out, model)``: the value returned by ``model.fit`` and
        the Keras model itself.
    """
    # Locate the Colab TPU through COLAB_TPU_ADDR and build a strategy on it.
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    # The model has to be created and compiled inside the strategy scope.
    with strategy.scope():
        model = Sequential([
            Dense(32, input_dim=4, activation=params['activation']),
            Dense(3, activation='softmax'),
        ])
        model.compile(optimizer=params['optimizer'], loss=params['losses'])

    # Turn the training arrays into a cached, shuffled, endlessly repeating
    # stream of fixed-size batches.
    batch_size = params['batch_size']
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # NOTE: this is the call at which the reported "Mixing different
    # tf.distribute.Strategy objects" RuntimeError surfaces.
    history = model.fit(
        dataset,
        batch_size=batch_size,
        epochs=params['epochs'],
        validation_data=[x_val, y_val],
        verbose=0,
        steps_per_epoch=4,
    )
    return history, model
# Load the iris dataset shipped with Talos' templates as an (x, y) pair.
x, y = ta.templates.datasets.iris()
# Hyperparameter search space handed to Talos. Lists enumerate discrete
# choices; the batch_size tuple is presumably a (min, max, steps) range in
# Talos' parameter syntax -- confirm against the Talos documentation.
p = {'activation': ['relu', 'elu'],
'optimizer': ['Nadam', 'Adam'],
'losses': ['logcosh'],
'batch_size': (20, 50, 5),
'epochs': [10, 20]}
# Run the Talos scan over the search space with iris_model as the model function.
scan_object = ta.Scan(x, y, model=iris_model, params=p, fraction_limit=0.1, experiment_name='first_test')
使用 tf.data.Dataset 将训练集转换为数据集后,使用 out = model.fit(...) 拟合模型时出现以下错误:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/distribute/distribute_lib.py in _wrong_strategy_scope(strategy, context)
218 raise RuntimeError(
219 "Mixing different tf.distribute.Strategy objects: %s is not %s" %
--> 220 (context.strategy, strategy))
221
222
RuntimeError: Mixing different tf.distribute.Strategy objects: <tensorflow.python.distribute.tpu_strategy.TPUStrategy object at 0x7f9886506c50> is not <tensorflow.python.distribute.tpu_strategy.TPUStrategy object at 0x7f988aa04080>
TensorFlow 2.0.0 版本不支持 TPU 训练。不过,这在 TensorFlow 2.2 中应该不是问题。我对您的代码做了一些小修改,使其可以在 Colab 上运行:
- 使用 Talos 1.0(`pip install git+https://github.com/autonomio/talos@1.0`)。
- 将 `tf.config.experimental_connect_to_host(resolver.master())` 替换为 `tf.config.experimental_connect_to_cluster(resolver)`。
- 使用 tf.data.Dataset 作为验证数据。
%tensorflow_version 2.x
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
def iris_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a small dense classifier under a TPUStrategy.

    Written as a Talos model function: ``params`` supplies the
    hyperparameters read below ('activation', 'optimizer', 'losses',
    'batch_size' and 'epochs'). Training and validation data are both fed
    to Keras as batched ``tf.data.Dataset`` objects.

    Returns:
        The pair ``(out, model)``: the value returned by ``model.fit`` and
        the Keras model itself.
    """
    # Locate the Colab TPU through COLAB_TPU_ADDR and build a strategy on it.
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    # The model has to be created and compiled inside the strategy scope.
    with strategy.scope():
        model = Sequential([
            Dense(32, input_dim=4, activation=params['activation']),
            Dense(3, activation='softmax'),
        ])
        model.compile(optimizer=params['optimizer'], loss=params['losses'])

    batch_size = params['batch_size']

    # Training stream: cached, shuffled, endlessly repeating, fixed-size batches.
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # Validation stream: a single pass of fixed-size batches.
    val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)

    # batch_size is deliberately NOT passed to fit(): both inputs are already
    # batched datasets, and Keras documents that batch_size must not be
    # specified for dataset inputs.
    history = model.fit(
        dataset,
        epochs=params['epochs'],
        validation_data=val_dataset,
        verbose=0,
        steps_per_epoch=4,
    )
    return history, model
我正在使用 Talos 和 Google Colab TPU 对 Keras 模型运行超参数调优。请注意,我使用的是 TensorFlow 2.0.0 和 Keras 2.2.4-tf。
# pip install --upgrade tensorflow
# pip install --upgrade --force-reinstall tensorflow-gpu
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Switch TensorFlow to graph (non-eager) execution before any model is built.
tf.compat.v1.disable_eager_execution()
def iris_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a small dense classifier under a TPUStrategy.

    Written as a Talos model function: ``params`` supplies the
    hyperparameters read below ('activation', 'optimizer', 'losses',
    'batch_size' and 'epochs').

    Returns:
        The pair ``(out, model)``: the value returned by ``model.fit`` and
        the Keras model itself.
    """
    # Locate the Colab TPU through COLAB_TPU_ADDR and build a strategy on it.
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    # The model has to be created and compiled inside the strategy scope.
    with strategy.scope():
        model = Sequential([
            Dense(32, input_dim=4, activation=params['activation']),
            Dense(3, activation='softmax'),
        ])
        model.compile(optimizer=params['optimizer'], loss=params['losses'])

    # Turn the training arrays into a cached, shuffled, endlessly repeating
    # stream of fixed-size batches.
    batch_size = params['batch_size']
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # NOTE: this is the call at which the reported "Mixing different
    # tf.distribute.Strategy objects" RuntimeError surfaces.
    history = model.fit(
        dataset,
        batch_size=batch_size,
        epochs=params['epochs'],
        validation_data=[x_val, y_val],
        verbose=0,
        steps_per_epoch=4,
    )
    return history, model
# Load the iris dataset shipped with Talos' templates as an (x, y) pair.
x, y = ta.templates.datasets.iris()
# Hyperparameter search space handed to Talos. Lists enumerate discrete
# choices; the batch_size tuple is presumably a (min, max, steps) range in
# Talos' parameter syntax -- confirm against the Talos documentation.
p = {'activation': ['relu', 'elu'],
'optimizer': ['Nadam', 'Adam'],
'losses': ['logcosh'],
'batch_size': (20, 50, 5),
'epochs': [10, 20]}
# Run the Talos scan over the search space with iris_model as the model function.
scan_object = ta.Scan(x, y, model=iris_model, params=p, fraction_limit=0.1, experiment_name='first_test')
使用 tf.data.Dataset 将训练集转换为数据集后,使用 out = model.fit(...) 拟合模型时出现以下错误:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/distribute/distribute_lib.py in _wrong_strategy_scope(strategy, context)
218 raise RuntimeError(
219 "Mixing different tf.distribute.Strategy objects: %s is not %s" %
--> 220 (context.strategy, strategy))
221
222
RuntimeError: Mixing different tf.distribute.Strategy objects: <tensorflow.python.distribute.tpu_strategy.TPUStrategy object at 0x7f9886506c50> is not <tensorflow.python.distribute.tpu_strategy.TPUStrategy object at 0x7f988aa04080>
TensorFlow 2.0.0 版本不支持 TPU 训练。不过,这在 TensorFlow 2.2 中应该不是问题。我对您的代码做了一些小修改,使其可以在 Colab 上运行:
- 使用 Talos 1.0(`pip install git+https://github.com/autonomio/talos@1.0`)。
- 将 `tf.config.experimental_connect_to_host(resolver.master())` 替换为 `tf.config.experimental_connect_to_cluster(resolver)`。
- 使用 tf.data.Dataset 作为验证数据。
%tensorflow_version 2.x
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
def iris_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a small dense classifier under a TPUStrategy.

    Written as a Talos model function: ``params`` supplies the
    hyperparameters read below ('activation', 'optimizer', 'losses',
    'batch_size' and 'epochs'). Training and validation data are both fed
    to Keras as batched ``tf.data.Dataset`` objects.

    Returns:
        The pair ``(out, model)``: the value returned by ``model.fit`` and
        the Keras model itself.
    """
    # Locate the Colab TPU through COLAB_TPU_ADDR and build a strategy on it.
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    # The model has to be created and compiled inside the strategy scope.
    with strategy.scope():
        model = Sequential([
            Dense(32, input_dim=4, activation=params['activation']),
            Dense(3, activation='softmax'),
        ])
        model.compile(optimizer=params['optimizer'], loss=params['losses'])

    batch_size = params['batch_size']

    # Training stream: cached, shuffled, endlessly repeating, fixed-size batches.
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # Validation stream: a single pass of fixed-size batches.
    val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)

    # batch_size is deliberately NOT passed to fit(): both inputs are already
    # batched datasets, and Keras documents that batch_size must not be
    # specified for dataset inputs.
    history = model.fit(
        dataset,
        epochs=params['epochs'],
        validation_data=val_dataset,
        verbose=0,
        steps_per_epoch=4,
    )
    return history, model