使用 tf.GradientTape 的变分自动编码器
Variational autoencoder using tf.GradientTape
下面是keras为典型的变分自动编码器提供的tensorflow GradientTape的例子:
在该示例中,train_step 函数是在模型内部实现的,并通过 "model.fit()" 进行训练。该示例执行得很好,完全没有问题。
但是,对于另一个应用程序,我需要在模型定义之外实现 train_step 函数。一开始,我从上面提到的示例入手,因为目标应用程序也是一种 VAE。因此,我做了一些修改,并尝试训练相同的模型结构;完整代码见下文。然而,与原始代码相比,我得到的损失值非常奇怪;仅仅经过几次迭代,损失值就变成了 nan。
你能告诉我错误是什么以及为什么会这样吗?
提前致谢
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU and which
# TensorFlow version is installed. The trailing comments record the output
# the author observed.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'
class Sampling(layers.Layer):
    """Sample a latent vector z from the pair (z_mean, z_log_var).

    The layer adds scaled random noise to the mean:
    ``z = z_mean + exp(0.5 * z_log_var) * epsilon``.
    """

    def call(self, inputs):
        """Return one sample of z for every row of the inputs.

        Args:
            inputs: A pair ``(z_mean, z_log_var)``; both are indexed as
                rank-2 tensors (batch, latent dimension).

        Returns:
            A tensor computed as ``z_mean + exp(0.5 * z_log_var) * epsilon``.
        """
        mean, log_var = inputs
        # Use the dynamic shape so the layer works for any batch size.
        mean_shape = tf.shape(mean)
        n_rows = mean_shape[0]
        n_cols = mean_shape[1]
        # Noise is drawn with random_normal's default parameters.
        noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
        # exp(0.5 * log_var) turns a log-variance into a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise
# Number of latent dimensions produced by the encoder heads.
latent_dim = 2

# --- Encoder: two stride-2 convolutions followed by a small dense layer ---
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = encoder_inputs
for n_filters in (32, 64):
    x = layers.Conv2D(n_filters, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two linear heads feed the Sampling layer.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# --- Decoder: dense projection reshaped to (7, 7, 64), then transposed convs ---
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
for n_filters in (64, 32):
    x = layers.Conv2DTranspose(n_filters, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# A single functional model returns the reconstruction plus both latent
# heads, so a training step can compute every loss term from one call.
model = keras.Model(
    encoder_inputs,
    [decoder_outputs, z_mean, z_log_var],
    name="decoder",
)
model.summary()

optimizer = tf.keras.optimizers.Adam(lr=0.001)
# NOTE(review): `objective` is not referenced by the visible training code.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# NOTE(review): x_train is used exactly as returned by load_data(); no
# dtype cast or rescaling is applied in this script.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
num_samples = x_train.shape[0]
epochs = 1
batch_size = 128
@tf.function
def train_step(data):
    """Run one gradient update of the module-level `model` on one batch.

    Args:
        data: Batch passed straight to `model`; a trailing axis is added
            to it afterwards before it is used as the reconstruction
            target.

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)
        # The target gets a trailing axis of size 1 only after the forward
        # pass, so it matches the rank of `reconstruction` in the loss.
        data = tf.expand_dims(data, axis=-1)

        # Binary cross-entropy summed over axes 1 and 2, then averaged.
        bce_map = keras.losses.binary_crossentropy(data, reconstruction)
        bce_per_sample = tf.reduce_sum(bce_map, axis=(1, 2))
        reconstruction_loss = tf.reduce_mean(bce_per_sample)

        # KL term summed over axis 1 (the latent axis), then averaged.
        kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_per_sample = tf.reduce_sum(kl_terms, axis=1)
        kl_loss = tf.reduce_mean(kl_per_sample)

        total_loss = reconstruction_loss + kl_loss

    weights = model.trainable_variables
    grads = tape.gradient(total_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return total_loss, reconstruction_loss, kl_loss
# Only whole batches are used: num_samples // batch_size steps per epoch.
steps_per_epoch = num_samples // batch_size

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            # Contiguous slice [start, stop) of the training array.
            start = step * batch_size
            stop = start + batch_size
            x_batch = x_train[start:stop, :, :]

            step_losses = train_step(x_batch)
            total_loss, reconstruction_loss, kl_loss = step_losses

            # Per-step progress report.
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")
我认为您忘记了按照您所指的教程中所示规范化数据:
# Load the dataset, keeping only the first element of each returned pair.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Cast the training array to float32 and divide by 255 before training.
x_train = x_train.astype("float32") / 255
除此之外,您的代码似乎运行正常,损失不会变成 nan。以下是参考代码:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU and which
# TensorFlow version is installed. The trailing comments record the output
# the author observed.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'
class Sampling(layers.Layer):
    """Sample a latent vector z from the pair (z_mean, z_log_var).

    The layer adds scaled random noise to the mean:
    ``z = z_mean + exp(0.5 * z_log_var) * epsilon``.
    """

    def call(self, inputs):
        """Return one sample of z for every row of the inputs.

        Args:
            inputs: A pair ``(z_mean, z_log_var)``; both are indexed as
                rank-2 tensors (batch, latent dimension).

        Returns:
            A tensor computed as ``z_mean + exp(0.5 * z_log_var) * epsilon``.
        """
        mean, log_var = inputs
        # Use the dynamic shape so the layer works for any batch size.
        mean_shape = tf.shape(mean)
        n_rows = mean_shape[0]
        n_cols = mean_shape[1]
        # Noise is drawn with random_normal's default parameters.
        noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
        # exp(0.5 * log_var) turns a log-variance into a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise
# Number of latent dimensions produced by the encoder heads.
latent_dim = 2

# --- Encoder: two stride-2 convolutions followed by a small dense layer ---
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two linear heads feed the Sampling layer.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# --- Decoder: dense projection reshaped to (7, 7, 64), then transposed convs ---
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# A single functional model returns the reconstruction plus both latent
# heads, so a training step can compute every loss term from one call.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
# and is rejected by current Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# NOTE(review): `objective` is not referenced by the visible training code.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Cast to float32 and divide by 255 so the binary cross-entropy targets are
# scaled down; this is the step the answer identifies as missing from the
# question's script.
x_train = x_train.astype("float32") / 255
num_samples = x_train.shape[0]
epochs = 4
batch_size = 128
@tf.function
def train_step(data):
    """Run one gradient update of the module-level `model` on one batch.

    Args:
        data: Batch used both as the model input and as the
            reconstruction target (the training loop below passes slices
            with a trailing axis already added).

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)

        # Binary cross-entropy summed over axes 1 and 2, then averaged.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                keras.losses.binary_crossentropy(data, reconstruction),
                axis=(1, 2),
            )
        )

        # KL term summed over axis 1 (the latent axis), then averaged.
        # reduce_mean already performs the averaging, so there is no
        # further division by batch_size: dividing again (twice, as before)
        # shrank this term by batch_size**2 and effectively removed it
        # from the objective.
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

        total_loss = reconstruction_loss + kl_loss

    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss, reconstruction_loss, kl_loss
# Only whole batches are used: num_samples // batch_size steps per epoch.
steps_per_epoch = num_samples // batch_size

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            # Contiguous slice [start, stop) of the training array;
            # tf.newaxis appends a trailing axis of length 1.
            start = step * batch_size
            stop = start + batch_size
            x_batch = x_train[start:stop, :, :, tf.newaxis]
            print(x_batch.shape)

            step_losses = train_step(x_batch)
            total_loss, reconstruction_loss, kl_loss = step_losses

            # Per-step progress report.
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")
下面是keras为典型的变分自动编码器提供的tensorflow GradientTape的例子:
在该示例中,train_step 函数是在模型内部实现的,并通过 "model.fit()" 进行训练。该示例执行得很好,完全没有问题。
但是,对于另一个应用程序,我需要在模型定义之外实现 train_step 函数。一开始,我从上面提到的示例入手,因为目标应用程序也是一种 VAE。因此,我做了一些修改,并尝试训练相同的模型结构;完整代码见下文。然而,与原始代码相比,我得到的损失值非常奇怪;仅仅经过几次迭代,损失值就变成了 nan。你能告诉我错误是什么以及为什么会这样吗?
提前致谢
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU and which
# TensorFlow version is installed. The trailing comments record the output
# the author observed.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'
class Sampling(layers.Layer):
    """Sample a latent vector z from the pair (z_mean, z_log_var).

    The layer adds scaled random noise to the mean:
    ``z = z_mean + exp(0.5 * z_log_var) * epsilon``.
    """

    def call(self, inputs):
        """Return one sample of z for every row of the inputs.

        Args:
            inputs: A pair ``(z_mean, z_log_var)``; both are indexed as
                rank-2 tensors (batch, latent dimension).

        Returns:
            A tensor computed as ``z_mean + exp(0.5 * z_log_var) * epsilon``.
        """
        mean, log_var = inputs
        # Use the dynamic shape so the layer works for any batch size.
        mean_shape = tf.shape(mean)
        n_rows = mean_shape[0]
        n_cols = mean_shape[1]
        # Noise is drawn with random_normal's default parameters.
        noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
        # exp(0.5 * log_var) turns a log-variance into a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise
# Number of latent dimensions produced by the encoder heads.
latent_dim = 2

# --- Encoder: two stride-2 convolutions followed by a small dense layer ---
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = encoder_inputs
for n_filters in (32, 64):
    x = layers.Conv2D(n_filters, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two linear heads feed the Sampling layer.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# --- Decoder: dense projection reshaped to (7, 7, 64), then transposed convs ---
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
for n_filters in (64, 32):
    x = layers.Conv2DTranspose(n_filters, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# A single functional model returns the reconstruction plus both latent
# heads, so a training step can compute every loss term from one call.
model = keras.Model(
    encoder_inputs,
    [decoder_outputs, z_mean, z_log_var],
    name="decoder",
)
model.summary()

optimizer = tf.keras.optimizers.Adam(lr=0.001)
# NOTE(review): `objective` is not referenced by the visible training code.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

# NOTE(review): x_train is used exactly as returned by load_data(); no
# dtype cast or rescaling is applied in this script.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
num_samples = x_train.shape[0]
epochs = 1
batch_size = 128
@tf.function
def train_step(data):
    """Run one gradient update of the module-level `model` on one batch.

    Args:
        data: Batch passed straight to `model`; a trailing axis is added
            to it afterwards before it is used as the reconstruction
            target.

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)
        # The target gets a trailing axis of size 1 only after the forward
        # pass, so it matches the rank of `reconstruction` in the loss.
        data = tf.expand_dims(data, axis=-1)

        # Binary cross-entropy summed over axes 1 and 2, then averaged.
        bce_map = keras.losses.binary_crossentropy(data, reconstruction)
        bce_per_sample = tf.reduce_sum(bce_map, axis=(1, 2))
        reconstruction_loss = tf.reduce_mean(bce_per_sample)

        # KL term summed over axis 1 (the latent axis), then averaged.
        kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_per_sample = tf.reduce_sum(kl_terms, axis=1)
        kl_loss = tf.reduce_mean(kl_per_sample)

        total_loss = reconstruction_loss + kl_loss

    weights = model.trainable_variables
    grads = tape.gradient(total_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return total_loss, reconstruction_loss, kl_loss
# Only whole batches are used: num_samples // batch_size steps per epoch.
steps_per_epoch = num_samples // batch_size

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            # Contiguous slice [start, stop) of the training array.
            start = step * batch_size
            stop = start + batch_size
            x_batch = x_train[start:stop, :, :]

            step_losses = train_step(x_batch)
            total_loss, reconstruction_loss, kl_loss = step_losses

            # Per-step progress report.
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")
我认为您忘记了按照您所指的教程中所示规范化数据:
# Load the dataset, keeping only the first element of each returned pair.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Cast the training array to float32 and divide by 255 before training.
x_train = x_train.astype("float32") / 255
除此之外,您的代码似乎运行正常,损失不会变成 nan。以下是参考代码:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np
# Environment check: report whether TensorFlow sees a GPU and which
# TensorFlow version is installed. The trailing comments record the output
# the author observed.
print(tf.test.is_gpu_available()) # prints True
print(tf.__version__) # prints '2.0.0-beta1'
class Sampling(layers.Layer):
    """Sample a latent vector z from the pair (z_mean, z_log_var).

    The layer adds scaled random noise to the mean:
    ``z = z_mean + exp(0.5 * z_log_var) * epsilon``.
    """

    def call(self, inputs):
        """Return one sample of z for every row of the inputs.

        Args:
            inputs: A pair ``(z_mean, z_log_var)``; both are indexed as
                rank-2 tensors (batch, latent dimension).

        Returns:
            A tensor computed as ``z_mean + exp(0.5 * z_log_var) * epsilon``.
        """
        mean, log_var = inputs
        # Use the dynamic shape so the layer works for any batch size.
        mean_shape = tf.shape(mean)
        n_rows = mean_shape[0]
        n_cols = mean_shape[1]
        # Noise is drawn with random_normal's default parameters.
        noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
        # exp(0.5 * log_var) turns a log-variance into a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise
# Number of latent dimensions produced by the encoder heads.
latent_dim = 2

# --- Encoder: two stride-2 convolutions followed by a small dense layer ---
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)

# Two linear heads feed the Sampling layer.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])

# --- Decoder: dense projection reshaped to (7, 7, 64), then transposed convs ---
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)

# A single functional model returns the reconstruction plus both latent
# heads, so a training step can compute every loss term from one call.
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
# and is rejected by current Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# NOTE(review): `objective` is not referenced by the visible training code.
objective = tf.keras.losses.SparseCategoricalCrossentropy()

(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# Cast to float32 and divide by 255 so the binary cross-entropy targets are
# scaled down; this is the step the answer identifies as missing from the
# question's script.
x_train = x_train.astype("float32") / 255
num_samples = x_train.shape[0]
epochs = 4
batch_size = 128
@tf.function
def train_step(data):
    """Run one gradient update of the module-level `model` on one batch.

    Args:
        data: Batch used both as the model input and as the
            reconstruction target (the training loop below passes slices
            with a trailing axis already added).

    Returns:
        Tuple ``(total_loss, reconstruction_loss, kl_loss)``.
    """
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)

        # Binary cross-entropy summed over axes 1 and 2, then averaged.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                keras.losses.binary_crossentropy(data, reconstruction),
                axis=(1, 2),
            )
        )

        # KL term summed over axis 1 (the latent axis), then averaged.
        # reduce_mean already performs the averaging, so there is no
        # further division by batch_size: dividing again (twice, as before)
        # shrank this term by batch_size**2 and effectively removed it
        # from the objective.
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))

        total_loss = reconstruction_loss + kl_loss

    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss, reconstruction_loss, kl_loss
# Only whole batches are used: num_samples // batch_size steps per epoch.
steps_per_epoch = num_samples // batch_size

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            # Contiguous slice [start, stop) of the training array;
            # tf.newaxis appends a trailing axis of length 1.
            start = step * batch_size
            stop = start + batch_size
            x_batch = x_train[start:stop, :, :, tf.newaxis]
            print(x_batch.shape)

            step_losses = train_step(x_batch)
            total_loss, reconstruction_loss, kl_loss = step_losses

            # Per-step progress report.
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")