optimizer.apply_gradients does not update the weights in TF 2.0
I have a UNet implementation in TensorFlow 2.0. The code runs fine without any errors, but the value of custom_loss stays the same.

First, the UNet class that holds the model:
import tensorflow as tf

class UNet( object ):

    def __init__( self ):

        def get_weight( shape, name ):
            return tf.Variable( tf.random.normal( shape ), name=name, trainable=True, dtype=tf.float32 )

        # Filter shapes: [height, width, in_channels, out_channels] for the tf.nn.conv2d filters,
        # [height, width, out_channels, in_channels] for the tf.nn.conv2d_transpose filters.
        shapes = [
            [3, 3, 3, 16],
            [3, 3, 16, 16],
            [3, 3, 16, 32],
            [3, 3, 32, 32],
            [3, 3, 32, 64],
            [3, 3, 64, 64],
            [3, 3, 64, 128],
            [3, 3, 128, 128],
            [3, 3, 128, 256],
            [3, 3, 256, 256],
            [3, 3, 128, 384],
            [3, 3, 128, 128],
            [3, 3, 64, 192],
            [3, 3, 64, 64],
            [3, 3, 32, 96],
            [3, 3, 32, 32],
            [3, 3, 16, 48],
            [3, 3, 16, 16],
            [1, 1, 16, 1],
        ]
        self.weights = []
        for i in range( len( shapes ) ):
            self.weights.append( get_weight( shapes[i], 'weight{}'.format( i ) ) )
        self.padding = 'SAME'
    def conv2d_down( self, inputs, filters, stride_size ):
        out = tf.nn.conv2d( inputs, filters, strides=[1, stride_size, stride_size, 1], padding=self.padding )
        return tf.nn.leaky_relu( out, alpha=0.2 )

    def maxpool_down( self, inputs, pool_size, stride_size ):
        return tf.nn.max_pool2d( inputs, ksize=[1, pool_size, pool_size, 1], padding='VALID',
                                 strides=[1, stride_size, stride_size, 1] )

    def conv2d_up( self, inputs, filters, stride_size, output_shape ):
        out = tf.nn.conv2d_transpose( inputs, filters, output_shape=output_shape,
                                      strides=[1, stride_size, stride_size, 1], padding=self.padding )
        return tf.nn.leaky_relu( out, alpha=0.2 )

    def maxpool_up( self, inputs, size ):
        in_dimen = tf.shape( inputs )[1]
        out_dimen = tf.cast( tf.round( in_dimen * size ), dtype=tf.int32 )
        return tf.image.resize( inputs, [out_dimen, out_dimen], method='nearest' )
    def __call__( self, x ):
        # Contracting path
        c1 = self.conv2d_down( x, self.weights[0], stride_size=1 )
        c1 = self.conv2d_down( c1, self.weights[1], stride_size=1 )
        p1 = self.maxpool_down( c1, pool_size=2, stride_size=2 )
        c2 = self.conv2d_down( p1, self.weights[2], stride_size=1 )
        c2 = self.conv2d_down( c2, self.weights[3], stride_size=1 )
        p2 = self.maxpool_down( c2, pool_size=2, stride_size=2 )
        c3 = self.conv2d_down( p2, self.weights[4], stride_size=1 )
        c3 = self.conv2d_down( c3, self.weights[5], stride_size=1 )
        p3 = self.maxpool_down( c3, pool_size=2, stride_size=2 )
        c4 = self.conv2d_down( p3, self.weights[6], stride_size=1 )
        c4 = self.conv2d_down( c4, self.weights[7], stride_size=1 )
        p4 = self.maxpool_down( c4, pool_size=2, stride_size=2 )
        c5 = self.conv2d_down( p4, self.weights[8], stride_size=1 )
        c5 = self.conv2d_down( c5, self.weights[9], stride_size=1 )
        # Expanding path with skip connections
        p5 = self.maxpool_up( c5, 2 )
        concat_1 = tf.concat( [p5, c4], axis=-1 )
        c6 = self.conv2d_up( concat_1, self.weights[10], stride_size=1, output_shape=[1, 16, 16, 128] )
        c6 = self.conv2d_up( c6, self.weights[11], stride_size=1, output_shape=[1, 16, 16, 128] )
        p6 = self.maxpool_up( c6, 2 )
        concat_2 = tf.concat( [p6, c3], axis=-1 )
        c7 = self.conv2d_up( concat_2, self.weights[12], stride_size=1, output_shape=[1, 32, 32, 64] )
        c7 = self.conv2d_up( c7, self.weights[13], stride_size=1, output_shape=[1, 32, 32, 64] )
        p7 = self.maxpool_up( c7, 2 )
        concat_3 = tf.concat( [p7, c2], axis=-1 )
        c8 = self.conv2d_up( concat_3, self.weights[14], stride_size=1, output_shape=[1, 64, 64, 32] )
        c8 = self.conv2d_up( c8, self.weights[15], stride_size=1, output_shape=[1, 64, 64, 32] )
        p8 = self.maxpool_up( c8, 2 )
        concat_4 = tf.concat( [p8, c1], axis=-1 )
        c9 = self.conv2d_up( concat_4, self.weights[16], stride_size=1, output_shape=[1, 128, 128, 16] )
        c9 = self.conv2d_up( c9, self.weights[17], stride_size=1, output_shape=[1, 128, 128, 16] )
        output = tf.nn.conv2d( c9, self.weights[18], strides=[1, 1, 1, 1], padding=self.padding )
        outputs = tf.nn.sigmoid( output )
        return outputs
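As a quick sanity check of the architecture itself (a minimal sketch; the hard-coded output_shape values in __call__ assume a single 128 x 128 x 3 input):

model = UNet()
out = model( tf.random.normal( [1, 128, 128, 3] ) )
print( out.shape )  # expected: (1, 128, 128, 1)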
I have defined a loss function and an optimizer for the model. Then, inside a loop, I update the weights with a train method:
loss = tf.losses.BinaryCrossentropy()
optimizer = tf.optimizers.Adam( learning_rate=0.0001 )

def train( model, inputs, outputs ):
    with tf.GradientTape() as tape:
        # BinaryCrossentropy expects ( y_true, y_pred )
        current_loss = loss( outputs, model( inputs ) )
    grads = tape.gradient( current_loss, model.weights )
    optimizer.apply_gradients( zip( grads, model.weights ) )
    print( current_loss )

sample_image = tf.Variable( tf.random.normal( [1, 128, 128, 3] ) )
target_image = tf.Variable( tf.random.normal( [1, 128, 128, 1] ) )
model = UNet()

for i in range( 1000 ):
    train( model, sample_image, target_image )
The output is,
tf.Tensor(3.9185588, shape=(), dtype=float32)
tf.Tensor(3.9185588, shape=(), dtype=float32)
tf.Tensor(3.9185588, shape=(), dtype=float32)
tf.Tensor(3.9185588, shape=(), dtype=float32)
tf.Tensor(3.9185588, shape=(), dtype=float32)
and so on. optimizer.apply_gradients does not update the weights. How can I correctly update the model.weights parameters? I suspect that the problem is in some function which is not differentiable (I am not sure, though).
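One way to narrow this down, before blaming apply_gradients, is to look at the raw gradients for a single step. A minimal sketch, reusing the loss, model, sample_image and target_image defined above:

with tf.GradientTape() as tape:
    current_loss = loss( target_image, model( sample_image ) )
grads = tape.gradient( current_loss, model.weights )
for var, grad in zip( model.weights, grads ):
    # None means the variable is disconnected from the loss graph;
    # a norm near zero for every layer points to vanishing gradients
    # rather than a broken optimizer update.
    print( var.name, None if grad is None else float( tf.norm( grad ) ) )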
So, I solved the problem myself. The issue was that I was initializing the weights with tf.random.normal. When I switched to tf.initializers.glorot_uniform, the loss started decreasing:
initializer = tf.initializers.glorot_uniform()

def get_weight( shape, name ):
    return tf.Variable( initializer( shape ), name=name, trainable=True, dtype=tf.float32 )
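My reading of why this helps (an assumption, not something I profiled layer by layer): tf.random.normal draws weights with a standard deviation of 1.0, which is far too large for these filters, so the sigmoid output saturates and almost no gradient reaches the early layers, whereas glorot_uniform scales each filter by its fan-in and fan-out and keeps activations and gradients in a usable range. A quick way to confirm the weights really move after the change (a minimal sketch, reusing model, sample_image and target_image from above):

before = tf.norm( model.weights[0] ).numpy()
train( model, sample_image, target_image )
after = tf.norm( model.weights[0] ).numpy()
# With glorot_uniform initialization the two norms should now differ after one step.
print( before, after )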