Tensorflow 2.0:最小化一个简单的函数
Tensorflow 2.0: minimize a simple function
import tensorflow as tf
import numpy as np
# Question snippet: tries to minimise log(x)**2 with Adam. The last statement
# is kept deliberately because its failure is the subject of the question.
x = tf.Variable(2, name='x', trainable=True, dtype=tf.float32)
with tf.GradientTape() as t:
    t.watch(x)
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)

opt = tf.optimizers.Adam(0.5)
# train = opt.minimize(lambda: y, var_list=[x]) # FAILS


@tf.function
def f(x):
    """Return log(x) ** 2 for the tensor or variable `x`."""
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)
    return y


# `yy` is evaluated once, here, before `minimize` is called. The lambda below
# only hands back that already-computed tensor, so nothing that depends on `x`
# is executed while the optimizer is recording the loss.
yy = f(x)
train = opt.minimize(lambda: yy, var_list=[x]) # ALSO FAILS
会产生 ValueError:
No gradients provided for any variable: ['x:0'].
这看起来和他们给出的部分示例很相似。我不确定这是 eager 模式或 2.0 的 bug,还是我自己哪里做错了。
更新:
由于存在一些问题和有趣的注释,因此在下面粘贴了解决方案的美化版本。
import numpy as np
import tensorflow as tf
# Shared state for the helper functions below.
x = tf.Variable(3, name='x', trainable=True, dtype=tf.float32)

# The tape is persistent because get_gradient_wrong() calls t.gradient() on
# it every time it is invoked; a non-persistent tape can only be used once.
with tf.GradientTape(persistent=True) as t:
    # Alternative loss from the question: tf.math.square(tf.math.log(x))
    y = (x - 1) ** 2

opt = tf.optimizers.Adam(learning_rate=0.001)
def get_gradient_wrong(x0):
    """Counter-example: return a stale gradient of the module-level `y`.

    `x` is assigned `x0`, but the module-level tape `t` recorded `y` before
    this call, so the returned gradient reflects the value `x` had at
    recording time rather than `x0`.

    Args:
        x0: value assigned to the module-level variable `x`.

    Returns:
        A one-element list with the gradient of `y` with respect to `x`.
    """
    x.assign(x0)
    return t.gradient(y, [x])
def get_gradient(x0):
    """Return the gradient of (x - 1) ** 2 evaluated at `x0`.

    The loss is recomputed under a fresh tape after `x` has been assigned,
    so the gradient always corresponds to the current value of `x`.

    Args:
        x0: value assigned to the module-level variable `x`.

    Returns:
        A one-element list with the gradient with respect to `x`.
    """
    x.assign(x0)
    # gradient() is called exactly once, so a persistent tape is not needed.
    with tf.GradientTape() as tape:
        loss = (x - 1) ** 2
    return tape.gradient(loss, [x])
#### Option 1
def a(x0, tol=1e-8, max_iter=10000):
    """Minimise (x - 1) ** 2 using an explicit tape plus `apply_gradients`.

    Works on the module-level variable `x` and optimizer `opt`, and prints
    the step size, the current value and the gradient on every iteration.

    Args:
        x0: starting value assigned to `x`.
        tol: stop once the change in `x` between two consecutive steps is no
            longer greater than this. This is a step-size criterion, not the
            distance to the true minimum.
        max_iter: maximum number of optimizer updates.

    Returns:
        The final value of `x` as a NumPy scalar.

    NOTE(review): the original author remarked that this "does not appear to
    work properly"; presumably that is slow convergence caused by the
    learning rate of the shared optimizer. Confirm.
    """
    x.assign(x0)
    for _ in range(max_iter):
        previous = x.numpy()
        # The loss must be recomputed under a new tape on every iteration,
        # otherwise the gradient would not follow the updated value of x.
        with tf.GradientTape() as tape:
            loss = (x - 1) ** 2
        gradients = tape.gradient(loss, [x])
        opt.apply_gradients(zip(gradients, [x]))
        err = np.abs(x.numpy() - previous)
        print(err, x.numpy(), gradients[0].numpy())
        # Written as `not err > tol` so that a NaN step also stops the loop.
        if not err > tol:
            print(f'stopping at err={err}<{tol}')
            return x.numpy()
    print(f'stopping at max_iter={max_iter}')
    return x.numpy()
#### Option 2
def b(x0, tol=1e-8, max_iter=10000):
    """Minimise log(x) ** 2 by calling `opt.minimize` with a loss callable.

    Works on the module-level variable `x` and optimizer `opt`, and prints
    the step size and the current value on every iteration.

    Args:
        x0: starting value assigned to `x`.
        tol: stop once the change in `x` between two consecutive steps is no
            longer greater than this. This is a step-size criterion, not the
            distance to the true minimum.
        max_iter: maximum number of optimizer updates.

    Returns:
        The final value of `x` as a NumPy scalar.
    """
    x.assign(x0)

    # minimize() needs the loss as a callable so that it can evaluate it
    # itself on every call.
    def compute_loss():
        return tf.math.square(tf.math.log(x))

    for _ in range(max_iter):
        previous = x.numpy()
        opt.minimize(compute_loss, var_list=[x])
        err = np.abs(x.numpy() - previous)
        print(err, x.numpy())
        # Written as `not err > tol` so that a NaN step also stops the loop.
        if not err > tol:
            print(f'stopping at err={err}<{tol}')
            return x.numpy()
    print(f'stopping at max_iter={max_iter}')
    return x.numpy()
你确实有些地方做错了。你有两个选择:
使用 GradientTape(梯度带)计算梯度
在这种情况下,您只需使用优化器来应用更新规则。
import tensorflow as tf
x = tf.Variable(2, name='x', trainable=True, dtype=tf.float32)
# Same optimizer as in the question; defined here so the snippet runs alone.
opt = tf.optimizers.Adam(0.5)

with tf.GradientTape() as t:
    # no need to watch a variable:
    # trainable variables are always watched
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)

#### Option 1
# It is the tape that computes the gradients!
trainable_variables = [x]
gradients = t.gradient(y, trainable_variables)
# The optimizer applies the update, using the variables
# and the optimizer update rule
opt.apply_gradients(zip(gradients, trainable_variables))
将损失定义为函数
在这种情况下,您可以使用优化器的 .minimize 方法,
它会自动创建 GradientTape 来计算梯度,并为您更新参数。
#### Option 2
# To use minimize you have to define your loss computation as a function
def compute_loss():
    """Return log(x) ** 2 for the current value of the variable `x`."""
    log_x = tf.math.log(x)
    return tf.math.square(log_x)


train = opt.minimize(compute_loss, var_list=trainable_variables)
我也给上面被采纳的答案点了赞,但我仍然花了一些时间才得到一个可以端到端运行的解决方案,所以也在这里和大家分享一下。下面的代码求解一个简单的数学问题:
f(x)=x-(6/7)*x-1/7
g(x)=f(f(f(f(x))))
Find x such that g(x) == 0
!pip install setuptools --upgrade
!pip install -q tensorflow==2.0.0-beta1
import tensorflow as tf
import numpy as np
tf.__version__ #=> '2.0.0-beta1'
@tf.function
def f(x):
    """Return x - (6/7) * x - 1/7."""
    return x-(6/7)*x-1/7


# Show the code AutoGraph generates for f. The name passed here must be the
# decorated function itself.
print(tf.autograph.to_code(f.python_function))
# The unknown to solve for, and the target value g(x) should reach.
x = tf.Variable(0, trainable=True, dtype=tf.float64)
y = tf.constant([0], dtype=tf.float64)


@tf.function
def g(x):
    """Return f applied four times to `x`."""
    return f(f(f(f(x))))


# Show the code AutoGraph generates for g. The name passed here must be the
# decorated function itself.
print(tf.autograph.to_code(g.python_function))
# Create a list of variables which need to be adjusted during the training
# process; in this simple case it is only x.
variables = [x]
# Instantiate a gradient-descent optimizer variant; in this case the learning
# rate and the specific type of optimizer don't matter too much.
optimizer = tf.optimizers.Adam(0.5)
# We need to somehow specify the error between the actual value of the
# evaluated function and the target (which is zero).
loss_object = tf.keras.losses.MeanAbsoluteError()

# Let's train for some iterations.
for i in range(1000):
    # A fresh tape records only this iteration's forward pass. A single
    # persistent tape around the whole loop would keep every iteration's
    # operations recorded for as long as the loop runs.
    with tf.GradientTape() as tape:
        # Evaluate g at the current value of x (which we continuously adjust
        # in order to find the root of the equation).
        y_pred = g(x)
        # The loss becomes zero when y_pred approximates y; since y is zero,
        # the loss becomes zero when y_pred approximates zero.
        loss = loss_object(y, y_pred)
    # The loss depends only on x, so its first derivative tells us in which
    # direction x has to move to bring g(x) closer to zero.
    grads = tape.gradient(loss, variables)
    # Update x based on that gradient so the next iteration performs better.
    optimizer.apply_gradients(zip(grads, variables))
    # Print the current error (loss) and the value of x in each iteration.
    # At the end of training the loss is close to zero and x is close to 400,
    # which is the exact solution; small deviations stem from numeric errors.
    print('Loss: {}, X: {}'.format(loss.numpy(), x.numpy()))
import tensorflow as tf
import numpy as np
# Question snippet: tries to minimise log(x)**2 with Adam. The last statement
# is kept deliberately because its failure is the subject of the question.
x = tf.Variable(2, name='x', trainable=True, dtype=tf.float32)
with tf.GradientTape() as t:
    t.watch(x)
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)

opt = tf.optimizers.Adam(0.5)
# train = opt.minimize(lambda: y, var_list=[x]) # FAILS


@tf.function
def f(x):
    """Return log(x) ** 2 for the tensor or variable `x`."""
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)
    return y


# `yy` is evaluated once, here, before `minimize` is called. The lambda below
# only hands back that already-computed tensor, so nothing that depends on `x`
# is executed while the optimizer is recording the loss.
yy = f(x)
train = opt.minimize(lambda: yy, var_list=[x]) # ALSO FAILS
会产生 ValueError:
No gradients provided for any variable: ['x:0'].
这看起来和他们给出的部分示例很相似。我不确定这是 eager 模式或 2.0 的 bug,还是我自己哪里做错了。
更新:
由于存在一些问题和有趣的注释,因此在下面粘贴了解决方案的美化版本。
import numpy as np
import tensorflow as tf
# Shared state for the helper functions below.
x = tf.Variable(3, name='x', trainable=True, dtype=tf.float32)

# The tape is persistent because get_gradient_wrong() calls t.gradient() on
# it every time it is invoked; a non-persistent tape can only be used once.
with tf.GradientTape(persistent=True) as t:
    # Alternative loss from the question: tf.math.square(tf.math.log(x))
    y = (x - 1) ** 2

opt = tf.optimizers.Adam(learning_rate=0.001)
def get_gradient_wrong(x0):
    """Counter-example: return a stale gradient of the module-level `y`.

    `x` is assigned `x0`, but the module-level tape `t` recorded `y` before
    this call, so the returned gradient reflects the value `x` had at
    recording time rather than `x0`.

    Args:
        x0: value assigned to the module-level variable `x`.

    Returns:
        A one-element list with the gradient of `y` with respect to `x`.
    """
    x.assign(x0)
    return t.gradient(y, [x])
def get_gradient(x0):
    """Return the gradient of (x - 1) ** 2 evaluated at `x0`.

    The loss is recomputed under a fresh tape after `x` has been assigned,
    so the gradient always corresponds to the current value of `x`.

    Args:
        x0: value assigned to the module-level variable `x`.

    Returns:
        A one-element list with the gradient with respect to `x`.
    """
    x.assign(x0)
    # gradient() is called exactly once, so a persistent tape is not needed.
    with tf.GradientTape() as tape:
        loss = (x - 1) ** 2
    return tape.gradient(loss, [x])
#### Option 1
def a(x0, tol=1e-8, max_iter=10000):
    """Minimise (x - 1) ** 2 using an explicit tape plus `apply_gradients`.

    Works on the module-level variable `x` and optimizer `opt`, and prints
    the step size, the current value and the gradient on every iteration.

    Args:
        x0: starting value assigned to `x`.
        tol: stop once the change in `x` between two consecutive steps is no
            longer greater than this. This is a step-size criterion, not the
            distance to the true minimum.
        max_iter: maximum number of optimizer updates.

    Returns:
        The final value of `x` as a NumPy scalar.

    NOTE(review): the original author remarked that this "does not appear to
    work properly"; presumably that is slow convergence caused by the
    learning rate of the shared optimizer. Confirm.
    """
    x.assign(x0)
    for _ in range(max_iter):
        previous = x.numpy()
        # The loss must be recomputed under a new tape on every iteration,
        # otherwise the gradient would not follow the updated value of x.
        with tf.GradientTape() as tape:
            loss = (x - 1) ** 2
        gradients = tape.gradient(loss, [x])
        opt.apply_gradients(zip(gradients, [x]))
        err = np.abs(x.numpy() - previous)
        print(err, x.numpy(), gradients[0].numpy())
        # Written as `not err > tol` so that a NaN step also stops the loop.
        if not err > tol:
            print(f'stopping at err={err}<{tol}')
            return x.numpy()
    print(f'stopping at max_iter={max_iter}')
    return x.numpy()
#### Option 2
def b(x0, tol=1e-8, max_iter=10000):
    """Minimise log(x) ** 2 by calling `opt.minimize` with a loss callable.

    Works on the module-level variable `x` and optimizer `opt`, and prints
    the step size and the current value on every iteration.

    Args:
        x0: starting value assigned to `x`.
        tol: stop once the change in `x` between two consecutive steps is no
            longer greater than this. This is a step-size criterion, not the
            distance to the true minimum.
        max_iter: maximum number of optimizer updates.

    Returns:
        The final value of `x` as a NumPy scalar.
    """
    x.assign(x0)

    # minimize() needs the loss as a callable so that it can evaluate it
    # itself on every call.
    def compute_loss():
        return tf.math.square(tf.math.log(x))

    for _ in range(max_iter):
        previous = x.numpy()
        opt.minimize(compute_loss, var_list=[x])
        err = np.abs(x.numpy() - previous)
        print(err, x.numpy())
        # Written as `not err > tol` so that a NaN step also stops the loop.
        if not err > tol:
            print(f'stopping at err={err}<{tol}')
            return x.numpy()
    print(f'stopping at max_iter={max_iter}')
    return x.numpy()
你确实有些地方做错了。你有两个选择:
使用 GradientTape(梯度带)计算梯度
在这种情况下,您只需使用优化器来应用更新规则。
import tensorflow as tf
x = tf.Variable(2, name='x', trainable=True, dtype=tf.float32)
# Same optimizer as in the question; defined here so the snippet runs alone.
opt = tf.optimizers.Adam(0.5)

with tf.GradientTape() as t:
    # no need to watch a variable:
    # trainable variables are always watched
    log_x = tf.math.log(x)
    y = tf.math.square(log_x)

#### Option 1
# It is the tape that computes the gradients!
trainable_variables = [x]
gradients = t.gradient(y, trainable_variables)
# The optimizer applies the update, using the variables
# and the optimizer update rule
opt.apply_gradients(zip(gradients, trainable_variables))
将损失定义为函数
在这种情况下,您可以使用优化器的 .minimize 方法,
它会自动创建 GradientTape 来计算梯度,并为您更新参数。
#### Option 2
# To use minimize you have to define your loss computation as a function
def compute_loss():
    """Return log(x) ** 2 for the current value of the variable `x`."""
    log_x = tf.math.log(x)
    return tf.math.square(log_x)


train = opt.minimize(compute_loss, var_list=trainable_variables)
我也给上面被采纳的答案点了赞,但我仍然花了一些时间才得到一个可以端到端运行的解决方案,所以也在这里和大家分享一下。下面的代码求解一个简单的数学问题:
f(x)=x-(6/7)*x-1/7
g(x)=f(f(f(f(x))))
Find x such that g(x) == 0
!pip install setuptools --upgrade
!pip install -q tensorflow==2.0.0-beta1
import tensorflow as tf
import numpy as np
tf.__version__ #=> '2.0.0-beta1'
@tf.function
def f(x):
    """Return x - (6/7) * x - 1/7."""
    return x-(6/7)*x-1/7


# Show the code AutoGraph generates for f. The name passed here must be the
# decorated function itself.
print(tf.autograph.to_code(f.python_function))
# The unknown to solve for, and the target value g(x) should reach.
x = tf.Variable(0, trainable=True, dtype=tf.float64)
y = tf.constant([0], dtype=tf.float64)


@tf.function
def g(x):
    """Return f applied four times to `x`."""
    return f(f(f(f(x))))


# Show the code AutoGraph generates for g. The name passed here must be the
# decorated function itself.
print(tf.autograph.to_code(g.python_function))
# Create a list of variables which need to be adjusted during the training
# process; in this simple case it is only x.
variables = [x]
# Instantiate a gradient-descent optimizer variant; in this case the learning
# rate and the specific type of optimizer don't matter too much.
optimizer = tf.optimizers.Adam(0.5)
# We need to somehow specify the error between the actual value of the
# evaluated function and the target (which is zero).
loss_object = tf.keras.losses.MeanAbsoluteError()

# Let's train for some iterations.
for i in range(1000):
    # A fresh tape records only this iteration's forward pass. A single
    # persistent tape around the whole loop would keep every iteration's
    # operations recorded for as long as the loop runs.
    with tf.GradientTape() as tape:
        # Evaluate g at the current value of x (which we continuously adjust
        # in order to find the root of the equation).
        y_pred = g(x)
        # The loss becomes zero when y_pred approximates y; since y is zero,
        # the loss becomes zero when y_pred approximates zero.
        loss = loss_object(y, y_pred)
    # The loss depends only on x, so its first derivative tells us in which
    # direction x has to move to bring g(x) closer to zero.
    grads = tape.gradient(loss, variables)
    # Update x based on that gradient so the next iteration performs better.
    optimizer.apply_gradients(zip(grads, variables))
    # Print the current error (loss) and the value of x in each iteration.
    # At the end of training the loss is close to zero and x is close to 400,
    # which is the exact solution; small deviations stem from numeric errors.
    print('Loss: {}, X: {}'.format(loss.numpy(), x.numpy()))