Training logistic regression with tf.GradientTape() doesn't converge
I trained a logistic regression model with tf.GradientTape, but it fails to converge:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
ys = np.array([1, 0, 0])

def lr_model():
    inputs = keras.Input(shape=(2,))
    outputs = layers.Dense(1, activation='sigmoid')(inputs)
    return keras.Model(inputs=inputs, outputs=outputs)

model = lr_model()
model.compile(loss=keras.losses.BinaryCrossentropy(),
              optimizer=keras.optimizers.SGD(0.1),
              metrics=['accuracy'])

history = model.fit(xs, ys, batch_size=3, epochs=10)
for i in range(10):
    print(i, history.history['loss'][i], history.history['accuracy'][i])
It converges, and these are the results:
0 1.04525887966156 0.0
1 0.9557339549064636 0.0
2 0.8753216862678528 0.0
3 0.8033372759819031 0.0
4 0.7390384674072266 0.0
5 0.6816689968109131 0.6666667
6 0.6304909586906433 1.0
7 0.5848075151443481 1.0
8 0.5439766049385071 1.0
9 0.5074175596237183 1.0
But I want to write the training loop myself, like this:
train_loss = keras.metrics.Mean(name='train_loss')
train_acc = keras.metrics.BinaryAccuracy()

model = lr_model()
optimizer = keras.optimizers.SGD(0.1)

def train_step(data, labels):
    with tf.GradientTape() as tape:
        data = tf.cast(data, tf.float32)
        pred = model(data)
        loss = keras.losses.binary_crossentropy(labels, pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(labels, pred)

for i in range(100):
    train_loss.reset_states()
    train_acc.reset_states()
    train_step(xs, ys)
    if i % 10 == 0:
        print(i, train_loss.result().numpy(), train_acc.result().numpy())
It doesn't converge like the previous example, and I don't know why:
0 0.7586897 1.0
10 0.6607897 1.0
20 0.64341 0.6666667
30 0.63867164 0.6666667
40 0.63722247 0.6666667
50 0.63676286 0.6666667
60 0.63661444 0.6666667
70 0.636566 0.6666667
80 0.63654995 0.6666667
90 0.6365447 0.6666667
What is wrong with my code? How should I change my tf.GradientTape training code so that it converges like Keras compile/fit does? Thanks.
Try this, it works for me:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

tf.enable_eager_execution()  # needed on TF 1.x; TF 2.x runs eagerly by default

xs = np.array([[1, 1], [-1, -1], [-1, -1.1]])
ys = np.array([1., 0., 0.])

def get_model():
    inputs = keras.Input(shape=(2,))
    outputs = layers.Dense(1, activation='sigmoid')(inputs)
    return keras.Model(inputs=inputs, outputs=outputs)

# Without GradientTape
model = get_model()
model.compile(loss=keras.losses.BinaryCrossentropy(),
              optimizer=keras.optimizers.SGD(0.1),
              metrics=['accuracy'])
model.fit(xs, ys, batch_size=3, epochs=50)

# With GradientTape
optimizer = tf.train.GradientDescentOptimizer(0.1)
train_acc = keras.metrics.BinaryAccuracy()
model = get_model()
for i in range(50):
    with tf.GradientTape() as tape:
        xs_ = tf.cast(xs, tf.float32)
        # The key change: reshape the labels to (n, 1) so each label
        # lines up with the model's (n, 1) predictions.
        ys_ = tf.cast(ys.reshape(-1, 1), tf.float32)
        pred = model(xs_)
        loss = keras.losses.binary_crossentropy(ys_, pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print(train_acc(ys_, pred))
    # print(tf.math.reduce_sum(loss))
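
Why this fixes it: the original loop stalls because of a label-shape mismatch, not because of the tape. keras.losses.binary_crossentropy receives labels of shape (3,) while the model outputs shape (3, 1); the two broadcast to a (3, 3) matrix before the mean over the last axis, so every sample's loss is averaged against every label and the gradients optimize the wrong objective. model.fit standardizes the target shape to match the output, which is why the compiled version converges. A minimal sketch of the mismatch (assuming TF 2.x, where eager execution is on by default):

import tensorflow as tf
from tensorflow import keras

ys = tf.constant([1., 0., 0.])             # labels, shape (3,)
pred = tf.constant([[0.9], [0.2], [0.1]])  # model output, shape (3, 1)

# (3,) labels broadcast against (3, 1) predictions: the elementwise
# cross-entropy becomes a (3, 3) matrix, and the mean over the last
# axis mixes every label with every prediction.
print(keras.losses.binary_crossentropy(ys, pred).numpy())

# Reshaping the labels to (3, 1) pairs each label with its own
# prediction, giving the per-sample losses that fit computes.
print(keras.losses.binary_crossentropy(tf.reshape(ys, (-1, 1)), pred).numpy())

With the labels reshaped this way, the original train_step should converge just like compile/fit; on TF 2.x you can also keep keras.optimizers.SGD(0.1) and drop the enable_eager_execution() call.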