TensorFlow Probability 中的可训练数组
Trainable array in TensorFlow Probability
我正在尝试训练一个混合模型,但不清楚如何指定可训练的数组参数,以便在训练过程中更新混合权重。假设权重是硬编码的,代码如下:
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
# Mixing proportions are plain Python floats here, so they are constants;
# only the tf.Variable parameters of the components below are trainable.
weights = [0.2, 0.8]
mixing_dist = tfd.Categorical(probs=weights)

# Two Normal components, each with its own trainable loc and scale.
component_a = tfd.Normal(
    loc=tf.Variable(0., name='loc1'),
    scale=tf.Variable(1., name='scale1'),
)
component_b = tfd.Normal(
    loc=tf.Variable(0., name='loc2'),
    scale=tf.Variable(1., name='scale2'),
)

dist = tfd.Mixture(cat=mixing_dist, components=[component_a, component_b])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
@tf.function
def train_step(X):
    """Apply one optimizer update that minimizes the negative mean log-probability.

    Uses the module-level ``dist`` and ``optimizer``.

    Args:
        X: Observations passed to ``dist.log_prob``.

    Returns:
        The loss value computed for ``X`` before the update is applied.
    """
    params = dist.trainable_variables
    with tf.GradientTape() as tape:
        mean_log_prob = tf.reduce_mean(dist.log_prob(X))
        loss = -mean_log_prob
    # The gradient is taken after leaving the tape context.
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))
    return loss
# Run a fixed number of optimization steps on the same data X
# (X is not defined in this snippet; it must be provided beforehand).
num_steps = 20000
for _ in range(num_steps):
    loss = train_step(X)
其中 X 是形状为 (272, 1) 的 NumPy 数组,存放一维数据。
现在假设我想让权重也参与学习。如果我在 Categorical 分布的构造函数中尝试:
probs=[tf.Variable(0.2, name='weight1'),tf.Variable(0.8, name='weight2')]
那么我会收到错误消息“No gradients provided for any variable”(没有为任何变量提供梯度)。
如果我改为尝试:
probs=tf.Variable([tf.Variable(0.2, name='weight1'),tf.Variable(0.8, name='weight2')], trainable=True, name='weights')
那么 weight1 和 weight2 不会出现在可训练变量列表中;变量 weights 虽然出现在列表中,但在训练过程中没有被更新。
应该如何为 probs 参数指定权重,才能让它们在训练期间得到更新?
也许可以尝试以下方法:把整个权重数组作为单个 tf.Variable 直接传给 probs:
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
# The mixing probabilities are held in a single tf.Variable containing both
# entries, rather than in a Python list of separate scalar Variables.
# NOTE(review): this Variable is unconstrained; nothing in this snippet keeps
# its entries non-negative or summing to 1 during training. Consider passing
# `logits=` instead, or a constrained variable such as
# tfp.util.TransformedVariable — confirm against the TFP documentation.
mixing_dist = tfd.Categorical(probs=tf.Variable([0.2, 0.8]))

# Two Normal components, each with its own trainable loc and scale.
component_a = tfd.Normal(
    loc=tf.Variable(0., name='loc1'),
    scale=tf.Variable(1., name='scale1'),
)
component_b = tfd.Normal(
    loc=tf.Variable(0., name='loc2'),
    scale=tf.Variable(1., name='scale2'),
)

dist = tfd.Mixture(cat=mixing_dist, components=[component_a, component_b])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
@tf.function
def train_step(X):
    """Apply one optimizer update and print the variables as they were before it.

    Uses the module-level ``dist`` and ``optimizer``.

    Args:
        X: Observations passed to ``dist.log_prob``.

    Returns:
        The loss value computed for ``X`` before the update is applied.
    """
    params = dist.trainable_variables
    with tf.GradientTape() as tape:
        mean_log_prob = tf.reduce_mean(dist.log_prob(X))
        loss = -mean_log_prob
    # Printed ahead of apply_gradients, so each line shows the pre-update values.
    tf.print(params)
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))
    return loss
# Run a few steps, drawing a fresh (272, 1) standard-normal sample each time.
num_steps = 10
for _ in range(num_steps):
    batch = tf.random.normal((272, 1))
    loss = train_step(batch)
([0.2 0.8], 0, 1, 0, 1)
([0.2 0.8], -0.00999249145, 1.00999844, -0.0099981213, 1.00999963)
([0.200921655 0.799828708], -0.00638755737, 1.00682414, -0.00639217719, 1.00682521)
([0.20176363 0.799696386], -0.000149463303, 1.00765562, -0.000160227064, 1.00764322)
([0.200775564 0.800094664], 0.000889031217, 1.00637043, 0.000898908474, 1.00636196)
([0.199177444 0.800768435], -0.00115872873, 1.0025779, -0.00113528164, 1.0025754)
([0.19703567 0.801662683], -0.000830670586, 0.998396218, -0.000778611051, 0.998392522)
([0.193336055 0.80336237], 0.00244163908, 0.993740082, 0.00255049323, 0.993718445)
([0.192727238 0.803925216], 0.00376213156, 0.989788294, 0.00386576797, 0.989756942)
([0.194845349 0.802922785], 0.0022987891, 0.986021399, 0.00232516858, 0.985970497)
我正在尝试训练一个混合模型,但不清楚如何指定可训练的数组参数,以便在训练过程中更新混合权重。假设权重是硬编码的,代码如下:
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
# Mixing proportions are plain Python floats here, so they are constants;
# only the tf.Variable parameters of the components below are trainable.
weights = [0.2, 0.8]
mixing_dist = tfd.Categorical(probs=weights)

# Two Normal components, each with its own trainable loc and scale.
component_a = tfd.Normal(
    loc=tf.Variable(0., name='loc1'),
    scale=tf.Variable(1., name='scale1'),
)
component_b = tfd.Normal(
    loc=tf.Variable(0., name='loc2'),
    scale=tf.Variable(1., name='scale2'),
)

dist = tfd.Mixture(cat=mixing_dist, components=[component_a, component_b])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
@tf.function
def train_step(X):
    """Apply one optimizer update that minimizes the negative mean log-probability.

    Uses the module-level ``dist`` and ``optimizer``.

    Args:
        X: Observations passed to ``dist.log_prob``.

    Returns:
        The loss value computed for ``X`` before the update is applied.
    """
    params = dist.trainable_variables
    with tf.GradientTape() as tape:
        mean_log_prob = tf.reduce_mean(dist.log_prob(X))
        loss = -mean_log_prob
    # The gradient is taken after leaving the tape context.
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))
    return loss
# Run a fixed number of optimization steps on the same data X
# (X is not defined in this snippet; it must be provided beforehand).
num_steps = 20000
for _ in range(num_steps):
    loss = train_step(X)
其中 X 是形状为 (272, 1) 的 NumPy 数组,存放一维数据。
现在假设我想让权重也参与学习。如果我在 Categorical 分布的构造函数中尝试:
probs=[tf.Variable(0.2, name='weight1'),tf.Variable(0.8, name='weight2')]
那么我会收到错误消息“No gradients provided for any variable”(没有为任何变量提供梯度)。
如果我改为尝试:
probs=tf.Variable([tf.Variable(0.2, name='weight1'),tf.Variable(0.8, name='weight2')], trainable=True, name='weights')
那么 weight1 和 weight2 不会出现在可训练变量列表中;变量 weights 虽然出现在列表中,但在训练过程中没有被更新。
应该如何为 probs 参数指定权重,才能让它们在训练期间得到更新?
也许可以尝试以下方法:把整个权重数组作为单个 tf.Variable 直接传给 probs:
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
# The mixing probabilities are held in a single tf.Variable containing both
# entries, rather than in a Python list of separate scalar Variables.
# NOTE(review): this Variable is unconstrained; nothing in this snippet keeps
# its entries non-negative or summing to 1 during training. Consider passing
# `logits=` instead, or a constrained variable such as
# tfp.util.TransformedVariable — confirm against the TFP documentation.
mixing_dist = tfd.Categorical(probs=tf.Variable([0.2, 0.8]))

# Two Normal components, each with its own trainable loc and scale.
component_a = tfd.Normal(
    loc=tf.Variable(0., name='loc1'),
    scale=tf.Variable(1., name='scale1'),
)
component_b = tfd.Normal(
    loc=tf.Variable(0., name='loc2'),
    scale=tf.Variable(1., name='scale2'),
)

dist = tfd.Mixture(cat=mixing_dist, components=[component_a, component_b])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
@tf.function
def train_step(X):
    """Apply one optimizer update and print the variables as they were before it.

    Uses the module-level ``dist`` and ``optimizer``.

    Args:
        X: Observations passed to ``dist.log_prob``.

    Returns:
        The loss value computed for ``X`` before the update is applied.
    """
    params = dist.trainable_variables
    with tf.GradientTape() as tape:
        mean_log_prob = tf.reduce_mean(dist.log_prob(X))
        loss = -mean_log_prob
    # Printed ahead of apply_gradients, so each line shows the pre-update values.
    tf.print(params)
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))
    return loss
# Run a few steps, drawing a fresh (272, 1) standard-normal sample each time.
num_steps = 10
for _ in range(num_steps):
    batch = tf.random.normal((272, 1))
    loss = train_step(batch)
([0.2 0.8], 0, 1, 0, 1)
([0.2 0.8], -0.00999249145, 1.00999844, -0.0099981213, 1.00999963)
([0.200921655 0.799828708], -0.00638755737, 1.00682414, -0.00639217719, 1.00682521)
([0.20176363 0.799696386], -0.000149463303, 1.00765562, -0.000160227064, 1.00764322)
([0.200775564 0.800094664], 0.000889031217, 1.00637043, 0.000898908474, 1.00636196)
([0.199177444 0.800768435], -0.00115872873, 1.0025779, -0.00113528164, 1.0025754)
([0.19703567 0.801662683], -0.000830670586, 0.998396218, -0.000778611051, 0.998392522)
([0.193336055 0.80336237], 0.00244163908, 0.993740082, 0.00255049323, 0.993718445)
([0.192727238 0.803925216], 0.00376213156, 0.989788294, 0.00386576797, 0.989756942)
([0.194845349 0.802922785], 0.0022987891, 0.986021399, 0.00232516858, 0.985970497)