Theano: Using `scan` Instead of a `for` Loop in Linear Regression
I am trying to get a better grasp of the `scan` functionality in Theano. My understanding is that it behaves like a `for` loop. I have created a very simple working example that finds the weight and bias when performing linear regression.
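As an aside, a minimal sketch of that `for`-loop analogy, assuming standard `scan` usage with `sequences` and `outputs_info` (a running sum accumulated one element at a time, not part of the regression example):

import numpy as np
import theano
import theano.tensor as T

v = T.vector('v')

# step(elem, acc): `elem` is the current sequence element, `acc` is the
# previous step's output -- scan passes sequence slices first, then the
# prior output seeded by `outputs_info`.
step = lambda elem, acc: acc + elem

outputs, _ = theano.scan(fn=step,
                         sequences=v,
                         outputs_info=np.asarray(0., dtype=theano.config.floatX))
running_sum = theano.function([v], outputs)
# running_sum([1., 2., 3.]) -> [1., 3., 6.], like accumulating in a for loop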
#### Libraries
# Third Party Libraries
import numpy as np
import theano
import theano.tensor as T

# not intended for mini-batch
def gen_data(num_points=50, slope=1, bias=10, x_max=50):
    f = lambda z: slope * z + bias
    x = np.zeros(shape=(num_points), dtype=theano.config.floatX)
    y = np.zeros(shape=(num_points), dtype=theano.config.floatX)
    for i in range(num_points):
        x_temp = np.random.uniform()*x_max
        x[i] = x_temp
        y[i] = f(x_temp) + np.random.normal(scale=3.0)
    return (x, y)

#############################################################
#############################################################

train_x, train_y = gen_data(num_points=50, slope=2, bias=5)
epochs = 50

# Declaring variables
learn_rate = T.scalar(name='learn_rate', dtype=theano.config.floatX)
x = T.vector(name='x', dtype=theano.config.floatX)
y = T.vector(name='y', dtype=theano.config.floatX)

# Variables that will be updated
theta = theano.shared(np.random.rand(), name='theta')
bias = theano.shared(np.random.rand(), name='bias')

hyp = T.dot(theta, x) + bias
cost = T.mean((hyp - y)**2)/2
f_cost = theano.function(inputs=[x, y], outputs=cost)

grad_t, grad_b = T.grad(cost, [theta, bias])

train = theano.function(inputs=[x, y, learn_rate], outputs=cost,
                        updates=((theta, theta-learn_rate*grad_t),
                                 (bias, bias-learn_rate*grad_b)))

print('weight: {}, bias: {}'.format(theta.get_value(), bias.get_value()))

for i in range(epochs):  # Try changing this to a `scan`
    train(train_x, train_y, 0.001)

print('------------------------------')
print('weight: {}, bias: {}'.format(theta.get_value(), bias.get_value()))
I would like to change the `for` loop into a `theano.scan` function, but every attempt I have made has produced one error message after another.
In order to use `theano.scan`, I imported `OrderedDict` from `collections` to use for the shared variables. Using a `dict` results in the following error message:
Expected OrderedDict or OrderedUpdates, got <class 'dict'>. This can make your script non-deterministic.
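For reference, the ordered mapping that Theano accepts can be built like this, reusing the names from the first listing above:

from collections import OrderedDict

# Keys are the shared variables to update; OrderedDict preserves insertion
# order, which is why Theano prefers it over a plain dict for updates.
sgd_updates = OrderedDict([(theta, theta - learn_rate * grad_t),
                           (bias,  bias  - learn_rate * grad_b)])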
Second, I defined a function that computes the loss and the gradients. The function returns the `loss` and an `OrderedDict()`:
def cost(inputs, outputs, learn_rate, theta, bias):
    hyp = T.dot(theta, inputs) + bias
    loss = T.mean((hyp - outputs)**2)/2
    grad_t, grad_b = T.grad(loss, [theta, bias])
    return loss, OrderedDict([(theta, theta-learn_rate*grad_t),
                              (bias, bias-learn_rate*grad_b)])
Next, `theano.scan()` is defined like this:
results, updates = theano.scan(fn=cost,
                               non_sequences=[x, y, learn_rate, theta, bias],
                               n_steps=epochs)
I opted to include `x` and `y` as `non_sequences` because of the relatively small size of this toy example, rather than passing them in as `sequences`.
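Had they been passed as `sequences`, the inner function would receive one element of each per step, since scan feeds sequence slices first in its argument order. A hypothetical per-element version of `cost` might look like this (an illustration, not code from the question):

# Hypothetical: with sequences=[x, y], x_t and y_t are scalars (one
# sample per scan step), so the dot product reduces to a product.
def cost_elem(x_t, y_t, learn_rate, theta, bias):
    hyp = theta * x_t + bias
    loss = ((hyp - y_t) ** 2) / 2
    grad_t, grad_b = T.grad(loss, [theta, bias])
    return loss, OrderedDict([(theta, theta - learn_rate * grad_t),
                              (bias,  bias  - learn_rate * grad_b)])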
Finally, `theano.function()` is defined using the `results, updates` from `theano.scan()`:
train = theano.function(inputs=[x, y, learn_rate, epochs], outputs=results,
                        updates=updates)
Putting it all together, we have:
#### Libraries
# Standard Libraries
from collections import OrderedDict

# Third Party Libraries
# import matplotlib.pyplot as plt
import numpy as np
# from sklearn import linear_model
import theano
import theano.tensor as T

# def gen_data(num_points=50, slope=1, bias=10, x_max=50):
#     pass  # Use the code in the above post to generate sample points

########################################################################
# Generate Data
train_x, train_y = gen_data(num_points=50, slope=2)

# Declaring variables
x = T.vector(name='x', dtype=theano.config.floatX)
y = T.vector(name='y', dtype=theano.config.floatX)
learn_rate = T.scalar(name='learn_rate', dtype=theano.config.floatX)
epochs = T.iscalar(name='epochs')

# Variables that will be updated, hence are declared as `theano.shared`
theta = theano.shared(np.random.rand(), name='theta')
bias = theano.shared(np.random.rand(), name='bias')

def cost(inputs, outputs, learn_rate, theta, bias):
    hyp = T.dot(theta, inputs) + bias
    loss = T.mean((hyp - outputs)**2)/2
    grad_t, grad_b = T.grad(loss, [theta, bias])
    return loss, OrderedDict([(theta, theta-learn_rate*grad_t),
                              (bias, bias-learn_rate*grad_b)])

results, updates = theano.scan(fn=cost,
                               non_sequences=[x, y, learn_rate, theta, bias],
                               n_steps=epochs)

# results, updates = theano.scan(fn=cost,
#                                sequences=[x, y],
#                                non_sequences=[learn_rate, theta, bias],
#                                n_steps=epochs)

train = theano.function(inputs=[x, y, learn_rate, epochs], outputs=results,
                        updates=updates)

print('weight: {}, bias: {}'.format(theta.get_value(), bias.get_value()))
train(train_x, train_y, 0.001, 30)
print('------------------------------')
print('weight: {}, bias: {}'.format(theta.get_value(), bias.get_value()))
For completeness, I have included the code that passes `x` and `y` in as `sequences`. Simply uncomment that portion of the code AND comment out the other instance of `theano.scan()`.
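One pattern worth noting when experimenting with this: arguments passed through `non_sequences` reach the inner function as scan-internal copies, while the keys of the returned updates dictionary must be the shared variables themselves. Below is a sketch of a variant that captures `theta` and `bias` by closure instead of passing them in; this is an assumption about what sidesteps that mismatch, not a verified fix:

def cost_closure(inputs, outputs, learn_rate):
    # theta and bias are the outer shared variables, captured by closure,
    # so they are valid keys in the updates dictionary returned to scan.
    hyp = T.dot(theta, inputs) + bias
    loss = T.mean((hyp - outputs) ** 2) / 2
    grad_t, grad_b = T.grad(loss, [theta, bias])
    return loss, OrderedDict([(theta, theta - learn_rate * grad_t),
                              (bias,  bias  - learn_rate * grad_b)])

results, updates = theano.scan(fn=cost_closure,
                               non_sequences=[x, y, learn_rate],
                               n_steps=epochs)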