Theano 上的简单异或(XOR)示例

Simple XOR example on theano

我是 Theano 的新手,没能用 Theano 实现一个简单的异或(XOR)示例。我尝试了很多方法想让它正常工作,但感觉自己只是在盲目地乱试。请看下面的代码,它非常简单,但我得到的却是随机的结果。

import numpy as np
import collections

import theano
import theano.tensor as T

# Layer widths of the network built in train_2: the first weight matrix is
# (INPUT_SIZE, HIDDEN_SIZE) and the second is (HIDDEN_SIZE, OUTPUT_SIZE).
INPUT_SIZE = 2
HIDDEN_SIZE = 2
OUTPUT_SIZE = 1

def train_2(data, valid_set_x, n_epochs=10, lr=0.2, batchsize=1):
    """Train a one-hidden-layer tanh network by SGD and return its predictions.

    The network is ``tanh(tanh(x . w1 + b1) . w2 + b2)`` with weights and
    biases drawn from a standard normal distribution. The per-batch loss
    summed over each epoch is printed, followed by the final parameters.

    Args:
        data: pair ``(x, y)`` of 2-D arrays with one sample per row; ``x`` has
            INPUT_SIZE columns and ``y`` has OUTPUT_SIZE columns.
        valid_set_x: 2-D array of inputs to evaluate after training.
        n_epochs: number of passes over the training set.
        lr: base learning rate; the step used in epoch ``k`` (1-based) is
            ``lr / k``.
        batchsize: number of rows fed to each training step.

    Returns:
        The network output for ``valid_set_x``, one row per input row.

    Raises:
        ValueError: if ``batchsize`` is smaller than 1.
    """
    if batchsize < 1:
        raise ValueError("batchsize must be >= 1, got %r" % (batchsize,))

    x, y = data

    # symbol declarations
    ep = T.scalar()  # 1-based epoch number, drives the learning-rate decay
    sx = T.matrix()
    sy = T.matrix()

    w1 = theano.shared(np.random.normal(loc=0, scale=1, size=(INPUT_SIZE, HIDDEN_SIZE)))
    b1 = theano.shared(np.random.normal(loc=0, scale=1, size=(HIDDEN_SIZE)))
    w2 = theano.shared(np.random.normal(loc=0, scale=1, size=(HIDDEN_SIZE, OUTPUT_SIZE)))
    b2 = theano.shared(np.random.normal(loc=0, scale=1, size=(OUTPUT_SIZE)))

    # symbolic expression-building
    hid = T.tanh(T.dot(sx, w1) + b1)
    out = T.tanh(T.dot(hid, w2) + b2)

    # Sum of squared errors. Each residual is squared BEFORE summing;
    # squaring the sum instead lets residuals of opposite sign cancel as soon
    # as a batch holds more than one value.
    cost = 0.5 * T.sum((out - sy) ** 2)

    params = [w1, b1, w2, b2]
    grads = T.grad(cost, params)

    # One gradient-descent step per parameter, with the step size shrinking
    # as lr / epoch.
    step = lr / ep
    updates = [(param, param - step * grad)
               for param, grad in zip(params, grads)]

    # compile a fast training function
    train = theano.function([sx, sy, ep], cost, updates=updates)
    sample = theano.function([sx], out)

    train_set_size = x.shape[0]

    # now do the computations
    for epoch in range(n_epochs):
        epoch_err = 0
        # Walk the data in strides of batchsize so no slice is ever empty;
        # the last batch may be shorter than the others.
        for start in range(0, train_set_size, batchsize):
            stop = start + batchsize
            epoch_err += train(x[start:stop], y[start:stop], epoch + 1)
        print("Error: " + str(epoch_err))

    print("Weights:")
    print(w1.get_value())
    print(b1.get_value())
    print(w2.get_value())
    print(b2.get_value())

    return sample(valid_set_x)

def test__(files=None):
    """Train on four 2-D sample points and print inputs, predictions, targets.

    The target is 0.9 for the ``[5, 5]`` row and -0.9 for the other three.
    ``files`` is accepted but not used.
    """
    inputs = np.array(
        [[-5, -5], [-5, 5], [5, -5], [5, 5]],
        dtype="float32",
    )
    targets = np.array([[-0.9], [-0.9], [-0.9], [0.9]], dtype="float32")

    print("Processing...")
    # The training inputs double as the evaluation set.
    predictions = train_2((inputs, targets), inputs)

    for shown in (inputs, predictions, targets):
        print(shown)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test__()

问题出在您的 updates(更新)部分。我把变量重命名为 'updates',因为 'list' 和 'dict' 是 Python 的内置名称,用它们作变量名会遮蔽内置类型,不是一个好的选择。另外,我不明白你为什么要这么快地降低学习率,所以我把这部分去掉了。updates 应该是像下面这样由(共享变量, 新值表达式)二元组构成的列表:

# Each pair is (shared variable, expression for its new value): a plain
# gradient-descent step with the constant learning rate `lr`.
updates = [(w1, w1 - lr * gw),
           (b1, b1 - lr * gb),
           (w2, w2 - lr * gv),
           (b2, b2 - lr * gc)]

# compile a fast training function
# It takes only the inputs and targets, returns the loss, and applies
# `updates` on every call.
train = theano.function([sx, sy], err, updates=updates)

我运行了修改后的例子,得到了下面的结果。要进一步降低损失还需要更多次迭代,但除此之外结果还不错。

Processing...
Error: 1.4456279556
...
Error: 0.0767515052046
Weights:
[[ 0.52955082 -1.26936557]
 [-1.05887804  0.04998216]]
[ 0.29209577 -0.22703456]
[[-0.89983822]
 [-0.88619565]]
[-0.86047891]
[[-5. -5.]
 [-5.  5.]
 [ 5. -5.]
 [ 5.  5.]]
[[-0.98989634]
 [-0.68941057]
 [-0.7034631 ]
 [ 0.72087948]]
[[-0.89999998]
 [-0.89999998]
 [-0.89999998]
 [ 0.89999998]]