Theano 中的 1-of-k(one-hot)编码
1-of-k (one-hot) encoding in Theano
我目前在 Numpy 中是这样做的。seq 是一个由类别索引组成的列表,也就是说,下面的函数实现了 1-of-k 编码(也称为 one-hot)。
def one_of_k(seq, num_classes):
    """Return the 1-of-k (one-hot) encoding of a sequence of class indices.

    The original spelling ``1_of_k`` starts with a digit, which is not a valid
    Python identifier, so the function is named ``one_of_k`` here.

    Args:
        seq: 1-D sequence of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A float array of shape ``(len(seq), num_classes)`` holding a single 1
        per row, in the column given by the matching entry of ``seq``.
    """
    num_frames = len(seq)
    m = np.zeros((num_frames, num_classes))
    # Pair row i with column seq[i] so a single assignment sets every 1.
    m[np.arange(num_frames), seq] = 1
    return m
我如何在 Theano 中做同样的事情? (最有效的解决方案,对 CUDA 也有效。)
有一个内置函数可以执行此操作(theano.tensor.extra_ops.to_one_hot),但它仍然比在 numpy 中执行要慢得多。如果您的任务允许,最好在 Theano 之外计算好结果,并将密集矩阵作为输入传入,而不是只传入索引。
下面的代码演示了三种 numpy 方法和四种 Theano 方法。其中包含 Albert(numpy_1_of_k_3 / compile_theano_1_of_k_3)和 eickenberg(numpy_1_of_k_2 / compile_theano_1_of_k_4)提供的答案,以供比较。
事实证明,内置的 Theano 方法(compile_theano_1_of_k_2)使用的代码与我自己的尝试(numpy_1_of_k_1 / compile_theano_1_of_k_1)大致相同。
import timeit
import numpy as np
import theano
import theano.tensor as tt
import theano.tensor.extra_ops
def numpy_1_of_k_1(seq, num_classes):
    """One-hot encode ``seq`` by writing ones into a zero matrix.

    Args:
        seq: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A float array of shape ``(len(seq), num_classes)`` with a single 1 in
        each row, in the column given by the matching entry of ``seq``.
    """
    indices = np.asarray(seq)
    if indices.size == 0:
        # np.array([]) defaults to float64, which NumPy rejects as an index
        # even when it is empty; an empty integer array is always accepted.
        indices = indices.astype(np.intp)
    num_frames = len(indices)
    m = np.zeros((num_frames, num_classes))
    # Pair row i with column indices[i] so a single assignment sets every 1.
    m[np.arange(num_frames), indices] = 1
    return m
def numpy_1_of_k_2(seq, num_classes):
    """One-hot encode ``seq`` by broadcasting an equality comparison.

    Args:
        seq: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A boolean array of shape ``(len(seq), num_classes)`` that is True
        where the column number equals the row's class index.
    """
    # asarray lets plain lists through; a list cannot be sliced with
    # ``[:, np.newaxis]``.
    indices = np.asarray(seq)
    # (n, 1) compared against (num_classes,) broadcasts to (n, num_classes).
    return indices[:, np.newaxis] == np.arange(num_classes)
def numpy_1_of_k_3(seq, num_classes):
    """One-hot encode ``seq`` by selecting rows of an identity matrix.

    Args:
        seq: Sequence or array of integer class indices, of any rank.
        num_classes: Size of the identity matrix, and of the last output axis.

    Returns:
        A float array of shape ``seq.shape + (num_classes,)``.
    """
    indices = np.asarray(seq)
    if indices.size == 0:
        # np.array([]) defaults to float64, which NumPy rejects as an index
        # even when it is empty; an empty integer array is always accepted.
        indices = indices.astype(np.intp)
    shape = indices.shape + (num_classes,)
    eye = np.eye(num_classes)
    # Row c of the identity matrix is the one-hot vector for class c.
    return eye[indices].reshape(shape)
def compile_theano_1_of_k_1():
    """Compile a Theano one-hot encoder that fills a zero matrix with ones.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    num_frames = seq.shape[0]
    row_ids = tt.arange(num_frames)
    blank = tt.zeros((num_frames, num_classes))
    # Entry (i, seq[i]) of the zero matrix is replaced by 1 for every row i.
    m = tt.set_subtensor(blank[row_ids, seq], 1)
    return theano.function([seq, num_classes], outputs=m)
def compile_theano_1_of_k_2():
    """Compile a Theano one-hot encoder using the built-in ``to_one_hot``.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    encoded = theano.tensor.extra_ops.to_one_hot(seq, num_classes)
    return theano.function([seq, num_classes], outputs=encoded)
def compile_theano_1_of_k_3():
    """Compile a Theano one-hot encoder that indexes rows of an identity matrix.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    # Output shape is the shape of ``seq`` followed by one axis of classes.
    shape = [seq.shape[axis] for axis in range(seq.ndim)]
    shape.append(num_classes)
    identity = tt.eye(num_classes)
    m = identity[seq].reshape(shape)
    return theano.function([seq, num_classes], outputs=m)
def compile_theano_1_of_k_4():
    """Compile a Theano one-hot encoder based on a broadcast equality test.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    # A column of indices is compared against the row 0..num_classes-1.
    column = seq.reshape((-1, 1))
    class_ids = tt.arange(num_classes)
    one_hot = tt.eq(column, class_ids)
    return theano.function([seq, num_classes], outputs=one_hot)
def main(iterations):
    """Check that all seven encoders agree, then time each of them.

    Each encoder is run over the same randomly generated cases, and one line
    is printed per encoder: the elapsed time followed by the sum of all
    outputs.

    Args:
        iterations: Number of random ``(seq, num_classes)`` cases to generate.
    """
    theano_1_of_k_1 = compile_theano_1_of_k_1()
    theano_1_of_k_2 = compile_theano_1_of_k_2()
    theano_1_of_k_3 = compile_theano_1_of_k_3()
    theano_1_of_k_4 = compile_theano_1_of_k_4()
    test_seq = np.array([0, 1, 2, 0, 1, 2])
    test_num_classes = 4
    test_functions = [numpy_1_of_k_1, numpy_1_of_k_2, numpy_1_of_k_3,
                      theano_1_of_k_1, theano_1_of_k_2, theano_1_of_k_3,
                      theano_1_of_k_4]
    test_results = [test_function(test_seq, test_num_classes)
                    for test_function in test_functions]
    # Comparing each result with its neighbour is enough to show all agree.
    for a, b in zip(test_results[:-1], test_results[1:]):
        assert np.all(np.equal(a, b)), (a, b)
    # Generate the inputs up front so every encoder is timed on the same data.
    data = []
    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for _ in range(iterations):
        num_classes = np.random.randint(100) + 1
        seq = np.random.randint(num_classes, size=(np.random.randint(100) + 1))
        data.append((seq, num_classes))
    for test_function in test_functions:
        start = timeit.default_timer()
        total = 0
        for seq, num_classes in data:
            total += test_function(seq, num_classes).sum()
        # %-formatting reproduces the "elapsed total" line of the Python 2
        # print statement while remaining valid syntax on Python 3.
        print("%s %s" % (timeit.default_timer() - start, total))


main(100000)
在笔记本电脑上,让 Theano 代码在 CPU 上运行,我得到了以下计时结果(单位:秒):
numpy_1_of_k_1 1.0645
numpy_1_of_k_2 1.4018
numpy_1_of_k_3 1.6131
theano_1_of_k_1 6.3542
theano_1_of_k_2 6.4628
theano_1_of_k_3 6.5637
theano_1_of_k_4 5.4588
所以在 numpy 中,单位矩阵(identity)方法比简单广播慢,简单广播又比先置零再赋值慢。然而,在 Theano 中,相对性能顺序不同;这里简单的广播方法是最快的。
这些都是非常小的测试用例,因此在更大的矩阵上或在 GPU 上运行时,相对性能可能会有所不同。
我的解决方案:
def class_idx_seq_to_1_of_k(seq, num_classes, dtype="float32"):
    """Symbolically one-hot encode ``seq`` by indexing an identity matrix.

    ``T`` is presumably ``theano.tensor``; confirm against the surrounding
    imports.

    Args:
        seq: Symbolic tensor of class indices; it is cast to int32 before
            being used as an index.
        num_classes: Size of the identity matrix, and of the last output axis.
        dtype: dtype passed to ``T.eye``.

    Returns:
        The symbolic result, shaped like ``seq`` with one extra trailing axis
        of length ``num_classes``.
    """
    out_shape = [seq.shape[axis] for axis in range(seq.ndim)]
    out_shape.append(num_classes)
    identity = T.eye(num_classes, dtype=dtype)
    indices = T.cast(seq, 'int32')
    return identity[indices].reshape(out_shape)
这不是简单的广播吗?
import numpy as np
seq = np.random.randint(0, 5, 10)
# Class ids are assumed contiguous from 0, so the class count is the largest
# id seen plus one (otherwise use an explicit n_classes variable instead).
# Without the "+ 1" the largest class would get an all-False row.
one_hot = seq[:, np.newaxis] == np.arange(seq.max() + 1)
print(one_hot)
import theano
import theano.tensor as T
t_seq = T.ivector()
# Classes run from 0 to max inclusive, so max + 1 columns are needed;
# without the "+ 1" the largest class would get an all-False row.
t_one_hot = T.eq(t_seq.reshape((-1, 1)), T.arange(t_seq.max() + 1))
f = theano.function([t_seq], [t_one_hot])
# The input is cast to int32 to match the ivector declared above.
print(f(seq.astype('int32')))
我目前在 Numpy 中是这样做的。seq 是一个由类别索引组成的列表,也就是说,下面的函数实现了 1-of-k 编码(也称为 one-hot)。
def one_of_k(seq, num_classes):
    """Return the 1-of-k (one-hot) encoding of a sequence of class indices.

    The original spelling ``1_of_k`` starts with a digit, which is not a valid
    Python identifier, so the function is named ``one_of_k`` here.

    Args:
        seq: 1-D sequence of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A float array of shape ``(len(seq), num_classes)`` holding a single 1
        per row, in the column given by the matching entry of ``seq``.
    """
    num_frames = len(seq)
    m = np.zeros((num_frames, num_classes))
    # Pair row i with column seq[i] so a single assignment sets every 1.
    m[np.arange(num_frames), seq] = 1
    return m
我如何在 Theano 中做同样的事情? (最有效的解决方案,对 CUDA 也有效。)
有一个内置函数可以执行此操作(theano.tensor.extra_ops.to_one_hot),但它仍然比在 numpy 中执行要慢得多。如果您的任务允许,最好在 Theano 之外计算好结果,并将密集矩阵作为输入传入,而不是只传入索引。
下面的代码演示了三种 numpy 方法和四种 Theano 方法。其中包含 Albert(numpy_1_of_k_3 / compile_theano_1_of_k_3)和 eickenberg(numpy_1_of_k_2 / compile_theano_1_of_k_4)提供的答案,以供比较。
事实证明,内置的 Theano 方法(compile_theano_1_of_k_2)使用的代码与我自己的尝试(numpy_1_of_k_1 / compile_theano_1_of_k_1)大致相同。
import timeit
import numpy as np
import theano
import theano.tensor as tt
import theano.tensor.extra_ops
def numpy_1_of_k_1(seq, num_classes):
    """One-hot encode ``seq`` by writing ones into a zero matrix.

    Args:
        seq: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A float array of shape ``(len(seq), num_classes)`` with a single 1 in
        each row, in the column given by the matching entry of ``seq``.
    """
    indices = np.asarray(seq)
    if indices.size == 0:
        # np.array([]) defaults to float64, which NumPy rejects as an index
        # even when it is empty; an empty integer array is always accepted.
        indices = indices.astype(np.intp)
    num_frames = len(indices)
    m = np.zeros((num_frames, num_classes))
    # Pair row i with column indices[i] so a single assignment sets every 1.
    m[np.arange(num_frames), indices] = 1
    return m
def numpy_1_of_k_2(seq, num_classes):
    """One-hot encode ``seq`` by broadcasting an equality comparison.

    Args:
        seq: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        A boolean array of shape ``(len(seq), num_classes)`` that is True
        where the column number equals the row's class index.
    """
    # asarray lets plain lists through; a list cannot be sliced with
    # ``[:, np.newaxis]``.
    indices = np.asarray(seq)
    # (n, 1) compared against (num_classes,) broadcasts to (n, num_classes).
    return indices[:, np.newaxis] == np.arange(num_classes)
def numpy_1_of_k_3(seq, num_classes):
    """One-hot encode ``seq`` by selecting rows of an identity matrix.

    Args:
        seq: Sequence or array of integer class indices, of any rank.
        num_classes: Size of the identity matrix, and of the last output axis.

    Returns:
        A float array of shape ``seq.shape + (num_classes,)``.
    """
    indices = np.asarray(seq)
    if indices.size == 0:
        # np.array([]) defaults to float64, which NumPy rejects as an index
        # even when it is empty; an empty integer array is always accepted.
        indices = indices.astype(np.intp)
    shape = indices.shape + (num_classes,)
    eye = np.eye(num_classes)
    # Row c of the identity matrix is the one-hot vector for class c.
    return eye[indices].reshape(shape)
def compile_theano_1_of_k_1():
    """Compile a Theano one-hot encoder that fills a zero matrix with ones.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    num_frames = seq.shape[0]
    row_ids = tt.arange(num_frames)
    blank = tt.zeros((num_frames, num_classes))
    # Entry (i, seq[i]) of the zero matrix is replaced by 1 for every row i.
    m = tt.set_subtensor(blank[row_ids, seq], 1)
    return theano.function([seq, num_classes], outputs=m)
def compile_theano_1_of_k_2():
    """Compile a Theano one-hot encoder using the built-in ``to_one_hot``.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    encoded = theano.tensor.extra_ops.to_one_hot(seq, num_classes)
    return theano.function([seq, num_classes], outputs=encoded)
def compile_theano_1_of_k_3():
    """Compile a Theano one-hot encoder that indexes rows of an identity matrix.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    # Output shape is the shape of ``seq`` followed by one axis of classes.
    shape = [seq.shape[axis] for axis in range(seq.ndim)]
    shape.append(num_classes)
    identity = tt.eye(num_classes)
    m = identity[seq].reshape(shape)
    return theano.function([seq, num_classes], outputs=m)
def compile_theano_1_of_k_4():
    """Compile a Theano one-hot encoder based on a broadcast equality test.

    Returns:
        A compiled ``theano.function`` taking ``(seq, num_classes)``, built
        from an ``lvector`` of class indices and an ``lscalar`` class count.
    """
    seq = tt.lvector()
    num_classes = tt.lscalar()
    # A column of indices is compared against the row 0..num_classes-1.
    column = seq.reshape((-1, 1))
    class_ids = tt.arange(num_classes)
    one_hot = tt.eq(column, class_ids)
    return theano.function([seq, num_classes], outputs=one_hot)
def main(iterations):
    """Check that all seven encoders agree, then time each of them.

    Each encoder is run over the same randomly generated cases, and one line
    is printed per encoder: the elapsed time followed by the sum of all
    outputs.

    Args:
        iterations: Number of random ``(seq, num_classes)`` cases to generate.
    """
    theano_1_of_k_1 = compile_theano_1_of_k_1()
    theano_1_of_k_2 = compile_theano_1_of_k_2()
    theano_1_of_k_3 = compile_theano_1_of_k_3()
    theano_1_of_k_4 = compile_theano_1_of_k_4()
    test_seq = np.array([0, 1, 2, 0, 1, 2])
    test_num_classes = 4
    test_functions = [numpy_1_of_k_1, numpy_1_of_k_2, numpy_1_of_k_3,
                      theano_1_of_k_1, theano_1_of_k_2, theano_1_of_k_3,
                      theano_1_of_k_4]
    test_results = [test_function(test_seq, test_num_classes)
                    for test_function in test_functions]
    # Comparing each result with its neighbour is enough to show all agree.
    for a, b in zip(test_results[:-1], test_results[1:]):
        assert np.all(np.equal(a, b)), (a, b)
    # Generate the inputs up front so every encoder is timed on the same data.
    data = []
    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for _ in range(iterations):
        num_classes = np.random.randint(100) + 1
        seq = np.random.randint(num_classes, size=(np.random.randint(100) + 1))
        data.append((seq, num_classes))
    for test_function in test_functions:
        start = timeit.default_timer()
        total = 0
        for seq, num_classes in data:
            total += test_function(seq, num_classes).sum()
        # %-formatting reproduces the "elapsed total" line of the Python 2
        # print statement while remaining valid syntax on Python 3.
        print("%s %s" % (timeit.default_timer() - start, total))


main(100000)
在笔记本电脑上,让 Theano 代码在 CPU 上运行,我得到了以下计时结果(单位:秒):
numpy_1_of_k_1 1.0645
numpy_1_of_k_2 1.4018
numpy_1_of_k_3 1.6131
theano_1_of_k_1 6.3542
theano_1_of_k_2 6.4628
theano_1_of_k_3 6.5637
theano_1_of_k_4 5.4588
所以在 numpy 中,单位矩阵(identity)方法比简单广播慢,简单广播又比先置零再赋值慢。然而,在 Theano 中,相对性能顺序不同;这里简单的广播方法是最快的。
这些都是非常小的测试用例,因此在更大的矩阵上或在 GPU 上运行时,相对性能可能会有所不同。
我的解决方案:
def class_idx_seq_to_1_of_k(seq, num_classes, dtype="float32"):
    """Symbolically one-hot encode ``seq`` by indexing an identity matrix.

    ``T`` is presumably ``theano.tensor``; confirm against the surrounding
    imports.

    Args:
        seq: Symbolic tensor of class indices; it is cast to int32 before
            being used as an index.
        num_classes: Size of the identity matrix, and of the last output axis.
        dtype: dtype passed to ``T.eye``.

    Returns:
        The symbolic result, shaped like ``seq`` with one extra trailing axis
        of length ``num_classes``.
    """
    out_shape = [seq.shape[axis] for axis in range(seq.ndim)]
    out_shape.append(num_classes)
    identity = T.eye(num_classes, dtype=dtype)
    indices = T.cast(seq, 'int32')
    return identity[indices].reshape(out_shape)
这不是简单的广播吗?
import numpy as np
seq = np.random.randint(0, 5, 10)
# Class ids are assumed contiguous from 0, so the class count is the largest
# id seen plus one (otherwise use an explicit n_classes variable instead).
# Without the "+ 1" the largest class would get an all-False row.
one_hot = seq[:, np.newaxis] == np.arange(seq.max() + 1)
print(one_hot)
import theano
import theano.tensor as T
t_seq = T.ivector()
# Classes run from 0 to max inclusive, so max + 1 columns are needed;
# without the "+ 1" the largest class would get an all-False row.
t_one_hot = T.eq(t_seq.reshape((-1, 1)), T.arange(t_seq.max() + 1))
f = theano.function([t_seq], [t_one_hot])
# The input is cast to int32 to match the ivector declared above.
print(f(seq.astype('int32')))