Transfer python/numpy indexing to TensorFlow and improve performance
In an earlier question I asked for advice on assigning items to an array faster. Since then I have made some progress; for example, I extended the recommended version to handle 3-D arrays, where the extra dimension is meant to act like the batch size of training data in a neural network:
import numpy as np
import time

batch_dim = 2
first_dim = 5
second_dim = 7
depth_dim = 10
upper_count = 5000

toy_dict = {k: np.random.random_sample(size=depth_dim) for k in range(upper_count)}
a = np.array(list(toy_dict.values()))

def create_input_3d(orig_arr):
    print("Input shape:", orig_arr.shape)
    goal_arr = np.full(shape=(batch_dim, orig_arr.shape[1], orig_arr.shape[2], depth_dim), fill_value=1234, dtype=float)
    print("Goal shape:", goal_arr.shape)
    idx = np.indices(orig_arr.shape)
    print("Idx shape", idx.shape)
    goal_arr[idx[0], idx[1], idx[2]] = a[orig_arr[idx[0], idx[1], idx[2]]]
    return goal_arr

orig_arr_three_dim = np.random.randint(0, upper_count, size=(batch_dim, first_dim, second_dim))
orig_arr_three_dim.shape  # (2, 5, 7)

reshaped = create_input_3d(orig_arr_three_dim)
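One observation that helps when porting this (my addition, not part of the original question): the index-based assignment above writes a[orig_arr[i, j, k]] into every cell, so the whole function collapses to a single NumPy advanced-indexing lookup. A quick sanity check:

# Sanity check (my addition): create_input_3d is equivalent to one
# advanced-indexing lookup into the table a.
direct = a[orig_arr_three_dim]  # shape (2, 5, 7, 10)
assert np.allclose(direct, create_input_3d(orig_arr_three_dim))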
I then decided to create a custom layer to improve performance and do the conversion on the fly (to save memory):
import tensorflow as tf
from tensorflow import keras
import numpy as np

# custom layer
class CustLayer(keras.layers.Layer):
    def __init__(self, info_matrix, first_dim, second_dim, info_dim, batch_size):
        super(CustLayer, self).__init__()
        self.w = tf.Variable(
            initial_value=info_matrix,
            trainable=False,
            dtype=tf.dtypes.float32
        )
        self.info_dim = info_dim
        self.first_dim = first_dim
        self.second_dim = second_dim
        self.batch_size = batch_size

    def call(self, orig_arr):
        goal_arr = tf.Variable(tf.zeros(shape=(self.batch_size, self.first_dim, self.second_dim, self.info_dim), dtype=float))
        # loop approach (slower): one scalar lookup and assignment per cell
        for example in tf.range(self.batch_size):
            for row in tf.range(self.first_dim):
                for col in tf.range(self.second_dim):
                    goal_arr[example, row, col].assign(self.w[orig_arr[example, row, col]])
        return goal_arr
upper_count = 50
info_length = 10
batch_size = 4
first_dim = 5
second_dim = 7
info_dim = 10

info_dict = {k: np.random.random_sample(size=info_length) for k in range(upper_count)}  # toy dict that stores information about
info_matrix = np.array(list(info_dict.values()))

linear_layer = CustLayer(info_matrix, first_dim=first_dim, second_dim=second_dim, info_dim=info_dim, batch_size=batch_size)

test = []
for i in range(batch_size):
    test.append(np.random.randint(1, upper_count, size=(first_dim, second_dim)))
test = np.asarray(test)
test.shape  # (4, 5, 7)

y = linear_layer(test)
y.shape  # TensorShape([4, 5, 7, 10])
Since advanced indexing (as in the code I first posted) does not work here, I fell back to the naive for loops, which are far too slow. What I am looking for is a way to use advanced indexing as in the first snippet, reprogrammed to be tf-compatible, so that I can train on a GPU.
In short: the input has shape (batch_size, first_dim, second_dim), the return value has shape (batch_size, first_dim, second_dim, info_dim), and the slow for loops are gone. Thanks in advance.
Other answers I've looked at: (links), also old tf.
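A sketch of the vectorized route the question asks for (my own, not from the post): NumPy's a[orig_arr] corresponds directly to tf.gather along axis 0, which replaces the Python loops with a single GPU-friendly op. The class name GatherLayer is hypothetical:

import tensorflow as tf
from tensorflow import keras
import numpy as np

class GatherLayer(keras.layers.Layer):
    def __init__(self, info_matrix):
        super(GatherLayer, self).__init__()
        # non-trainable lookup table of shape (upper_count, info_dim)
        self.w = tf.Variable(initial_value=info_matrix, trainable=False, dtype=tf.dtypes.float32)

    def call(self, orig_arr):
        # orig_arr: integer indices of shape (batch_size, first_dim, second_dim);
        # tf.gather looks up one row of self.w per index, returning
        # shape (batch_size, first_dim, second_dim, info_dim)
        return tf.gather(self.w, orig_arr, axis=0)

With the toy setup above, gather_layer = GatherLayer(info_matrix) and y = gather_layer(test) should give y.shape == TensorShape([4, 5, 7, 10]).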
For anyone else looking for an answer, this is what I eventually came up with:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import time

class CustLayer(keras.layers.Layer):
    def __init__(self, info_matrix, first_dim, second_dim, info_dim, batch_size):
        super(CustLayer, self).__init__()
        self.w = tf.Variable(
            initial_value=info_matrix,
            trainable=False,
            dtype=tf.dtypes.float32
        )
        self.info_matrix = info_matrix
        self.info_dim = info_dim
        self.first_dim = first_dim
        self.second_dim = second_dim
        self.batch_size = batch_size

    def my_numpy_func(self, x):
        # x is a numpy array with the contents of the input to the tf.function
        shape = x.shape
        goal_arr = np.zeros(shape=(shape[0], shape[1], shape[2], self.info_dim), dtype=np.float32)
        # indices to expand
        idx = np.indices(shape)
        goal_arr[idx[0], idx[1], idx[2]] = self.info_matrix[x[idx[0], idx[1], idx[2]]]
        shape_arr = np.array([shape[0], shape[1], shape[2]], dtype=np.int8)
        #tf.print("Shape:", shape)
        #tf.print("Shape_arr:", shape_arr)
        #tf.print("Type:", type(shape_arr))
        return goal_arr, shape_arr

    @tf.function(input_signature=[tf.TensorSpec((None, 39, 25), tf.int64)])
    def tf_function(self, input):
        y, shape_arr = tf.numpy_function(self.my_numpy_func, [input], [tf.float32, tf.int8], name="Nameless")
        #tf.print("shape_arr", shape_arr)
        y = tf.reshape(y, shape=(shape_arr[0], shape_arr[1], shape_arr[2], self.info_dim))
        return y

    def call(self, orig_arr):
        return self.tf_function(orig_arr)
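A minimal usage sketch (my addition; the concrete sizes are assumptions chosen to match the hard-coded (None, 39, 25) input signature above):

# Usage sketch: eager call, shapes match the input signature
info_matrix = np.random.random_sample(size=(50, 10))
layer = CustLayer(info_matrix, first_dim=39, second_dim=25, info_dim=10, batch_size=4)
test = np.random.randint(0, 50, size=(4, 39, 25)).astype(np.int64)
y = layer(test)
y.shape  # TensorShape([4, 39, 25, 10])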
Caveats: this runs on a GPU, but not on a TPU, since tf.numpy_function executes the Python function on the host and cannot be compiled by XLA.