使用 numpy 进行矢量化

Vectorization using numpy

我从 MATLAB 翻译了这段代码。它是更大代码体的一部分,但我想获得一些关于如何矢量化这一部分以使其更快的建议。我主要关心的是 for 循环和 if 语句。如果可能的话,我想在不使用 if/else 语句的情况下编写它(JAX 无法对 if 条件进行 jit)。谢谢。

import numpy as np

# Problem setup: one parameter row per entry of `smf`, `num_cols` columns each.
num_rows = 5
num_cols = 20
smf = np.array([np.inf, 0.1, 0.1, 0.1, 0.1])
par_init = np.array([1, 2, 3, 4, 5])
lb = np.array([0.1, 0.1, 0.1, 0.1, 0.1])
ub = np.array([10, 10, 10, 10, 10])
# Repeat every initial value across all columns -> shape (num_rows, num_cols).
par = np.broadcast_to(par_init[:, None], (num_rows, num_cols))

print(par.shape)

# A row whose `smf` entry is infinite is stored as a single flat entry; every
# other row is stored as `num_cols` entries.  The mask is computed once so
# both loops test the same condition instead of copying `smf` per iteration.
is_inf = np.isinf(smf)
flat_size = num_rows * num_cols - (num_cols - 1) * np.sum(is_inf)

par0_col = np.zeros(flat_size)
lb_col = np.zeros(flat_size)
ub_col = np.zeros(flat_size)

# First loop: pack `par` and the per-row bounds into flat vectors.
# `k` is the write position inside the flat vectors.
k = 0
for i in range(num_rows):
    if is_inf[i]:
        # Only the first column of the row is kept.
        par0_col[k] = par[i, 0]
        lb_col[k] = lb[i]
        ub_col[k] = ub[i]
        k = k + 1
    else:
        # The whole row is kept; the scalar bounds broadcast over the slice.
        par0_col[k:k + num_cols] = par[i, :num_cols]
        lb_col[k:k + num_cols] = lb[i]
        ub_col[k:k + num_cols] = ub[i]
        k = k + num_cols

arr_1 = np.zeros(shape=(num_rows, num_cols))
arr_2 = np.zeros(shape=(num_rows, num_cols))

# log10 of the packed values, expressed relative to their lower/upper bounds.
par_log = np.log10((par0_col - lb_col) / (1 - par0_col / ub_col))

# Second loop: unpack the flat vector back into (num_rows, num_cols) arrays.
# `arr_1` receives the log10 values, `arr_2` the values with the log10 undone.
k = 0
for i in range(num_rows):
    if is_inf[i]:
        # The single stored value is broadcast across the whole row.
        arr_1[i, :] = par_log[k]
        arr_2[i, :] = 10 ** par_log[k]
        k = k + 1
    else:
        arr_1[i, :] = par_log[k:k + num_cols]
        arr_2[i, :] = 10 ** par_log[k:k + num_cols]
        k = k + num_cols
# print(arr_1)

# [[0.         0.         0.         0.         0.         0.
#   0.         0.         0.         0.         0.         0.
#   0.         0.         0.         0.         0.         0.
#   0.         0.        ]
#  [0.37566361 0.37566361 0.37566361 0.37566361 0.37566361 0.37566361
#   0.37566361 0.37566361 0.37566361 0.37566361 0.37566361 0.37566361
#   0.37566361 0.37566361 0.37566361 0.37566361 0.37566361 0.37566361
#   0.37566361 0.37566361]
#  [0.61729996 0.61729996 0.61729996 0.61729996 0.61729996 0.61729996
#   0.61729996 0.61729996 0.61729996 0.61729996 0.61729996 0.61729996
#   0.61729996 0.61729996 0.61729996 0.61729996 0.61729996 0.61729996
#   0.61729996 0.61729996]
#  [0.81291336 0.81291336 0.81291336 0.81291336 0.81291336 0.81291336
#   0.81291336 0.81291336 0.81291336 0.81291336 0.81291336 0.81291336
#   0.81291336 0.81291336 0.81291336 0.81291336 0.81291336 0.81291336
#   0.81291336 0.81291336]
#  [0.99122608 0.99122608 0.99122608 0.99122608 0.99122608 0.99122608
#   0.99122608 0.99122608 0.99122608 0.99122608 0.99122608 0.99122608
#   0.99122608 0.99122608 0.99122608 0.99122608 0.99122608 0.99122608
#   0.99122608 0.99122608]]

完全矢量化有一定难度,但我认为是可行的。下面先去掉第一个循环中的 if/else 逻辑。诀窍是预先计算 k 的各个取值(smf_1 没有实际作用,所以我把它删除了):

# Fixed problem setup: five parameter rows, twenty columns each.
num_rows, num_cols = 5, 20
smf = np.array([np.inf, 0.1, 0.1, 0.1, 0.1])
par_init = np.array([1, 2, 3, 4, 5])
lb = np.array([0.1, 0.1, 0.1, 0.1, 0.1])
ub = np.array([10, 10, 10, 10, 10])
par = np.broadcast_to(par_init[:, None], (num_rows, num_cols))

# Each row occupies 1 flat slot when its `smf` entry is infinite, otherwise
# `num_cols` slots.  The running total with a leading 0 gives, for row i,
# the half-open flat range [kvals[i], kvals[i + 1]).
inf_rows = np.isinf(smf)
slots_per_row = np.where(inf_rows, 1, num_cols)
kvals = np.concatenate(([0], np.cumsum(slots_per_row)))

# Total number of flat entries, shared by the three packed vectors.
flat_len = num_rows * num_cols - (num_cols - 1) * np.count_nonzero(inf_rows)
par0_col, lb_col, ub_col = (np.zeros(flat_len) for _ in range(3))

# Packing loop: no branch is needed because the precomputed offsets already
# encode how many entries each row contributes.
for i in range(num_rows):
    start, stop = kvals[i], kvals[i + 1]
    par0_col[start:stop] = par[i, :stop - start]
    lb_col[start:stop] = lb[i]
    ub_col[start:stop] = ub[i]

同样的模式应该也适用于第二个循环,不过我没有仔细验证。如果要对其进行完全矢量化(即去掉 for 循环),我认为这就是可行的方向。