Python 中尺度矩阵(协方差矩阵)的高效计算
Efficient computation of a scale matrix (covariance matrix) in Python
下面的函数计算多变量时间序列的时间间隔 (t0,t1)
的尺度矩阵(协方差矩阵)。我想重写此函数,使其不需要列表或 for 循环。有没有办法只使用 numpy 数组操作来执行以下操作?看来我需要 numpy.outer
的一个版本,它接受二维数组作为输入,然后沿着指定的轴获取外部产品。但是我在numpy中找不到这样的函数。
import numpy as np
def scale_matrix(multivariate_time_series, t0=0, t1=0):
# multivariate_time_series is a 2d array.
if t1==0:
t1 = len(multivariate_time_series)
a = np.mean([np.outer(multivariate_time_series[t,:],multivariate_time_series[t,:])
for t in range(t0,t1)], axis=0)
return a
您可以使用矩阵乘法或einsum
:
>>> data = np.random.random((20, 5))
>>> t0 = t1 = 0
>>> data_r = data[t0:t1 or len(data)]
>>>
>>> data_r.T@data_r/data_r.shape[0]
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])
>>>
>>> np.einsum('ij,ik->jk', data_r, data_r)/data_r.shape[0]
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])
>>>
>>> scale_matrix(data, t0, t1)
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])
下面的函数计算多变量时间序列的时间间隔 (t0,t1)
的尺度矩阵(协方差矩阵)。我想重写此函数,使其不需要列表或 for 循环。有没有办法只使用 numpy 数组操作来执行以下操作?看来我需要 numpy.outer
的一个版本,它接受二维数组作为输入,然后沿着指定的轴获取外部产品。但是我在numpy中找不到这样的函数。
import numpy as np
def scale_matrix(multivariate_time_series, t0=0, t1=0):
# multivariate_time_series is a 2d array.
if t1==0:
t1 = len(multivariate_time_series)
a = np.mean([np.outer(multivariate_time_series[t,:],multivariate_time_series[t,:])
for t in range(t0,t1)], axis=0)
return a
您可以使用矩阵乘法或einsum
:
>>> data = np.random.random((20, 5))
>>> t0 = t1 = 0
>>> data_r = data[t0:t1 or len(data)]
>>>
>>> data_r.T@data_r/data_r.shape[0]
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])
>>>
>>> np.einsum('ij,ik->jk', data_r, data_r)/data_r.shape[0]
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])
>>>
>>> scale_matrix(data, t0, t1)
array([[0.31445868, 0.15057765, 0.25087819, 0.26003647, 0.24403643],
[0.15057765, 0.32387482, 0.25741824, 0.27916451, 0.26457779],
[0.25087819, 0.25741824, 0.38244811, 0.31093482, 0.30124948],
[0.26003647, 0.27916451, 0.31093482, 0.39589237, 0.30220028],
[0.24403643, 0.26457779, 0.30124948, 0.30220028, 0.3548833 ]])