Numpy 滚动列相关
Numpy Rolling Columnwise Correlation
我有两个形状相同的矩阵:
import numpy as np
from scipy.stats import pearsonr
# Seed the global NumPy random generator so the example is reproducible.
np.random.seed(10)
# Two arrays of 30 random floats each, reshaped to 10 rows by 3 columns.
a = np.random.random(30).reshape(10,3)
b = np.random.random(30).reshape(10,3)
即 10 行和 3 列。我需要每个矩阵中具有相同列索引的列的 rolling 相关性。慢的方式是:
def roll_corr(ab, window):
    """Compute a rolling, column-wise Pearson correlation of two arrays.

    Args:
        ab: Pair ``(a, b)`` of equally shaped 2-D arrays. Column ``j`` of
            ``a`` is correlated with column ``j`` of ``b``.
        window: Rolling window length. The first ``window - 1`` rows of the
            result are left as NaN.

    Returns:
        An array shaped like ``a`` holding the correlation coefficients.
    """
    # Tuple parameters were removed in Python 3 (PEP 3113), so the pair is
    # unpacked here; callers still invoke ``roll_corr((a, b), window)``.
    a, b = ab
    out = np.ones_like(a) * np.nan
    for i in range(window - 1, a.shape[0]):
        # NOTE: the slice stops before row i, so each coefficient is computed
        # from the ``window - 1`` rows that precede row i.
        start = i - (window - 1)
        for j in range(a.shape[1]):
            out[i, j] = pearsonr(a[start:i, j], b[start:i, j])[0]
    return out
roll_corr((a, b), 5)
的结果如我所愿,
array([[ nan, nan, nan],
[ nan, nan, nan],
[ nan, nan, nan],
[ nan, nan, nan],
[ 0.28810753, 0.27836622, 0.88397851],
[-0.04076151, 0.45254981, 0.83259104],
[ 0.62262963, -0.4188768 , 0.35479134],
[ 0.13130652, -0.91441413, -0.21713372],
[ 0.54327228, -0.91390053, -0.84033286],
[ 0.45268257, -0.95245888, -0.50107515]])
问题是:是否有更惯用的 numpy 方法来做到这一点?矢量化?步幅技巧(stride tricks)?Numba?
我已经搜索过了,但没有找到相关的内容。我不想使用 pandas;必须用 numpy 实现。
我们可以利用基于 np.lib.stride_tricks.as_strided 的 scikit-image 函数 view_as_windows 来获取滑动窗口。
因此,我们将有一个基于 corr2_coeff_rowwise
的解决方案,就像这样 -
# NOTE(review): `window` (the rolling window length) is not assigned in this
# snippet; it and the 2-D arrays `a` and `b` must be defined beforehand.
from skimage.util import view_as_windows
# Sliding windows taken with a (window, 1) window shape; the trailing
# [..., 0] drops the last axis of the result.
# Presumably A and B are shaped (n_rows - window + 1, n_cols, window) -- TODO confirm.
A = view_as_windows(a,(window,1))[...,0]
B = view_as_windows(b,(window,1))[...,0]
# Centre each window by subtracting its own mean, taken over the last axis.
A_mA = A - A.mean(-1, keepdims=True)
B_mB = B - B.mean(-1, keepdims=True)
## Sum of squared deviations over the last axis
ssA = (A_mA**2).sum(-1) # or better : np.einsum('ijk,ijk->ij',A_mA,A_mA)
ssB = (B_mB**2).sum(-1) # or better : np.einsum('ijk,ijk->ij',B_mB,B_mB)
## Finally get corr coeff: sum of products of deviations over the last axis,
## divided by the square root of the product of the two sums of squares.
## Rows before index window-1 keep their NaN fill value.
out = np.full(a.shape, np.nan)
out[window-1:] = np.einsum('ijk,ijk->ij',A_mA,B_mB)/np.sqrt(ssA*ssB)
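您可以使用 pandas.rolling_corr() 函数生成相关性。不过,我不明白为什么它们会给出不同的输出。(注:差异很可能来自问题中的切片 a[(i-(window-1)):(i), j]——它不包含第 i 行,因此每个窗口实际只用了 window-1 个样本,而 pandas 使用完整的 window 个样本。)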
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
# Seed the global NumPy random generator so the example is reproducible.
np.random.seed(10)
a = np.random.random(30).reshape(10,3)
b = np.random.random(30).reshape(10,3)
# Wrap the arrays in DataFrames so the pandas rolling API can be used.
a_1 = pd.DataFrame(a)
b_1 = pd.DataFrame(b)
# The module-level pd.rolling_corr() has been removed from pandas and the
# print statement is Python 2 only. The method form below correlates each
# column of a_1 with the same-numbered column of b_1 over a 5-row window.
print(a_1.rolling(window=5).corr(b_1))
# OUTPUT
===============================
0 1 2
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 0.441993 0.254435 0.707801
5 0.314446 0.233392 0.425191
6 0.243755 -0.441434 0.352801
7 0.281139 -0.864357 -0.192409
8 0.543645 -0.925822 -0.563786
9 0.445918 -0.784808 -0.532234
我有两个形状相同的矩阵:
import numpy as np
from scipy.stats import pearsonr
# Seed the global NumPy random generator so the example is reproducible.
np.random.seed(10)
# Two arrays of 30 random floats each, reshaped to 10 rows by 3 columns.
a = np.random.random(30).reshape(10,3)
b = np.random.random(30).reshape(10,3)
即 10 行和 3 列。我需要每个矩阵中具有相同列索引的列的 rolling 相关性。慢的方式是:
def roll_corr(ab, window):
    """Compute a rolling, column-wise Pearson correlation of two arrays.

    Args:
        ab: Pair ``(a, b)`` of equally shaped 2-D arrays. Column ``j`` of
            ``a`` is correlated with column ``j`` of ``b``.
        window: Rolling window length. The first ``window - 1`` rows of the
            result are left as NaN.

    Returns:
        An array shaped like ``a`` holding the correlation coefficients.
    """
    # Tuple parameters were removed in Python 3 (PEP 3113), so the pair is
    # unpacked here; callers still invoke ``roll_corr((a, b), window)``.
    a, b = ab
    out = np.ones_like(a) * np.nan
    for i in range(window - 1, a.shape[0]):
        # NOTE: the slice stops before row i, so each coefficient is computed
        # from the ``window - 1`` rows that precede row i.
        start = i - (window - 1)
        for j in range(a.shape[1]):
            out[i, j] = pearsonr(a[start:i, j], b[start:i, j])[0]
    return out
roll_corr((a, b), 5)
的结果如我所愿,
array([[ nan, nan, nan],
[ nan, nan, nan],
[ nan, nan, nan],
[ nan, nan, nan],
[ 0.28810753, 0.27836622, 0.88397851],
[-0.04076151, 0.45254981, 0.83259104],
[ 0.62262963, -0.4188768 , 0.35479134],
[ 0.13130652, -0.91441413, -0.21713372],
[ 0.54327228, -0.91390053, -0.84033286],
[ 0.45268257, -0.95245888, -0.50107515]])
问题是:是否有更惯用的 numpy 方法来做到这一点?矢量化?步幅技巧(stride tricks)?Numba?
我已经搜索过了,但没有找到相关的内容。我不想使用 pandas;必须用 numpy 实现。
我们可以利用基于 np.lib.stride_tricks.as_strided 的 scikit-image 函数 view_as_windows 来获取滑动窗口。
因此,我们将有一个基于 corr2_coeff_rowwise
的解决方案,就像这样 -
# NOTE(review): `window` (the rolling window length) is not assigned in this
# snippet; it and the 2-D arrays `a` and `b` must be defined beforehand.
from skimage.util import view_as_windows
# Sliding windows taken with a (window, 1) window shape; the trailing
# [..., 0] drops the last axis of the result.
# Presumably A and B are shaped (n_rows - window + 1, n_cols, window) -- TODO confirm.
A = view_as_windows(a,(window,1))[...,0]
B = view_as_windows(b,(window,1))[...,0]
# Centre each window by subtracting its own mean, taken over the last axis.
A_mA = A - A.mean(-1, keepdims=True)
B_mB = B - B.mean(-1, keepdims=True)
## Sum of squared deviations over the last axis
ssA = (A_mA**2).sum(-1) # or better : np.einsum('ijk,ijk->ij',A_mA,A_mA)
ssB = (B_mB**2).sum(-1) # or better : np.einsum('ijk,ijk->ij',B_mB,B_mB)
## Finally get corr coeff: sum of products of deviations over the last axis,
## divided by the square root of the product of the two sums of squares.
## Rows before index window-1 keep their NaN fill value.
out = np.full(a.shape, np.nan)
out[window-1:] = np.einsum('ijk,ijk->ij',A_mA,B_mB)/np.sqrt(ssA*ssB)
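您可以使用 pandas.rolling_corr() 函数生成相关性。不过,我不明白为什么它们会给出不同的输出。(注:差异很可能来自问题中的切片 a[(i-(window-1)):(i), j]——它不包含第 i 行,因此每个窗口实际只用了 window-1 个样本,而 pandas 使用完整的 window 个样本。)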
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
# Seed the global NumPy random generator so the example is reproducible.
np.random.seed(10)
a = np.random.random(30).reshape(10,3)
b = np.random.random(30).reshape(10,3)
# Wrap the arrays in DataFrames so the pandas rolling API can be used.
a_1 = pd.DataFrame(a)
b_1 = pd.DataFrame(b)
# The module-level pd.rolling_corr() has been removed from pandas and the
# print statement is Python 2 only. The method form below correlates each
# column of a_1 with the same-numbered column of b_1 over a 5-row window.
print(a_1.rolling(window=5).corr(b_1))
# OUTPUT
===============================
0 1 2
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 0.441993 0.254435 0.707801
5 0.314446 0.233392 0.425191
6 0.243755 -0.441434 0.352801
7 0.281139 -0.864357 -0.192409
8 0.543645 -0.925822 -0.563786
9 0.445918 -0.784808 -0.532234