hdf5 数组的点积

Dot product of hdf5 array

我正在构建一个神经网络,其输入是一个 hdf5 数组 X,权重矩阵为 W1。我想按如下方式计算这两者的点积,并把结果存储到另一个 hdf5 数组中。

# Snippet from the question; the use of `self` suggests it presumably lives
# inside a method that is not shown here.
# NOTE(review): the file is bound to `f`, but the next line writes through
# `self.f` -- confirm which name is intended.
f = h5py.File('z2.hdf5')
# np.dot(X, self.W1) is evaluated first and its result is then assigned to the
# 'z2' key; this is the line the question reports as raising MemoryError.
self.f['z2'] = np.dot(X,self.W1)

但是上面这一行会抛出 MemoryError。该如何解决?应该如何对 hdf5 数组计算点积?

来自 http://dask.pydata.org/en/latest/array-overview.html

"""Dask Array implements a subset of the NumPy ndarray interface using blocked algorithms, cutting up the large array into many small arrays. This lets us compute on arrays larger than memory using all of our cores."""

""" dask.array 库支持来自 numpy 的以下接口:

...

张量收缩/点积/矩阵乘法,tensordot"""

下面是一个用于说明的可运行示例;可以尝试不同的维度,比较 numpy 与 dask 的性能。

import dask as dk
import tables
import numpy as np
from time import time

# Benchmark configuration: output directory and the two matrix dimensions.
outpath = "/tmp/"
lenx = 300
leny = 100000
# The file name encodes both dimensions in units of 100, each zero-padded to
# at least three digits.
fname = "t%03d_%03d.h5" % (lenx // 100, leny // 100)

def write_test_file():
    """Create the benchmark HDF5 file holding two compressed random matrices.

    Opens ``outpath + fname`` in write mode and stores two zlib-compressed
    float64 arrays under the root group:

    * ``/pressure`` with shape ``(lenx, leny)``
    * ``/lines`` with shape ``(leny, lenx)``

    Prints the elapsed time of the two write phases and returns ``None``.
    """
    atom = tables.Float64Atom()
    filters = tables.Filters(complevel=6, complib='zlib', shuffle=True)
    # The context manager closes the file even if a write fails part-way,
    # so a failed run does not leave an open handle behind.
    with tables.open_file(outpath + fname, "w") as h5file:
        pres = np.random.random((lenx, leny))
        print("Writing data")
        t01 = time()
        h5file.create_carray(h5file.root, 'pressure', atom, (lenx, leny),
                             filters=filters, obj=pres)
        h5file.flush()
        # Release the first matrix before allocating the second one to keep
        # peak memory down.
        del pres
        t02 = time()
        # NOTE: the second interval (t03 - t02) also includes the time spent
        # generating the random data, not only the write itself.
        lines = np.random.random((leny, lenx))
        h5file.create_carray(h5file.root, "lines", atom, (leny, lenx),
                             filters=filters, obj=lines)
        t03 = time()
        print("Data written", t03 - t02, t02 - t01)

def numpy_dot_test():
    """Time an in-memory NumPy matrix product of the two stored arrays.

    Opens ``outpath + fname`` read-only, loads ``/lines`` and ``/pressure``
    completely into memory, computes ``lines.dot(pressure)`` and prints the
    node shapes, the result shape and mean, and the timings.
    Returns ``None``.

    NOTE(review): with ``/lines`` stored as ``(leny, lenx)`` and ``/pressure``
    as ``(lenx, leny)`` (see ``write_test_file``), this product has shape
    ``(leny, leny)``, whereas ``dask_dot_test`` multiplies in the opposite
    order -- confirm which order the comparison is meant to use.
    """
    print("Open data")
    t1 = time()
    # The context manager guarantees the file is closed even when the
    # in-memory product fails (e.g. with MemoryError).
    with tables.open_file(outpath + fname, mode="r") as h5open:
        pressure_node = h5open.get_node("/", "pressure")
        print(pressure_node.shape)
        lines_node = h5open.get_node("/", "lines")
        print(lines_node.shape)
        t2 = time()
        # np.array(...) materialises each on-disk node as a full ndarray.
        ohoo = np.array(lines_node).dot(np.array(pressure_node))
        t3 = time()
        print(ohoo.shape, np.mean(ohoo))
        print("matmul time:", t3 - t2, t2 - t1)

def dask_dot_test():
    """Time a dask matrix product of the stored arrays, written to HDF5.

    Opens ``outpath + fname`` read-only with h5py, wraps ``/pressure`` and
    ``/lines`` as chunked dask arrays, computes ``pressure.dot(lines)`` and
    stores the result as dataset ``testout`` in ``outpath + 'output.h5'``.
    Prints the operand shapes and the timings. Returns ``None``.
    """
    import h5py
    import dask.array as da
    # Open explicitly read-only: the default mode of h5py.File differs between
    # h5py versions and this function never writes to the input file. The
    # context manager closes the file even if the computation fails.
    with h5py.File(outpath + fname, "r") as h5open2:
        t21 = time()
        # NOTE(review): the chunk shapes are kept exactly as in the original;
        # they do not line up along the contracted axis -- confirm intent.
        d1 = da.from_array(h5open2["/pressure"], chunks=(100, lenx))
        d2 = da.from_array(h5open2["/lines"], chunks=(leny, 100))
        t22 = time()
        print('d1,d2', d1.shape, d2.shape)
        # dask builds the product lazily; to_hdf5 runs the computation and
        # writes the result to disk.
        d1.dot(d2).to_hdf5(outpath + 'output.h5', 'testout')
        t23 = time()
        print('ohoo', t23 - t22, t22 - t21)


# Build the benchmark input file first, then run the dask benchmark on it.
write_test_file()
# numpy_dot_test()  # in-memory NumPy baseline, left disabled in the original
dask_dot_test()