加载 hdf5 文件并使用 pyqtgraph 显示数据

Loading a hdf5 file and displaying the data with pyqtgraph

我想在 pyqtgraph 的 ImageView() 类中显示 hdf5 文件的数据。用 ImageView() 显示绘图的最基本代码是:

from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph as pg

# Interpret image data as row-major instead of col-major
pg.setConfigOptions(leftButtonPan = False, imageAxisOrder='row-major')

# The Qt application object is created before any widget.
app = QtGui.QApplication([])

## Create window with ImageView widget
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')
# NOTE: no image is handed to `imv` anywhere in this script, so this is only
# the empty viewer skeleton.

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

然而在 pyqtgraph 示例集中也有一个 hdf5 示例。不幸的是,我无法让它工作。我对示例进行了一些更改以使其满足我的需要,但出现错误。首先是代码:

import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui

# Make sure a Qt application exists before any plot window is created.
pg.mkQApp()

# Plot window that will host the HDF5-backed item.
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Auto-ranging is switched off for both axes and a fixed initial x window is
# set; the item below decides what to read from the current x view range.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)


class HDF5Plot(pg.ImageItem):
    """Item that reads and shows only the visible x-range of a 1-D dataset.

    NOTE(review): this variant derives from ``pg.ImageItem``. The traceback
    reported for this code shows ``self.setData(visible)`` failing with
    ``TypeError: setData(self, int, Any): argument 1 has unexpected type
    'numpy.ndarray'``, i.e. the ``setData`` reached through this base class
    does not accept an array. The otherwise identical variant further down
    derives from ``pg.PlotCurveItem`` instead.
    """

    def __init__(self, *args, **kwds):
        # Both attributes are assigned before the base-class initializer runs.
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.ImageItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store `data` as the source and refresh the displayed samples.

        `data` must support ``len()``, slicing and ``.dtype`` (these are the
        operations `updateHDF5Plot` performs on it).
        """
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Refresh the displayed samples whenever the view range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read the samples inside the current x view range and display them.

        When the visible span holds more than roughly `self.limit` samples the
        data is reduced chunk by chunk to interleaved min/max pairs.
        """
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5
        xrange = vb.viewRange()[0]
        start = max(0, int(xrange[0]) - 1)
        stop = min(len(self.hdf5), int(xrange[1] + 2))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1

        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            # Two output values (min and max) per group of `ds` input samples.
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0

            # read data in chunks of ~1M samples
            chunkSize = (1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)

                # reshape chunk to be integral multiple of ds
                # (samples past the last full group of `ds` are dropped)
                chunk = chunk[:(len(chunk) // ds) * ds].reshape(len(chunk) // ds, ds)

                # compute max and min
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)

                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + chunk.shape[0] * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + chunk.shape[0] * 2:2] = chunkMax
                targetPtr += chunk.shape[0] * 2

            # Trim the part of the preallocated buffer that was never written.
            visible = visible[:targetPtr]
            # Each group of `ds` samples produced two points, hence ds / 2.
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling


# The file is opened without a context manager and is not closed in this
# script: HDF5Plot re-reads from the dataset on every view-range change.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
# 'data' must be the name of a dataset at the root of the file; the KeyError
# shown below is raised by this lookup when no such object exists.
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':

    import sys

    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

这里是错误:

Traceback (most recent call last):
  File "pyqtg.py", line 206, in <module>
    curve.setHDF5(f['data'])
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/h5py-3.3.0-py3.8-linux-x86_64.egg/h5py/_hl/group.py", line 305, in __getitem__
    oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5o.pyx", line 190, in h5py.h5o.open
KeyError: "Unable to open object (object 'data' doesn't exist)"

问题是我不知道这个 hdf5 文件的内容和结构是什么样的,所以我不确定该用什么名称来替换 'data',或者问题是否完全出在别处。非常感谢任何帮助。

编辑 1: 这个例子是我通过运行 python -m pyqtgraph.examples 得到的。GUI 弹出后,您会在列表中看到“HDF5 大数据”(HDF5 big data),我的代码源于那个例子。在这些示例中,从顶部数第三个(ImageView)是我想用来显示 HDF5 文件的代码。

编辑 2: 这是运行 kcw78 第二段代码得到的结果: http://pastie.org/p/3scRyUm1ZFVJNMwTHQHCBv

编辑 3: 我运行了上面的代码,但在 kcw78 的帮助下做了一个小改动。我把下面这段代码:

# Original version: looks up a dataset named 'data' at the root of the file.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

至:

# Changed version: uses the dataset name 'aggea' and a context manager.
# NOTE(review): the file is closed when this block ends, while HDF5Plot reads
# from the dataset again on every later view-range change.
with h5py.File('test.hdf5', 'r') as h5f:
    curve = HDF5Plot()
    curve.setHDF5(h5f['aggea'])
    plt.addItem(curve)

并得到错误:

Traceback (most recent call last):
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
    self.parentChanged()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
    self._updateView()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
    self.viewRangeChanged()
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
    self.parentChanged()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
    self._updateView()
  File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
    self.viewRangeChanged()
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
  File "pyqtg.py", line 25, in viewRangeChanged
    self.updateHDF5Plot()
  File "pyqtg.py", line 77, in updateHDF5Plot
    self.setData(visible)  # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'

编辑 4:

这是结果的照片:https://imgur.com/a/tVHNdx9。我从创建 2d hdf5 文件和使用我的 2d 数据文件得到相同的空结果。

# Copy dataset 'aggea' from mytest.hdf5 into a new file as a flattened 1-D
# dataset named 'data', then print the shape and dtype of the copy.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    # [:] reads the values; reshape(-1,) flattens them to one dimension.
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)

编辑 5:运行和绘图的代码

import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui

# Make sure a Qt application exists before any plot window is created.
pg.mkQApp()

# Plot window that will host the HDF5-backed curve.
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Auto-ranging is switched off for both axes and a fixed initial x window is
# set; the curve below decides what to read from the current x view range.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)

class HDF5Plot(pg.PlotCurveItem):
    """Curve that draws only the currently visible slice of a 1-D dataset.

    The source is re-read on every view-range change. Spans holding more
    than about ``limit`` samples are reduced to interleaved (min, max)
    pairs so the envelope of the signal stays visible.
    """

    def __init__(self, *args, **kwds):
        # State is set up before the base-class initializer is invoked.
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Use `data` (sliceable, with ``len()`` and ``.dtype``) as the source."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw from the source whenever the view range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Load the samples inside the current x view range and plot them."""
        source = self.hdf5
        if source is None:
            self.setData([])
            return

        view_box = self.getViewBox()
        if view_box is None:
            # Not attached to a ViewBox yet, nothing to size against.
            return

        # Index window to read: the visible x interval padded by one/two
        # samples and clipped to the dataset bounds.
        x_limits = view_box.viewRange()[0]
        first = max(0, int(x_limits[0]) - 1)
        last = min(len(source), int(x_limits[1] + 2))
        span = last - first

        # Number of source samples folded into one min/max pair.
        step = int(span / self.limit) + 1

        if step != 1:
            # Too many samples: build the min/max envelope piece by piece so
            # that only one block of the source is in memory at a time.
            capacity = 2 * (1 + (span // step))
            visible = np.zeros(capacity, dtype=source.dtype)
            read_pos = first
            write_pos = 0

            # Blocks of about one million samples, a whole multiple of `step`.
            block_len = (1000000 // step) * step
            while read_pos < last - 1:
                block = source[read_pos:min(last, read_pos + block_len)]
                read_pos += len(block)

                # Keep only complete groups of `step` samples, one per row.
                rows = len(block) // step
                block = block[:rows * step].reshape(rows, step)

                # Even slots receive the minima, odd slots the maxima.
                out_end = write_pos + rows * 2
                visible[write_pos:out_end:2] = block.min(axis=1)
                visible[write_pos + 1:out_end + 1:2] = block.max(axis=1)
                write_pos = out_end

            visible = visible[:write_pos]
            x_scale = step * 0.5
        else:
            # Few enough samples to plot them all as they are.
            visible = source[first:last]
            x_scale = 1

        self.setData(visible)  # update the plot
        self.setPos(first, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(x_scale, 1)  # scale to match downsampling


# Step 1: copy dataset 'aggea' into a new file as a flattened 1-D dataset
# named 'data'. Both files are closed as soon as the copy is complete.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)

# Step 2: re-open the 1-D file read-only and keep it open while the GUI runs.
# HDF5Plot reads from the dataset again on every view-range change, so the
# dataset handed to setHDF5() must not belong to a file that has already been
# closed (which is what happened when the curve was created inside the
# `with` block above).
h5f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(h5f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':

    import sys

    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
        # The event loop has ended, so nothing reads from the file any more.
        h5f.close()

编辑 6: 最终的效果:

from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
import matplotlib.pyplot as plt

# The Qt application object is created before any widget.
app = QtGui.QApplication([])

# Main window whose central widget is an ImageView.
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

with h5py.File('test.hdf5', 'r') as h5fr:
    # `[()]` gets the values of dataset 'aggea'. The older
    # `h5fr.get('dataset_name').value` spelling shows what `[()]` is doing,
    # but it is deprecated.
    data = h5fr.get('aggea')[()]
    imv.setImage(data)

    # hf = h5py.File('test.hdf5', 'r')
    # n1 = np.array(hf['/pathtodata'][:])
    # print(n1.shape)
## Set a custom color map
# Six RGB stops, placed at six evenly spaced positions between 0.0 and 1.0.
colors = [
    (0, 0, 0),
    (45, 5, 61),
    (84, 42, 55),
    (150, 87, 60),
    (208, 171, 141),
    (255, 255, 255)
]
cmap = pg.ColorMap(pos=np.linspace(0.0, 1.0, 6), color=colors)
imv.setColorMap(cmap)

## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

该错误表明您的 HDF5 文件中不存在数据集 'data'。所以,我们必须弄清楚为什么它不存在。 :-) 您没有说明您运行的示例是从哪里找到的。我在 pyqtgraph/examples 存储库中找到的那个示例,在函数 def createFile(finalSize=2000000000): 中包含创建文件的代码。
我假设您运行了这段代码来创建 test.hdf5?
如果您没有使用示例代码创建文件,那么您从哪里得到 test.hdf5?
无论哪种方式,这里都有一些代码可以查询您的 HDF5 文件。它将为我们提供数据集名称和属性(形状和数据类型)。有了这些信息,我们就可以确定接下来的步骤。

import h5py
# Open the file read-only; it is closed explicitly after the listing.
h5f = h5py.File('test.hdf5', 'r')
# Report, for every member of the root group, whether it is a Group or a
# Dataset (and for datasets also the shape and dtype).
for member in h5f:
    obj = h5f[member]
    if isinstance(obj, h5py.Group):
        print(f'{member} is a Group')
        continue
    if isinstance(obj, h5py.Dataset):
        print(f'{member} is Dataset')
        print(f'shape={obj.shape}, dtype={obj.dtype}')
        continue
    print(f'Object: {member} is not a Group or a Dataset')
h5f.close()

示例中 test.hdf5 的预期输出:

# data is Dataset
# shape=(501000000,), dtype=float32 

下面是一个更通用的方法,使用 .visititems() 方法递归检查层次结构中的所有对象。当您的数据集组织在组(Group)中时,我更喜欢并推荐这种方法。注意:它使用了 Python 的 with/as 上下文管理器,这是首选做法。请注意这里不需要调用 .close():上下文管理器会自动处理文件的打开和关闭,因此即使您的代码意外退出,文件也不会保持打开状态。

import h5py
def visitor_func(name, node):
    """Print what kind of HDF5 object `node` is.

    Written as a callback for ``visititems``, which supplies `name` and
    `node`; only `node` is used. Nothing is returned.
    """
    if isinstance(node, h5py.Group):
        kind = 'is a Group'
    elif isinstance(node, h5py.Dataset):
        # Datasets with dtype 'object' are reported separately.
        kind = 'is an object Dataset' if node.dtype == 'object' else 'is a Dataset'
    else:
        kind = 'is an unknown type'
    print(node.name, kind)
     
#####  main  #####
# Call visitor_func for the objects in the file; the context manager closes
# the file when the block ends.
with h5py.File('test.hdf5', 'r') as h5f:  
    h5f.visititems(visitor_func)  

2021-08-20 添加:
我们需要弄清楚为什么与示例 .hdf5 文件相比,您的数据会出现不同的行为。请对您的文件运行下面这一小段代码。

import h5py
# Print the shape, dtype and Python type of one specific object in the file.
h5f = h5py.File('test.hdf5', 'r')
# Full path of the object to inspect (group name followed by dataset name).
ds = '/system.soft.avtcams.glasscellvert/absK40'
print(f'shape={h5f[ds].shape}, dtype={h5f[ds].dtype}')
print(f'type={type(h5f[ds])}')
h5f.close()

作为参考,这是我通过示例 test.hdf5 文件获得的输出:

# shape=(501000000,), dtype=float32 
# type=h5py._hl.dataset.Dataset

2021-08-21 添加:
这是一个测试,看看您是否可以将 NumPy 数组用于曲线数据。它读取 HDF5 数据集并将二维数据集整形为一维数组。希望这有效。如果没有,那我就没办法了,你得找 pyqtgraph 有专业知识的人来诊断问题。

# Test whether HDF5Plot also works with the values read into memory:
# `arr` is built with [:] and reshape(-1,) inside the block, so the curve is
# given that flattened result rather than the dataset object itself.
with h5py.File('test.hdf5', 'r') as h5f:
    curve = HDF5Plot()
    arr = h5f['/system.soft.avtcams.glasscellvert/absK40'][:].reshape(-1,)
    curve.setHDF5(arr)
    plt.addItem(curve)

这是基于上述评论中问答的新答案。

除了我评论中描述的编码错误外,示例中还有一个限制 (hdf5.py)。它旨在展示如何读取“大数据”——当数据不适合内存时。它通过分块读取数据集和下采样(由 self.limit = 10000 定义)来实现。结果,它只从一维数据集中读取数据。有一种解决方法可以按原样使用代码处理您的数据:创建一个小实用程序来提取感兴趣的数据集,重塑为一维数组并复制到新的 hdf5 文件。 (这假设感兴趣的数据集适合内存。在某些时候,需要对 HDF5 数据与 NumPy 数组进行更长时间的讨论。)

为了演示此行为,我创建了一个新的 hdf5 文件,其中包含模仿 OP 数据的二维数据集 (shape=(1038, 1388), dtype=uint16)。这是创建文件(名为 test_2d.hdf5)的代码:

import h5py
import numpy as np    
# Random uint16 test data with the same shape as the OP's dataset.
# np.random.random_integers is deprecated; randint is its replacement. The
# upper bound of randint is exclusive, so (1, 65536) draws from the same
# inclusive range 1..65535 that random_integers(65535) produced.
chunk = np.random.randint(1, 65536, size=(1038, 1388)).astype(np.uint16)
# chunk = np.random.normal(size=1038*1388).astype(np.uint16).reshape(1038, 1388)
# Write the array as dataset 'data' and print its shape and dtype as a check.
with h5py.File('test_2d.hdf5', 'w') as h5f:
    h5f.create_dataset('data', data=chunk)
    print(h5f['data'].shape, h5f['data'].dtype)

创建此文件后,使用下面的代码将该数据提取到一个新文件中,我们将用这个新文件来绘制数据。(假设这有效,这就是“解决方案”。您可以修改它,从您自己的测试文件中提取数据,然后从新文件中读取并绘图。)

import h5py
# Copy dataset 'data' from test_2d.hdf5 into test_1d.hdf5 as a flattened 1-D
# dataset of the same name, then print the shape and dtype of the copy.
with h5py.File('test_2d.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    # [:] reads the values; reshape(-1,) flattens them to one dimension.
    arr = h5fr['data'][:].reshape(-1,)    
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)

最后,这是对原始帖子中的代码稍作修改后的版本。它与您的代码类似,区别在于我在评论中提到的对 class HDF5Plot() 声明的更改(使用 pg.PlotCurveItem 而不是 pg.ImageItem)。正是把基类换成 pg.ImageItem 触发了此错误消息:TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'

绘图代码:

import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui

# Make sure a Qt application exists before any plot window is created.
pg.mkQApp()

# Plot window that will host the HDF5-backed curve.
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Auto-ranging is switched off for both axes and a fixed initial x window is
# set; the curve below decides what to read from the current x view range.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)

class HDF5Plot(pg.PlotCurveItem):
    """Curve that plots only the visible x-range of a large 1-D dataset.

    The source (for example an ``h5py`` dataset or a NumPy array) is read
    again on every view-range change, so it must stay readable for as long
    as the curve is shown. When the visible span holds more than roughly
    ``self.limit`` samples, the data is reduced chunk by chunk to
    interleaved (min, max) pairs, which keeps memory use bounded and
    preserves the envelope of the signal.
    """

    def __init__(self, *args, **kwds):
        # Both attributes are assigned before the base-class initializer runs.
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store `data` as the source and refresh the curve.

        `data` must be one-dimensional and support ``len()``, slicing and
        ``.dtype``.
        """
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Refresh the curve whenever the view range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read the samples inside the current x view range and plot them."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5
        x_range = vb.viewRange()[0]
        start = max(0, int(x_range[0]) - 1)
        # `stop` is clamped so it never falls below `start`. Without the
        # clamp, a view lying entirely left of x = 0 yields a negative
        # `stop`, and `hdf5[start:stop]` is then counted from the END of the
        # dataset, i.e. it reads almost the whole dataset.
        stop = max(start, min(len(self.hdf5), int(x_range[1] + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1

        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            # Two output values (min and max) per group of `ds` input samples.
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0

            # read data in chunks of ~1M samples, always a whole multiple of
            # ds. At least one group of ds samples is read per pass: with
            # ds > 1,000,000 the plain formula gives 0, the read would be
            # empty, sourcePtr would never advance and the loop would not end.
            chunkSize = max(ds, (1000000 // ds) * ds)
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)

                # reshape chunk to be integral multiple of ds
                # (samples past the last full group of ds are dropped)
                rows = len(chunk) // ds
                chunk = chunk[:rows * ds].reshape(rows, ds)

                # compute max and min
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)

                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + rows * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + rows * 2:2] = chunkMax
                targetPtr += rows * 2

            # Trim the part of the preallocated buffer that was never written.
            visible = visible[:targetPtr]
            # Each group of ds samples produced two points, hence ds / 2.
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling

# The file is opened without a context manager and is not closed in this
# script: HDF5Plot re-reads from the dataset on every view-range change.
f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':

    import sys

    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

根据我的随机数据创建的图。

这是一个简单的示例,展示了如何从 HDF5 文件中提取数据(作为二维数组)并使用 pyqtgraph 绘图。它还从 NumPy 数组创建一个图(用于比较目的——这两种方法几乎相同)。

  • 前 2 个图的 x、y 值使用生成的“随机”NumPy 数组数据。
  • 后 2 个图的 x、y 值是从 HDF5 文件中读取的。

只需稍作改动即可使用您的数据。需要更改:1) HDF5 文件名,和 2) 数据集名称。您将必须弄清楚如何将数据从 shape=(1038,1388) 重塑为 X 和 Y 数据的适当形状的数组。

代码如下:

from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg

# create some HDF5 data in a 2-d array of X,Y pairs
with h5py.File('plot_2d_data.h5','w') as h5f:
    # 100 rows, 2 columns: column 0 holds X, column 1 holds Y.
    data = h5f.create_dataset('data',shape=(100,2))
    data[:,0] = np.arange(0.0,10.0,0.1) ## X data points
    data[:,1] = np.random.normal(size=100) ## Y data points

app = QtGui.QApplication([])

win = pg.GraphicsLayoutWidget(show=True, title="2-D plot examples")
win.resize(1000,600)
win.setWindowTitle('pyqtgraph example: 2D Plotting')

# Enable antialiasing for prettier plots
pg.setConfigOptions(antialias=True)

# First row: two plots built directly from freshly generated NumPy arrays.
p1 = win.addPlot(title="Plot of NumPy data", 
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100))

p2 = win.addPlot(title="NumPy data with Points", 
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100),
                 pen=(255,0,0), symbolBrush=(255,0,0))

win.nextRow()

# Second row: the same two plot styles, with X and Y taken from columns 0
# and 1 of the 'data' dataset written above.
with h5py.File('plot_2d_data.h5','r') as h5f:
    
    p3 = win.addPlot(title="Plot of HDF5 data", 
                     x=h5f['data'][:,0], y=h5f['data'][:,1])

    p4 = win.addPlot(title="HDF5 data with Points", 
                     x=h5f['data'][:,0], y=h5f['data'][:,1],
                     pen=(0,0,255), symbolBrush=(0,0,255))

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()