加载 hdf5 文件并使用 pyqtgraph 显示数据
Loading a hdf5 file and displaying the data with pyqtgraph
我想在 pyqtgraph 的 ImageView() class 中显示 hdf5 文件的数据。显示 ImageView() 绘图的裸代码是:
from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph as pg
# Interpret image data as row-major instead of col-major
pg.setConfigOptions(leftButtonPan = False, imageAxisOrder='row-major')
app = QtGui.QApplication([])

## Create window with ImageView widget
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

# Start the Qt event loop when run as a script.  It is skipped only when the
# interpreter is interactive AND QtCore exposes PYQT_VERSION.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
然而在 pyqtgraph 示例集中也有一个 hdf5 示例。不幸的是,我无法让它工作。我对示例进行了一些更改以使其满足我的需要,但出现错误。首先是代码:
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.ImageItem):
    """Item that displays only the visible slice of a large 1-D dataset.

    NOTE(review): the base class is kept exactly as posted.  With
    pg.ImageItem the ``setData(visible)`` call in updateHDF5Plot() fails with
    the TypeError quoted in the traceback further down this document; the
    corrected versions in this document derive from pg.PlotCurveItem.
    """

    def __init__(self, *args, **kwds):
        self.hdf5 = None
        self.limit = 10000 # maximum number of samples to be plotted
        pg.ImageItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read the samples inside the current x-range and display them."""
        if self.hdf5 is None:
            self.setData([])
            return
        vb = self.getViewBox()
        if vb is None:
            return # no ViewBox yet
        # Determine what data range must be read from HDF5
        xrange = vb.viewRange()[0]
        start = max(0, int(xrange[0]) - 1)
        stop = min(len(self.hdf5), int(xrange[1] + 2))
        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # read data in chunks of ~1M samples
            chunkSize = (1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # reshape chunk to be integral multiple of ds
                chunk = chunk[:(len(chunk) // ds) * ds].reshape(len(chunk) // ds, ds)
                # compute max and min
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + chunk.shape[0] * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + chunk.shape[0] * 2:2] = chunkMax
                targetPtr += chunk.shape[0] * 2
            visible = visible[:targetPtr]
            scale = ds * 0.5
        self.setData(visible) # update the plot
        self.setPos(start, 0) # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1) # scale to match downsampling
# Open the file and hand the dataset named 'data' to the curve.  The file is
# left open because the curve reads from the dataset whenever the view changes.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
这里是错误:
Traceback (most recent call last):
File "pyqtg.py", line 206, in <module>
curve.setHDF5(f['data'])
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/h5py-3.3.0-py3.8-linux-x86_64.egg/h5py/_hl/group.py", line 305, in __getitem__
oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5o.pyx", line 190, in h5py.h5o.open
KeyError: "Unable to open object (object 'data' doesn't exist)"
问题是我不知道这个 hdf5 文件的内部结构是什么样的,所以我不确定应该用什么名称来替换 'data',或者是否需要完全不同的做法。非常感谢任何帮助。
编辑 1:
我是通过运行 python -m pyqtgraph.examples 得到这个例子的。GUI 弹出后,您会在列表中看到“HDF5 big data”(HDF5 大数据)。我的代码源于那个例子。示例列表中从上往下第三个 ImageView 示例,就是我想用来显示 HDF5 文件的代码。
编辑 2:
这是代码 kcw78 第二部分 运行 的结果:
http://pastie.org/p/3scRyUm1ZFVJNMwTHQHCBv
编辑 3:
所以我 运行 上面的代码但是在 kcw78 的帮助下做了一个小改动。我改变了:
# Version before the change: the file is opened without a context manager
# and the curve is given the dataset named 'data'.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)
至:
# Version after the change: context manager, dataset name 'aggea'.
with h5py.File('test.hdf5', 'r') as h5f:
    curve = HDF5Plot()
    curve.setHDF5(h5f['aggea'])
    plt.addItem(curve)
# NOTE(review): the file is closed when the with-block ends, while the curve
# still holds the dataset object - confirm nothing reads from it afterwards.
并得到错误:
Traceback (most recent call last):
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
self.parentChanged()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
self._updateView()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
self.viewRangeChanged()
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
self.parentChanged()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
self._updateView()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
self.viewRangeChanged()
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
编辑 4:
这是结果的照片:https://imgur.com/a/tVHNdx9。我从创建 2d hdf5 文件和使用我的 2d 数据文件得到相同的空结果。
# Copy the 2-D 'aggea' dataset into a new file as a flattened 1-D dataset.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)  # load into memory, flatten to 1-D
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)
编辑 5:运行和绘图的代码
import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.PlotCurveItem):
    """Curve that plots only the visible part of a large 1-D data source.

    The source (``self.hdf5``) must support ``len()``, slicing and ``dtype``.
    When more than ``self.limit`` samples are visible, the data is reduced
    to an interleaved min/max envelope, read chunk by chunk.
    """

    def __init__(self, *args, **kwds):
        # Attributes are assigned before the base constructor runs (order
        # kept from the original code).
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read, downsample if necessary, and display the visible samples."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5.  Both ends are
        # clamped so that 0 <= start <= stop <= len(data).  Without the clamp
        # a view left of x=0 produces a negative stop (interpreted by slicing
        # as "counted from the end", i.e. almost the whole dataset is read),
        # and a view far beyond the end produces ds == 0 and a
        # ZeroDivisionError below.
        total = len(self.hdf5)
        xrange = vb.viewRange()[0]
        start = min(total, max(0, int(xrange[0]) - 1))
        stop = max(start, min(total, int(xrange[1] + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # Read data in chunks of ~1M samples, rounded to a multiple of ds.
            # max(1, ...) keeps the chunk non-empty when ds > 1,000,000, so
            # the loop below always advances.
            chunkSize = max(1, 1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # Drop the incomplete tail and view the rest as rows of ds samples
                rows = len(chunk) // ds
                chunk = chunk[:rows * ds].reshape(rows, ds)
                # compute max and min of every row
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + rows * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + rows * 2:2] = chunkMax
                targetPtr += rows * 2
            visible = visible[:targetPtr]
            # two output points (min, max) per ds input samples
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling
# Step 1: copy the 2-D 'aggea' dataset into a new file as a flat 1-D dataset.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)

# Step 2: re-open the new file read-only and keep it open.  The curve reads
# from the dataset every time the view changes, so it must not be handed a
# dataset whose file is closed when a with-block ends.
f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
        f.close()  # event loop has finished; no further reads will happen
编辑 6:
最终的效果:
from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
import matplotlib.pyplot as plt
app = QtGui.QApplication([])

## Create window with ImageView widget
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

# Read the whole 'aggea' dataset into memory.  Indexing with `[()]` returns
# the values (the deprecated `.value` attribute did the same thing).
# The file is indexed directly so that a missing dataset raises a KeyError
# naming it, instead of `.get()` returning None and failing on the `[()]`.
with h5py.File('test.hdf5', 'r') as h5fr:
    data = h5fr['aggea'][()]
# `data` is an in-memory array, so the file can be closed before displaying it.
imv.setImage(data)

# Alternative way to load a dataset into a NumPy array:
# hf = h5py.File('test.hdf5', 'r')
# n1 = np.array(hf['/pathtodata'][:])
# print(n1.shape)

## Set a custom color map
colors = [
    (0, 0, 0),
    (45, 5, 61),
    (84, 42, 55),
    (150, 87, 60),
    (208, 171, 141),
    (255, 255, 255)
]
# Six colour stops spread evenly over the normalized range 0.0 - 1.0
cmap = pg.ColorMap(pos=np.linspace(0.0, 1.0, 6), color=colors)
imv.setColorMap(cmap)

## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
该错误表明您的 HDF5 文件中不存在名为 'data' 的数据集。所以,我们必须弄清楚为什么它不存在。 :-) 您没有说明您运行的示例是从哪里得到的。我在 pyqtgraph/examples 存储库中找到的那个示例,是在函数 createFile(finalSize=2000000000) 中创建该文件的。
我假设您是运行了这段代码来创建 test.hdf5?
如果您没有使用示例代码创建文件,那么 test.hdf5 是从哪里来的?
无论哪种方式,这里都有一些代码可以查询您的 HDF5 文件。它将为我们提供数据集名称和属性(形状和数据类型)。有了这些信息,我们就可以确定接下来的步骤。
import h5py
# Open the file read-only; the context manager closes it even if one of the
# lookups below raises.
with h5py.File('test.hdf5', 'r') as h5f:
    # print the dataset names and attributes of the top-level objects
    for ds, obj in h5f.items():
        if isinstance(obj, h5py.Group):
            print(f'{ds} is a Group')
        elif isinstance(obj, h5py.Dataset):
            print(f'{ds} is Dataset')
            print(f'shape={obj.shape}, dtype={obj.dtype}')
        else:
            print(f'Object: {ds} is not a Group or a Dataset')
示例中 test.hdf5 的预期输出:
# data is Dataset
# shape=(501000000,), dtype=float32
下面是一个更通用的方法,使用 .visititems() 方法递归检查层次结构中的所有对象。当数据集按组 (Group) 组织时,我更推荐这种方法。注意:它使用了 Python 的 with/as 上下文管理器,这是首选的做法——请注意这里不需要调用 f.close()。上下文管理器会自动完成打开和关闭 (setup/teardown) 操作,因此即使您的代码意外退出,文件也不会保持打开状态。
import h5py
def visitor_func(name, node):
    """Print the full path of one HDF5 object together with its kind.

    Intended as the callback for ``visititems()``.  ``name`` is accepted
    because the callback receives it, but the object's own ``node.name`` is
    what gets printed.  Nothing is returned.
    """
    if isinstance(node, h5py.Group):
        kind = 'is a Group'
    elif isinstance(node, h5py.Dataset):
        # Datasets with dtype 'object' are reported separately
        kind = 'is an object Dataset' if node.dtype == 'object' else 'is a Dataset'
    else:
        kind = 'is an unknown type'
    print(node.name, kind)
##### main #####
# Visit every object in the file with visitor_func; the file is closed on exit.
with h5py.File('test.hdf5', 'r') as h5f:
    h5f.visititems(visitor_func)
2021-08-20 添加:
我们需要弄清楚为什么您的数据与示例 .hdf5 文件的行为不同。请对您的文件运行下面这一小段代码。
import h5py
ds = '/system.soft.avtcams.glasscellvert/absK40'
# The context manager closes the file even if the dataset path does not exist.
with h5py.File('test.hdf5', 'r') as h5f:
    dset = h5f[ds]  # look the object up once
    print(f'shape={dset.shape}, dtype={dset.dtype}')
    print(f'type={type(dset)}')
作为参考,这是我通过示例 test.hdf5 文件获得的输出:
# shape=(501000000,), dtype=float32
# type=h5py._hl.dataset.Dataset
2021-08-21 添加:
这是一个测试,看看您是否可以将 NumPy 数组用于曲线数据。它读取 HDF5 数据集并将二维数据集整形为一维数组。希望这有效。如果没有,那我就没办法了,你得找 pyqtgraph
有专业知识的人来诊断问题。
# Read the 2-D dataset fully into memory and flatten it to a 1-D NumPy array.
with h5py.File('test.hdf5', 'r') as h5f:
    arr = h5f['/system.soft.avtcams.glasscellvert/absK40'][:].reshape(-1,)

# `arr` lives in memory, so the curve no longer needs the (now closed) file.
curve = HDF5Plot()
curve.setHDF5(arr)
plt.addItem(curve)
这是基于上述评论中问答的新答案。
除了我评论中描述的编码错误外,示例 (hdf5.py) 本身还有一个限制。它旨在展示如何读取“大数据”——即数据无法全部放入内存的情况。它通过分块读取数据集并进行下采样(上限由 self.limit = 10000 定义)来实现。因此,它只能读取一维数据集。有一种变通方法可以在不改动代码的情况下处理您的数据:编写一个小工具,提取感兴趣的数据集,重塑为一维数组并复制到新的 hdf5 文件中。(这假设感兴趣的数据集可以放入内存。至于 HDF5 数据与 NumPy 数组的区别,需要另找时间详细讨论。)
为了演示此行为,我创建了一个新的 hdf5 文件,其中包含模仿 OP 数据的二维数据集 (shape=(1038, 1388), dtype=uint16)。下面是创建该文件(名为 test_2d.hdf5)的代码:
import h5py
import numpy as np
# Random uint16 image with the same shape as the OP's data.  randint() replaces
# the deprecated random_integers(); the half-open range [1, 65536) gives the
# same inclusive value range 1..65535.
chunk = np.random.randint(1, 65536, size=(1038, 1388), dtype=np.uint16)
# chunk = np.random.normal(size=1038*1388).astype(np.uint16).reshape(1038, 1388)
with h5py.File('test_2d.hdf5', 'w') as h5f:
    h5f.create_dataset('data', data=chunk)
    print(h5f['data'].shape, h5f['data'].dtype)
创建此文件后,使用下面的代码将该数据提取到我们将用于绘制数据的新文件中。 (假设这有效,这就是“解决方案”。您可以修改它以从测试文件中提取数据,然后使用该文件从新文件中读取和绘图。)
import h5py
# Copy the 2-D 'data' dataset into a new file as a flattened 1-D dataset.
with h5py.File('test_2d.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['data'][:].reshape(-1,)  # load into memory, flatten to 1-D
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)
最后,这是在原始帖子基础上稍作修改的代码。它与您的代码类似,唯一的改动是我在评论中提到的 class HDF5Plot() 声明:使用 pg.PlotCurveItem 而不是 pg.ImageItem。把基类改成 pg.ImageItem 正是触发此错误消息的原因:TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
绘图代码:
import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.PlotCurveItem):
    """Curve that plots only the visible part of a large 1-D data source.

    The source (``self.hdf5``) must support ``len()``, slicing and ``dtype``.
    When more than ``self.limit`` samples are visible, the data is reduced
    to an interleaved min/max envelope, read chunk by chunk.
    """

    def __init__(self, *args, **kwds):
        # Attributes are assigned before the base constructor runs (order
        # kept from the original code).
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read, downsample if necessary, and display the visible samples."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5.  Both ends are
        # clamped so that 0 <= start <= stop <= len(data).  Without the clamp
        # a view left of x=0 produces a negative stop (interpreted by slicing
        # as "counted from the end", i.e. almost the whole dataset is read),
        # and a view far beyond the end produces ds == 0 and a
        # ZeroDivisionError below.
        total = len(self.hdf5)
        xrange = vb.viewRange()[0]
        start = min(total, max(0, int(xrange[0]) - 1))
        stop = max(start, min(total, int(xrange[1] + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # Read data in chunks of ~1M samples, rounded to a multiple of ds.
            # max(1, ...) keeps the chunk non-empty when ds > 1,000,000, so
            # the loop below always advances.
            chunkSize = max(1, 1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # Drop the incomplete tail and view the rest as rows of ds samples
                rows = len(chunk) // ds
                chunk = chunk[:rows * ds].reshape(rows, ds)
                # compute max and min of every row
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + rows * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + rows * 2:2] = chunkMax
                targetPtr += rows * 2
            visible = visible[:targetPtr]
            # two output points (min, max) per ds input samples
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling
# The file stays open while the GUI runs: the curve reads from the dataset
# every time the view range changes.
f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
        f.close()  # event loop has finished; no further reads will happen
根据我的随机数据创建的图。
这是一个简单的示例,展示了如何从 HDF5 文件中提取数据(作为二维数组)并使用 pyqtgraph
绘图。它还从 NumPy 数组创建一个图(用于比较目的——这两种方法几乎相同)。
- 前 2 个图的 x= 和 y= 值使用“随机”生成的 NumPy 数组数据。
- 第二对图的 x= 和 y= 值是从 HDF5 文件中读取的。
只需稍作改动即可使用您的数据。需要更改:1) HDF5 文件名,和 2) 数据集名称。您将必须弄清楚如何将数据从 shape=(1038,1388)
重塑为 X 和 Y 数据的适当形状的数组。
代码如下:
from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
# create some HDF5 data in a 2-d array of X,Y pairs
with h5py.File('plot_2d_data.h5','w') as h5f:
    data = h5f.create_dataset('data',shape=(100,2))
    data[:,0] = np.arange(0.0,10.0,0.1) ## X data points
    data[:,1] = np.random.normal(size=100) ## Y data points

app = QtGui.QApplication([])
win = pg.GraphicsLayoutWidget(show=True, title="2-D plot examples")
win.resize(1000,600)
win.setWindowTitle('pyqtgraph example: 2D Plotting')

# Enable antialiasing for prettier plots
pg.setConfigOptions(antialias=True)

# First row: plots built directly from NumPy arrays
p1 = win.addPlot(title="Plot of NumPy data",
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100))
p2 = win.addPlot(title="NumPy data with Points",
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100),
                 pen=(255,0,0), symbolBrush=(255,0,0))

win.nextRow()

# Second row: the same kind of plots from the HDF5 file.  The dataset is read
# into memory once; both plots then use the in-memory columns.
with h5py.File('plot_2d_data.h5','r') as h5f:
    xy = h5f['data'][:]
p3 = win.addPlot(title="Plot of HDF5 data",
                 x=xy[:,0], y=xy[:,1])
p4 = win.addPlot(title="HDF5 data with Points",
                 x=xy[:,0], y=xy[:,1],
                 pen=(0,0,255), symbolBrush=(0,0,255))

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
我想在 pyqtgraph 的 ImageView() class 中显示 hdf5 文件的数据。显示 ImageView() 绘图的裸代码是:
from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph as pg
# Interpret image data as row-major instead of col-major
pg.setConfigOptions(leftButtonPan = False, imageAxisOrder='row-major')
app = QtGui.QApplication([])

## Create window with ImageView widget
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

# Start the Qt event loop when run as a script.  It is skipped only when the
# interpreter is interactive AND QtCore exposes PYQT_VERSION.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
然而在 pyqtgraph 示例集中也有一个 hdf5 示例。不幸的是,我无法让它工作。我对示例进行了一些更改以使其满足我的需要,但出现错误。首先是代码:
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.ImageItem):
    """Item that displays only the visible slice of a large 1-D dataset.

    NOTE(review): the base class is kept exactly as posted.  With
    pg.ImageItem the ``setData(visible)`` call in updateHDF5Plot() fails with
    the TypeError quoted in the traceback further down this document; the
    corrected versions in this document derive from pg.PlotCurveItem.
    """

    def __init__(self, *args, **kwds):
        self.hdf5 = None
        self.limit = 10000 # maximum number of samples to be plotted
        pg.ImageItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read the samples inside the current x-range and display them."""
        if self.hdf5 is None:
            self.setData([])
            return
        vb = self.getViewBox()
        if vb is None:
            return # no ViewBox yet
        # Determine what data range must be read from HDF5
        xrange = vb.viewRange()[0]
        start = max(0, int(xrange[0]) - 1)
        stop = min(len(self.hdf5), int(xrange[1] + 2))
        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # read data in chunks of ~1M samples
            chunkSize = (1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # reshape chunk to be integral multiple of ds
                chunk = chunk[:(len(chunk) // ds) * ds].reshape(len(chunk) // ds, ds)
                # compute max and min
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + chunk.shape[0] * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + chunk.shape[0] * 2:2] = chunkMax
                targetPtr += chunk.shape[0] * 2
            visible = visible[:targetPtr]
            scale = ds * 0.5
        self.setData(visible) # update the plot
        self.setPos(start, 0) # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1) # scale to match downsampling
# Open the file and hand the dataset named 'data' to the curve.  The file is
# left open because the curve reads from the dataset whenever the view changes.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
这里是错误:
Traceback (most recent call last):
File "pyqtg.py", line 206, in <module>
curve.setHDF5(f['data'])
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/h5py-3.3.0-py3.8-linux-x86_64.egg/h5py/_hl/group.py", line 305, in __getitem__
oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File "h5py/h5o.pyx", line 190, in h5py.h5o.open
KeyError: "Unable to open object (object 'data' doesn't exist)"
问题是我不知道这个 hdf5 文件的内部结构是什么样的,所以我不确定应该用什么名称来替换 'data',或者是否需要完全不同的做法。非常感谢任何帮助。
编辑 1:
我是通过运行 python -m pyqtgraph.examples 得到这个例子的。GUI 弹出后,您会在列表中看到“HDF5 big data”(HDF5 大数据)。我的代码源于那个例子。示例列表中从上往下第三个 ImageView 示例,就是我想用来显示 HDF5 文件的代码。
编辑 2: 这是代码 kcw78 第二部分 运行 的结果: http://pastie.org/p/3scRyUm1ZFVJNMwTHQHCBv
编辑 3: 所以我 运行 上面的代码但是在 kcw78 的帮助下做了一个小改动。我改变了:
# Version before the change: the file is opened without a context manager
# and the curve is given the dataset named 'data'.
f = h5py.File('test.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)
至:
# Version after the change: context manager, dataset name 'aggea'.
with h5py.File('test.hdf5', 'r') as h5f:
    curve = HDF5Plot()
    curve.setHDF5(h5f['aggea'])
    plt.addItem(curve)
# NOTE(review): the file is closed when the with-block ends, while the curve
# still holds the dataset object - confirm nothing reads from it afterwards.
并得到错误:
Traceback (most recent call last):
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
self.parentChanged()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
self._updateView()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
self.viewRangeChanged()
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsObject.py", line 23, in itemChange
self.parentChanged()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 458, in parentChanged
self._updateView()
File "/home/anaconda3/envs/img/lib/python3.8/site-packages/pyqtgraph/graphicsItems/GraphicsItem.py", line 514, in _updateView
self.viewRangeChanged()
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
Traceback (most recent call last):
File "pyqtg.py", line 25, in viewRangeChanged
self.updateHDF5Plot()
File "pyqtg.py", line 77, in updateHDF5Plot
self.setData(visible) # update the plot
TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
编辑 4:
这是结果的照片:https://imgur.com/a/tVHNdx9。我从创建 2d hdf5 文件和使用我的 2d 数据文件得到相同的空结果。
# Copy the 2-D 'aggea' dataset into a new file as a flattened 1-D dataset.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)  # load into memory, flatten to 1-D
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)
编辑 5:运行和绘图的代码
import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.PlotCurveItem):
    """Curve that plots only the visible part of a large 1-D data source.

    The source (``self.hdf5``) must support ``len()``, slicing and ``dtype``.
    When more than ``self.limit`` samples are visible, the data is reduced
    to an interleaved min/max envelope, read chunk by chunk.
    """

    def __init__(self, *args, **kwds):
        # Attributes are assigned before the base constructor runs (order
        # kept from the original code).
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read, downsample if necessary, and display the visible samples."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5.  Both ends are
        # clamped so that 0 <= start <= stop <= len(data).  Without the clamp
        # a view left of x=0 produces a negative stop (interpreted by slicing
        # as "counted from the end", i.e. almost the whole dataset is read),
        # and a view far beyond the end produces ds == 0 and a
        # ZeroDivisionError below.
        total = len(self.hdf5)
        xrange = vb.viewRange()[0]
        start = min(total, max(0, int(xrange[0]) - 1))
        stop = max(start, min(total, int(xrange[1] + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # Read data in chunks of ~1M samples, rounded to a multiple of ds.
            # max(1, ...) keeps the chunk non-empty when ds > 1,000,000, so
            # the loop below always advances.
            chunkSize = max(1, 1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # Drop the incomplete tail and view the rest as rows of ds samples
                rows = len(chunk) // ds
                chunk = chunk[:rows * ds].reshape(rows, ds)
                # compute max and min of every row
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + rows * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + rows * 2:2] = chunkMax
                targetPtr += rows * 2
            visible = visible[:targetPtr]
            # two output points (min, max) per ds input samples
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling
# Step 1: copy the 2-D 'aggea' dataset into a new file as a flat 1-D dataset.
with h5py.File('mytest.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['aggea'][:].reshape(-1,)
    h5fw.create_dataset('data', data=arr)

# Step 2: re-open the new file read-only and keep it open.  The curve reads
# from the dataset every time the view changes, so it must not be handed a
# dataset whose file is closed when a with-block ends.
f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
        f.close()  # event loop has finished; no further reads will happen
编辑 6: 最终的效果:
from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
import matplotlib.pyplot as plt
app = QtGui.QApplication([])

## Create window with ImageView widget
win = QtGui.QMainWindow()
win.resize(800,800)
imv = pg.ImageView()
win.setCentralWidget(imv)
win.show()
win.setWindowTitle('pyqtgraph example: ImageView')

# Read the whole 'aggea' dataset into memory.  Indexing with `[()]` returns
# the values (the deprecated `.value` attribute did the same thing).
# The file is indexed directly so that a missing dataset raises a KeyError
# naming it, instead of `.get()` returning None and failing on the `[()]`.
with h5py.File('test.hdf5', 'r') as h5fr:
    data = h5fr['aggea'][()]
# `data` is an in-memory array, so the file can be closed before displaying it.
imv.setImage(data)

# Alternative way to load a dataset into a NumPy array:
# hf = h5py.File('test.hdf5', 'r')
# n1 = np.array(hf['/pathtodata'][:])
# print(n1.shape)

## Set a custom color map
colors = [
    (0, 0, 0),
    (45, 5, 61),
    (84, 42, 55),
    (150, 87, 60),
    (208, 171, 141),
    (255, 255, 255)
]
# Six colour stops spread evenly over the normalized range 0.0 - 1.0
cmap = pg.ColorMap(pos=np.linspace(0.0, 1.0, 6), color=colors)
imv.setColorMap(cmap)

## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
该错误表明您的 HDF5 文件中不存在名为 'data' 的数据集。所以,我们必须弄清楚为什么它不存在。 :-) 您没有说明您运行的示例是从哪里得到的。我在 pyqtgraph/examples 存储库中找到的那个示例,是在函数 createFile(finalSize=2000000000) 中创建该文件的。
我假设您是运行了这段代码来创建 test.hdf5?
如果您没有使用示例代码创建文件,那么 test.hdf5 是从哪里来的?
无论哪种方式,这里都有一些代码可以查询您的 HDF5 文件。它将为我们提供数据集名称和属性(形状和数据类型)。有了这些信息,我们就可以确定接下来的步骤。
import h5py
# Open the file read-only; the context manager closes it even if one of the
# lookups below raises.
with h5py.File('test.hdf5', 'r') as h5f:
    # print the dataset names and attributes of the top-level objects
    for ds, obj in h5f.items():
        if isinstance(obj, h5py.Group):
            print(f'{ds} is a Group')
        elif isinstance(obj, h5py.Dataset):
            print(f'{ds} is Dataset')
            print(f'shape={obj.shape}, dtype={obj.dtype}')
        else:
            print(f'Object: {ds} is not a Group or a Dataset')
示例中 test.hdf5 的预期输出:
# data is Dataset
# shape=(501000000,), dtype=float32
下面是一个更通用的方法,使用 .visititems() 方法递归检查层次结构中的所有对象。当数据集按组 (Group) 组织时,我更推荐这种方法。注意:它使用了 Python 的 with/as 上下文管理器,这是首选的做法——请注意这里不需要调用 f.close()。上下文管理器会自动完成打开和关闭 (setup/teardown) 操作,因此即使您的代码意外退出,文件也不会保持打开状态。
import h5py
def visitor_func(name, node):
    """Print the full path of one HDF5 object together with its kind.

    Intended as the callback for ``visititems()``.  ``name`` is accepted
    because the callback receives it, but the object's own ``node.name`` is
    what gets printed.  Nothing is returned.
    """
    if isinstance(node, h5py.Group):
        kind = 'is a Group'
    elif isinstance(node, h5py.Dataset):
        # Datasets with dtype 'object' are reported separately
        kind = 'is an object Dataset' if node.dtype == 'object' else 'is a Dataset'
    else:
        kind = 'is an unknown type'
    print(node.name, kind)
##### main #####
# Visit every object in the file with visitor_func; the file is closed on exit.
with h5py.File('test.hdf5', 'r') as h5f:
    h5f.visititems(visitor_func)
2021-08-20 添加:
我们需要弄清楚为什么您的数据与示例 .hdf5 文件的行为不同。请对您的文件运行下面这一小段代码。
import h5py
ds = '/system.soft.avtcams.glasscellvert/absK40'
# The context manager closes the file even if the dataset path does not exist.
with h5py.File('test.hdf5', 'r') as h5f:
    dset = h5f[ds]  # look the object up once
    print(f'shape={dset.shape}, dtype={dset.dtype}')
    print(f'type={type(dset)}')
作为参考,这是我通过示例 test.hdf5 文件获得的输出:
# shape=(501000000,), dtype=float32
# type=h5py._hl.dataset.Dataset
2021-08-21 添加:
这是一个测试,看看您是否可以将 NumPy 数组用于曲线数据。它读取 HDF5 数据集并将二维数据集整形为一维数组。希望这有效。如果没有,那我就没办法了,你得找 pyqtgraph
有专业知识的人来诊断问题。
# Read the 2-D dataset fully into memory and flatten it to a 1-D NumPy array.
with h5py.File('test.hdf5', 'r') as h5f:
    arr = h5f['/system.soft.avtcams.glasscellvert/absK40'][:].reshape(-1,)

# `arr` lives in memory, so the curve no longer needs the (now closed) file.
curve = HDF5Plot()
curve.setHDF5(arr)
plt.addItem(curve)
这是基于上述评论中问答的新答案。
除了我评论中描述的编码错误外,示例 (hdf5.py) 本身还有一个限制。它旨在展示如何读取“大数据”——即数据无法全部放入内存的情况。它通过分块读取数据集并进行下采样(上限由 self.limit = 10000 定义)来实现。因此,它只能读取一维数据集。有一种变通方法可以在不改动代码的情况下处理您的数据:编写一个小工具,提取感兴趣的数据集,重塑为一维数组并复制到新的 hdf5 文件中。(这假设感兴趣的数据集可以放入内存。至于 HDF5 数据与 NumPy 数组的区别,需要另找时间详细讨论。)
为了演示此行为,我创建了一个新的 hdf5 文件,其中包含模仿 OP 数据的二维数据集 (shape=(1038, 1388), dtype=uint16)。下面是创建该文件(名为 test_2d.hdf5)的代码:
import h5py
import numpy as np
# Random uint16 image with the same shape as the OP's data.  randint() replaces
# the deprecated random_integers(); the half-open range [1, 65536) gives the
# same inclusive value range 1..65535.
chunk = np.random.randint(1, 65536, size=(1038, 1388), dtype=np.uint16)
# chunk = np.random.normal(size=1038*1388).astype(np.uint16).reshape(1038, 1388)
with h5py.File('test_2d.hdf5', 'w') as h5f:
    h5f.create_dataset('data', data=chunk)
    print(h5f['data'].shape, h5f['data'].dtype)
创建此文件后,使用下面的代码将该数据提取到我们将用于绘制数据的新文件中。 (假设这有效,这就是“解决方案”。您可以修改它以从测试文件中提取数据,然后使用该文件从新文件中读取和绘图。)
import h5py
# Copy the 2-D 'data' dataset into a new file as a flattened 1-D dataset.
with h5py.File('test_2d.hdf5', 'r') as h5fr, \
     h5py.File('test_1d.hdf5', 'w') as h5fw:
    arr = h5fr['data'][:].reshape(-1,)  # load into memory, flatten to 1-D
    h5fw.create_dataset('data', data=arr)
    print(h5fw['data'].shape, h5fw['data'].dtype)
最后,这是在原始帖子基础上稍作修改的代码。它与您的代码类似,唯一的改动是我在评论中提到的 class HDF5Plot() 声明:使用 pg.PlotCurveItem 而不是 pg.ImageItem。把基类改成 pg.ImageItem 正是触发此错误消息的原因:TypeError: setData(self, int, Any): argument 1 has unexpected type 'numpy.ndarray'
绘图代码:
import sys, os
import numpy as np
import h5py
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
# Set up the Qt application and open an (initially empty) plot window.
pg.mkQApp()
plt = pg.plot()
plt.setWindowTitle('pyqtgraph example: HDF5 big data')
# Turn auto-ranging off so the view keeps the x-range chosen below.
# NOTE(review): confirm which axes the positional (False, False) pair addresses.
plt.enableAutoRange(False, False)
plt.setXRange(0, 500)
class HDF5Plot(pg.PlotCurveItem):
    """Curve that plots only the visible part of a large 1-D data source.

    The source (``self.hdf5``) must support ``len()``, slicing and ``dtype``.
    When more than ``self.limit`` samples are visible, the data is reduced
    to an interleaved min/max envelope, read chunk by chunk.
    """

    def __init__(self, *args, **kwds):
        # Attributes are assigned before the base constructor runs (order
        # kept from the original code).
        self.hdf5 = None
        self.limit = 10000  # maximum number of samples to be plotted
        pg.PlotCurveItem.__init__(self, *args, **kwds)

    def setHDF5(self, data):
        """Store the data source and redraw from it."""
        self.hdf5 = data
        self.updateHDF5Plot()

    def viewRangeChanged(self):
        """Redraw whenever the visible range changes."""
        self.updateHDF5Plot()

    def updateHDF5Plot(self):
        """Read, downsample if necessary, and display the visible samples."""
        if self.hdf5 is None:
            self.setData([])
            return

        vb = self.getViewBox()
        if vb is None:
            return  # no ViewBox yet

        # Determine what data range must be read from HDF5.  Both ends are
        # clamped so that 0 <= start <= stop <= len(data).  Without the clamp
        # a view left of x=0 produces a negative stop (interpreted by slicing
        # as "counted from the end", i.e. almost the whole dataset is read),
        # and a view far beyond the end produces ds == 0 and a
        # ZeroDivisionError below.
        total = len(self.hdf5)
        xrange = vb.viewRange()[0]
        start = min(total, max(0, int(xrange[0]) - 1))
        stop = max(start, min(total, int(xrange[1] + 2)))

        # Decide by how much we should downsample
        ds = int((stop - start) / self.limit) + 1
        if ds == 1:
            # Small enough to display with no intervention.
            visible = self.hdf5[start:stop]
            scale = 1
        else:
            # Here convert data into a down-sampled array suitable for visualizing.
            # Must do this piecewise to limit memory usage.
            samples = 1 + ((stop - start) // ds)
            visible = np.zeros(samples * 2, dtype=self.hdf5.dtype)
            sourcePtr = start
            targetPtr = 0
            # Read data in chunks of ~1M samples, rounded to a multiple of ds.
            # max(1, ...) keeps the chunk non-empty when ds > 1,000,000, so
            # the loop below always advances.
            chunkSize = max(1, 1000000 // ds) * ds
            while sourcePtr < stop - 1:
                chunk = self.hdf5[sourcePtr:min(stop, sourcePtr + chunkSize)]
                sourcePtr += len(chunk)
                # Drop the incomplete tail and view the rest as rows of ds samples
                rows = len(chunk) // ds
                chunk = chunk[:rows * ds].reshape(rows, ds)
                # compute max and min of every row
                chunkMax = chunk.max(axis=1)
                chunkMin = chunk.min(axis=1)
                # interleave min and max into plot data to preserve envelope shape
                visible[targetPtr:targetPtr + rows * 2:2] = chunkMin
                visible[1 + targetPtr:1 + targetPtr + rows * 2:2] = chunkMax
                targetPtr += rows * 2
            visible = visible[:targetPtr]
            # two output points (min, max) per ds input samples
            scale = ds * 0.5

        self.setData(visible)  # update the plot
        self.setPos(start, 0)  # shift to match starting index
        self.resetTransform()
        self.scale(scale, 1)  # scale to match downsampling
# The file stays open while the GUI runs: the curve reads from the dataset
# every time the view range changes.
f = h5py.File('test_1d.hdf5', 'r')
curve = HDF5Plot()
curve.setHDF5(f['data'])
plt.addItem(curve)

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()
        f.close()  # event loop has finished; no further reads will happen
根据我的随机数据创建的图。
这是一个简单的示例,展示了如何从 HDF5 文件中提取数据(作为二维数组)并使用 pyqtgraph
绘图。它还从 NumPy 数组创建一个图(用于比较目的——这两种方法几乎相同)。
- 前 2 个图的 x= 和 y= 值使用“随机”生成的 NumPy 数组数据。
- 第二对图的 x= 和 y= 值是从 HDF5 文件中读取的。
只需稍作改动即可使用您的数据。需要更改:1) HDF5 文件名,和 2) 数据集名称。您将必须弄清楚如何将数据从 shape=(1038,1388)
重塑为 X 和 Y 数据的适当形状的数组。
代码如下:
from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import h5py
import pyqtgraph as pg
# create some HDF5 data in a 2-d array of X,Y pairs
with h5py.File('plot_2d_data.h5','w') as h5f:
    data = h5f.create_dataset('data',shape=(100,2))
    data[:,0] = np.arange(0.0,10.0,0.1) ## X data points
    data[:,1] = np.random.normal(size=100) ## Y data points

app = QtGui.QApplication([])
win = pg.GraphicsLayoutWidget(show=True, title="2-D plot examples")
win.resize(1000,600)
win.setWindowTitle('pyqtgraph example: 2D Plotting')

# Enable antialiasing for prettier plots
pg.setConfigOptions(antialias=True)

# First row: plots built directly from NumPy arrays
p1 = win.addPlot(title="Plot of NumPy data",
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100))
p2 = win.addPlot(title="NumPy data with Points",
                 x=np.arange(0.0,10.0,0.1), y=np.random.normal(size=100),
                 pen=(255,0,0), symbolBrush=(255,0,0))

win.nextRow()

# Second row: the same kind of plots from the HDF5 file.  The dataset is read
# into memory once; both plots then use the in-memory columns.
with h5py.File('plot_2d_data.h5','r') as h5f:
    xy = h5f['data'][:]
p3 = win.addPlot(title="Plot of HDF5 data",
                 x=xy[:,0], y=xy[:,1])
p4 = win.addPlot(title="HDF5 data with Points",
                 x=xy[:,0], y=xy[:,1],
                 pen=(0,0,255), symbolBrush=(0,0,255))

## Start Qt event loop unless running in interactive mode or using pyside.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()