pyqtgraph / PlotCurveItem 的实时可视化瓶颈
Realtime visualisation bottleneck with pyqtgraph / PlotCurveItem
我目前正在使用 pyqtgraph 对 64 条相互独立的数据轨迹(traces/plots)进行实时可视化。总体速度还不错,但我注意到一旦样本缓冲区长度超过 2000 个点,速度就会严重下降。对下面的代码进行性能分析后发现,functions.py:1440(arrayToQPath) 似乎占用了大部分时间:
import numpy
import cProfile
import logging
import pyqtgraph as pg
from PyQt5 import QtCore,uic
from PyQt5.QtGui import *
from PyQt5.QtCore import QRect, QTimer
def program(columns=8, samples=10000, channels=64):
    """Show a grid of continuously updated sine curves and run the event loop.

    Args:
        columns: Number of plots placed in each row of the grid.
        samples: Number of points kept in every curve's buffer.
        channels: Number of plots/curves to create.

    Returns:
        The value returned by ``app.exec_()``.
    """
    app = QApplication([])
    win = pg.GraphicsWindow()
    pg.setConfigOptions(imageAxisOrder='row-major')
    win.resize(1280, 768)
    win.ci.layout.setSpacing(0)
    win.ci.layout.setContentsMargins(0, 0, 0, 0)

    # Column 0 holds the shared x values; columns 1..channels hold the signals.
    data = numpy.zeros((samples, channels + 1))
    # Floor division keeps the grid row an integer; on Python 3 a plain ``/``
    # would hand fractional float row indices to the layout.
    plots = [
        win.addPlot(row=i // columns + 1, col=i % columns)
        for i in range(channels)
    ]
    curves = list()

    x = numpy.linspace(0, 1, samples, endpoint=True)
    f = 2  # Frequency in Hz
    A = 1  # Amplitude in Unit
    y = A * numpy.sin(2 * numpy.pi * f * x).reshape((samples, 1))  # Signal
    data[:, 0] = x
    data[:, 1:] = numpy.repeat(y, channels, axis=1)

    # Channel numbers start at 1 so they double as column indices into data.
    for chn_no, p in enumerate(plots, 1):
        c = pg.PlotCurveItem(pen=(chn_no, channels * 1.3))
        p.addItem(c)
        curves.append((c, chn_no))

    def update():
        # data is only modified in place, so no ``nonlocal`` declaration is
        # needed: shift every signal by 100 samples and redraw all curves.
        data[:, 1:] = numpy.roll(data[:, 1:], 100, axis=0)
        for curve, data_index in curves:
            curve.setData(data[:, 0], data[:, data_index])

    timer = QTimer()
    timer.timeout.connect(update)
    timer.start(30)
    return app.exec_()
if __name__ == "__main__":
    # Run the demo under cProfile; the report is sorted by cumulative time.
    logging.basicConfig(level=logging.INFO)
    profiled_statement = "program()"
    cProfile.run(profiled_statement, sort="cumtime")
    # To run without the profiler, call the function directly instead:
    # program()
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 533.660 533.660 {built-in method builtins.exec}
1 0.053 0.053 533.660 533.660 <string>:1(<module>)
1 0.018 0.018 533.607 533.607 pyqtgraph_test.py:11(program)
1 9.181 9.181 532.209 532.209 {built-in method exec_}
2709 0.015 0.000 401.728 0.148 GraphicsView.py:153(paintEvent)
2709 15.572 0.006 401.696 0.148 {paintEvent}
173376 0.193 0.000 345.725 0.002 debug.py:89(w)
173376 1.599 0.000 345.532 0.002 PlotCurveItem.py:452(paint)
173312 0.671 0.000 271.973 0.002 PlotCurveItem.py:440(getPath)
173312 0.744 0.000 271.153 0.002 PlotCurveItem.py:416(generatePath)
173312 266.888 0.002 270.409 0.002 functions.py:1440(arrayToQPath)
2709 5.102 0.002 113.195 0.042 pyqtgraph_test.py:36(update)
173440 0.193 0.000 100.616 0.001 PlotCurveItem.py:297(setData)
173440 8.718 0.000 100.424 0.001 PlotCurveItem.py:337(updateData)
因此每次调用耗时约 1.5 毫秒。在对 arrayToQPath 做试验时,我注意到其中仅 ds >> path 这一行似乎就消耗了大部分时间(以下是把该行注释掉之后的分析结果):
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 190.847 190.847 {built-in method builtins.exec}
1 0.050 0.050 190.847 190.847 <string>:1(<module>)
1 0.017 0.017 190.796 190.796 pyqtgraph_test.py:11(program)
1 7.438 7.438 189.395 189.395 {built-in method exec_}
2221 4.165 0.002 88.497 0.040 pyqtgraph_test.py:36(update)
2221 0.010 0.000 86.830 0.039 GraphicsView.py:153(paintEvent)
2221 11.494 0.005 86.806 0.039 {paintEvent}
142208 0.152 0.000 77.941 0.001 PlotCurveItem.py:297(setData)
142208 4.500 0.000 77.789 0.001 PlotCurveItem.py:337(updateData)
ds 是一个 QtCore.QDataStream,path 是一个 QPainterPath。但是,我完全不明白 >> 操作为什么会花费这么多时间。所以我正在寻找一种能够加快渲染速度的方法,并且希望继续使用 pyqtgraph,即目前不打算切换到其他绘图库。
原始 functions.py 中的 arrayToQPath:
def arrayToQPath(x, y, connect='all'):
    """Convert arrays of x, y coordinates to a QPainterPath as efficiently as possible.

    The *connect* argument may be 'all', indicating that each point should be
    connected to the next; 'pairs', indicating that each pair of points
    should be connected; 'finite', in which case the connection flag of each
    point is 1 only where both its x and y values are finite; or an array of
    int32 values (0 or 1) indicating connections.

    Returns the populated QPainterPath. Raises Exception when *connect* is
    none of the accepted values.
    """
    ## Create all vertices in path. The method used below creates a binary format so that all
    ## vertices can be read in at once. This binary format may change in future versions of Qt,
    ## so the original (slower) method is left here for emergencies:
    #path.moveTo(x[0], y[0])
    #if connect == 'all':
    #    for i in range(1, y.shape[0]):
    #        path.lineTo(x[i], y[i])
    #elif connect == 'pairs':
    #    for i in range(1, y.shape[0]):
    #        if i%2 == 0:
    #            path.lineTo(x[i], y[i])
    #        else:
    #            path.moveTo(x[i], y[i])
    #elif isinstance(connect, np.ndarray):
    #    for i in range(1, y.shape[0]):
    #        if connect[i] == 1:
    #            path.lineTo(x[i], y[i])
    #        else:
    #            path.moveTo(x[i], y[i])
    #else:
    #    raise Exception('connect argument must be "all", "pairs", or array')

    ## Speed this up using >> operator
    ## Format is:
    ##    numVerts(i4)   0(i4)
    ##    x(f8)   y(f8)   0(i4)    <-- 0 means this vertex does not connect
    ##    x(f8)   y(f8)   1(i4)    <-- 1 means this vertex connects to the previous vertex
    ##    ...
    ##    0(i4)
    ##
    ## All values are big endian--pack using struct.pack('>d') or struct.pack('>i')

    path = QtGui.QPainterPath()

    #profiler = debug.Profiler()
    n = x.shape[0]
    # create empty array, pad with extra space on either end
    # (each record is 8 + 8 + 4 = 20 bytes, big endian)
    arr = np.empty(n+2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # write first two integers
    #profiler('allocate empty')
    # byte-level view of the same memory, used to write the header and footer
    byteview = arr.view(dtype=np.ubyte)
    # the first 12 bytes are never streamed (the stream starts at byte 12);
    # zero them since np.empty leaves the memory uninitialised
    byteview[:12] = 0
    # header occupies bytes 12..19: vertex count followed by a 0
    byteview.data[12:20] = struct.pack('>ii', n, 0)
    #profiler('pack header')
    # Fill array with vertex values
    arr[1:-1]['x'] = x
    arr[1:-1]['y'] = y

    # decide which points are connected by lines
    if eq(connect, 'all'):
        arr[1:-1]['c'] = 1
    elif eq(connect, 'pairs'):
        arr[1:-1]['c'][::2] = 1
        arr[1:-1]['c'][1::2] = 0
    elif eq(connect, 'finite'):
        arr[1:-1]['c'] = np.isfinite(x) & np.isfinite(y)
    elif isinstance(connect, np.ndarray):
        arr[1:-1]['c'] = connect
    else:
        raise Exception('connect argument must be "all", "pairs", "finite", or array')

    #profiler('fill array')
    # write last 0
    # (lastInd is the byte offset of the final padding record)
    lastInd = 20*(n+1)
    byteview.data[lastInd:lastInd+4] = struct.pack('>i', 0)
    #profiler('footer')
    # create datastream object and stream into path

    ## Avoiding this method because QByteArray(str) leaks memory in PySide
    #buf = QtCore.QByteArray(arr.data[12:lastInd+4])  # I think one unnecessary copy happens here

    # the buffer is stored as an attribute of the path so that it stays
    # referenced for as long as the path object does
    path.strn = byteview.data[12:lastInd+4] # make sure data doesn't run away
    try:
        buf = QtCore.QByteArray.fromRawData(path.strn)
    except TypeError:
        # fall back to copying the data into a bytes object when
        # fromRawData rejects the buffer type
        buf = QtCore.QByteArray(bytes(path.strn))
    #profiler('create buffer')
    ds = QtCore.QDataStream(buf)

    ds >> path
    #profiler('load')

    return path
编辑:
深入研究 Qt 之后发现,C++ 中的 QDataStream >> 运算符本身就相当慢。它慢到这样的程度:直接覆盖已有 QtGui.QPainterPath() 中各元素的位置,比重新创建这些元素还要快:
import timeit
import struct
import numpy as np
from PyQt5 import QtGui,QtCore
no_trys = 1000


def test(pass_data, samples = 10000):
    """Time refilling a QPainterPath via QDataStream against updating it in place.

    Args:
        pass_data: When true, the buffer is streamed into the path once before
            timing so that the path already contains elements. When false the
            path is empty and the in-place variant has nothing to update.
        samples: Number of vertices encoded in the zero-filled buffer.

    Prints two timings, each for ``no_trys`` runs: first the QDataStream
    variant, then the ``setElementPositionAt`` variant.
    """
    path = QtGui.QPainterPath()
    n = samples
    # Zero-filled record array (20 bytes per vertex), padded on either end.
    arr = np.zeros(n + 2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # Header: vertex count followed by a 0, big endian.
    byteview = arr.view(dtype=np.ubyte)
    byteview.data[12:20] = struct.pack('>ii', n, 0)
    # The trailing 0 needs no explicit write because np.zeros already
    # cleared the buffer.
    lastInd = 20 * (n + 1)
    # Keep the raw buffer referenced from the path so it stays alive.
    path.strn = byteview.data[12:lastInd + 4]
    buf = QtCore.QByteArray.fromRawData(path.strn)
    ds = QtCore.QDataStream(buf)
    path.reserve(n)
    if pass_data:
        ds >> path

    # Built once, outside the timed code, and limited to indices that
    # actually exist in the path.
    values = [(i, i, i) for i in range(min(samples, path.elementCount()))]

    def func1():
        stream = QtCore.QDataStream(buf)
        stream >> path

    def func2():
        # Explicit loop on purpose: on Python 3 ``map`` is lazy, so the former
        # ``map(path.setElementPositionAt, values)`` never performed a single
        # call (and would have passed each tuple as one argument).
        for index, x_pos, y_pos in values:
            path.setElementPositionAt(index, x_pos, y_pos)

    print(timeit.timeit(func1, number=no_trys))
    print(timeit.timeit(func2, number=no_trys))


test(True)
QDataStream(数据流)方式的耗时为 1.32 秒,map(path.setElementPositionAt, values) 方式的耗时为 0.9 秒。
在我的机器上对以下 C++ 片段进行计时,耗时超过 8 秒:
#include <QtCore/QDataStream>
#include <QtGui/QPainterPath>
// Benchmark helper: reads `samples` (type, x, y) records through a QDataStream
// from an in-memory buffer and returns the sum of the last record read
// (0 when no record is read).
int function2(const int samples)
{
    // Same byte count as the stream built by the Python benchmark:
    // 8 + 20 per sample + 4.
    const int size = 8 + samples * 20 + 4;
    std::vector<char> data(size, 0);
    memcpy(data.data(), &samples, 4);
    QByteArray buf(QByteArray::fromRawData(data.data(), size));
    QDataStream ds(buf);
    // Initialised so that samples <= 0 yields 0 rather than an
    // indeterminate value.
    float ret = 0.0f;
    for (int counter = 0; counter < samples; counter++)
    {
        int type = 1;
        double x = 0, y = 0;
        ds >> type >> x >> y;
        ret = static_cast<float>(type + x + y);
    }
    // Make the float -> int narrowing explicit.
    return static_cast<int>(ret);
}
// Calls function2 `tries` times and prints the elapsed wall-clock time
// followed by the accumulated return values.
int main()
{
    using bench_clock = std::chrono::high_resolution_clock;
    constexpr int samples = 10000;
    constexpr int tries = 10000;

    int total = 0;
    const auto began = bench_clock::now();
    for (int run = 0; run < tries; ++run)
    {
        total += function2(samples);
    }
    const std::chrono::duration<double> elapsed = bench_clock::now() - began;

    std::cout << "done\n"
              << "Elapsed time: " << elapsed.count() << " s\n"
              << total;
    return 0;
}
最简单的解决方案是启用 OpenGL 模式,即安装 PyOpenGL 和 PyOpenGL-accelerate 模块并开启 OpenGL 支持。这样就完全绕过了 createPath 部分。我只是在我的应用程序中添加了以下代码块:
try:
    # The import only checks that PyOpenGL is available before the OpenGL
    # options are switched on.
    import OpenGL
    pg.setConfigOptions(useOpenGL=True, enableExperimental=True)
except Exception as e:
    # Best effort: report the problem and carry on without OpenGL.
    failure_note = f"Enabling OpenGL failed with {e}. Will result in slow rendering. Try installing PyOpenGL."
    print(failure_note)
有了它,我的 PC 可以毫不费力地用 30000 个数据点绘制 64 条轨迹。
我目前正在使用 pyqtgraph 对 64 条相互独立的数据轨迹(traces/plots)进行实时可视化。总体速度还不错,但我注意到一旦样本缓冲区长度超过 2000 个点,速度就会严重下降。对下面的代码进行性能分析后发现,functions.py:1440(arrayToQPath) 似乎占用了大部分时间:
import numpy
import cProfile
import logging
import pyqtgraph as pg
from PyQt5 import QtCore,uic
from PyQt5.QtGui import *
from PyQt5.QtCore import QRect, QTimer
def program(columns=8, samples=10000, channels=64):
    """Show a grid of continuously updated sine curves and run the event loop.

    Args:
        columns: Number of plots placed in each row of the grid.
        samples: Number of points kept in every curve's buffer.
        channels: Number of plots/curves to create.

    Returns:
        The value returned by ``app.exec_()``.
    """
    app = QApplication([])
    win = pg.GraphicsWindow()
    pg.setConfigOptions(imageAxisOrder='row-major')
    win.resize(1280, 768)
    win.ci.layout.setSpacing(0)
    win.ci.layout.setContentsMargins(0, 0, 0, 0)

    # Column 0 holds the shared x values; columns 1..channels hold the signals.
    data = numpy.zeros((samples, channels + 1))
    # Floor division keeps the grid row an integer; on Python 3 a plain ``/``
    # would hand fractional float row indices to the layout.
    plots = [
        win.addPlot(row=i // columns + 1, col=i % columns)
        for i in range(channels)
    ]
    curves = list()

    x = numpy.linspace(0, 1, samples, endpoint=True)
    f = 2  # Frequency in Hz
    A = 1  # Amplitude in Unit
    y = A * numpy.sin(2 * numpy.pi * f * x).reshape((samples, 1))  # Signal
    data[:, 0] = x
    data[:, 1:] = numpy.repeat(y, channels, axis=1)

    # Channel numbers start at 1 so they double as column indices into data.
    for chn_no, p in enumerate(plots, 1):
        c = pg.PlotCurveItem(pen=(chn_no, channels * 1.3))
        p.addItem(c)
        curves.append((c, chn_no))

    def update():
        # data is only modified in place, so no ``nonlocal`` declaration is
        # needed: shift every signal by 100 samples and redraw all curves.
        data[:, 1:] = numpy.roll(data[:, 1:], 100, axis=0)
        for curve, data_index in curves:
            curve.setData(data[:, 0], data[:, data_index])

    timer = QTimer()
    timer.timeout.connect(update)
    timer.start(30)
    return app.exec_()
if __name__ == "__main__":
    # Run the demo under cProfile; the report is sorted by cumulative time.
    logging.basicConfig(level=logging.INFO)
    profiled_statement = "program()"
    cProfile.run(profiled_statement, sort="cumtime")
    # To run without the profiler, call the function directly instead:
    # program()
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 533.660 533.660 {built-in method builtins.exec}
1 0.053 0.053 533.660 533.660 <string>:1(<module>)
1 0.018 0.018 533.607 533.607 pyqtgraph_test.py:11(program)
1 9.181 9.181 532.209 532.209 {built-in method exec_}
2709 0.015 0.000 401.728 0.148 GraphicsView.py:153(paintEvent)
2709 15.572 0.006 401.696 0.148 {paintEvent}
173376 0.193 0.000 345.725 0.002 debug.py:89(w)
173376 1.599 0.000 345.532 0.002 PlotCurveItem.py:452(paint)
173312 0.671 0.000 271.973 0.002 PlotCurveItem.py:440(getPath)
173312 0.744 0.000 271.153 0.002 PlotCurveItem.py:416(generatePath)
173312 266.888 0.002 270.409 0.002 functions.py:1440(arrayToQPath)
2709 5.102 0.002 113.195 0.042 pyqtgraph_test.py:36(update)
173440 0.193 0.000 100.616 0.001 PlotCurveItem.py:297(setData)
173440 8.718 0.000 100.424 0.001 PlotCurveItem.py:337(updateData)
因此每次调用耗时约 1.5 毫秒。在对 arrayToQPath 做试验时,我注意到其中仅 ds >> path 这一行似乎就消耗了大部分时间(以下是把该行注释掉之后的分析结果):
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 190.847 190.847 {built-in method builtins.exec}
1 0.050 0.050 190.847 190.847 <string>:1(<module>)
1 0.017 0.017 190.796 190.796 pyqtgraph_test.py:11(program)
1 7.438 7.438 189.395 189.395 {built-in method exec_}
2221 4.165 0.002 88.497 0.040 pyqtgraph_test.py:36(update)
2221 0.010 0.000 86.830 0.039 GraphicsView.py:153(paintEvent)
2221 11.494 0.005 86.806 0.039 {paintEvent}
142208 0.152 0.000 77.941 0.001 PlotCurveItem.py:297(setData)
142208 4.500 0.000 77.789 0.001 PlotCurveItem.py:337(updateData)
ds 是一个 QtCore.QDataStream,path 是一个 QPainterPath。但是,我完全不明白 >> 操作为什么会花费这么多时间。所以我正在寻找一种能够加快渲染速度的方法,并且希望继续使用 pyqtgraph,即目前不打算切换到其他绘图库。
原始 functions.py 中的 arrayToQPath:
def arrayToQPath(x, y, connect='all'):
    """Convert arrays of x, y coordinates to a QPainterPath as efficiently as possible.

    The *connect* argument may be 'all', indicating that each point should be
    connected to the next; 'pairs', indicating that each pair of points
    should be connected; 'finite', in which case the connection flag of each
    point is 1 only where both its x and y values are finite; or an array of
    int32 values (0 or 1) indicating connections.

    Returns the populated QPainterPath. Raises Exception when *connect* is
    none of the accepted values.
    """
    ## Create all vertices in path. The method used below creates a binary format so that all
    ## vertices can be read in at once. This binary format may change in future versions of Qt,
    ## so the original (slower) method is left here for emergencies:
    #path.moveTo(x[0], y[0])
    #if connect == 'all':
    #    for i in range(1, y.shape[0]):
    #        path.lineTo(x[i], y[i])
    #elif connect == 'pairs':
    #    for i in range(1, y.shape[0]):
    #        if i%2 == 0:
    #            path.lineTo(x[i], y[i])
    #        else:
    #            path.moveTo(x[i], y[i])
    #elif isinstance(connect, np.ndarray):
    #    for i in range(1, y.shape[0]):
    #        if connect[i] == 1:
    #            path.lineTo(x[i], y[i])
    #        else:
    #            path.moveTo(x[i], y[i])
    #else:
    #    raise Exception('connect argument must be "all", "pairs", or array')

    ## Speed this up using >> operator
    ## Format is:
    ##    numVerts(i4)   0(i4)
    ##    x(f8)   y(f8)   0(i4)    <-- 0 means this vertex does not connect
    ##    x(f8)   y(f8)   1(i4)    <-- 1 means this vertex connects to the previous vertex
    ##    ...
    ##    0(i4)
    ##
    ## All values are big endian--pack using struct.pack('>d') or struct.pack('>i')

    path = QtGui.QPainterPath()

    #profiler = debug.Profiler()
    n = x.shape[0]
    # create empty array, pad with extra space on either end
    # (each record is 8 + 8 + 4 = 20 bytes, big endian)
    arr = np.empty(n+2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # write first two integers
    #profiler('allocate empty')
    # byte-level view of the same memory, used to write the header and footer
    byteview = arr.view(dtype=np.ubyte)
    # the first 12 bytes are never streamed (the stream starts at byte 12);
    # zero them since np.empty leaves the memory uninitialised
    byteview[:12] = 0
    # header occupies bytes 12..19: vertex count followed by a 0
    byteview.data[12:20] = struct.pack('>ii', n, 0)
    #profiler('pack header')
    # Fill array with vertex values
    arr[1:-1]['x'] = x
    arr[1:-1]['y'] = y

    # decide which points are connected by lines
    if eq(connect, 'all'):
        arr[1:-1]['c'] = 1
    elif eq(connect, 'pairs'):
        arr[1:-1]['c'][::2] = 1
        arr[1:-1]['c'][1::2] = 0
    elif eq(connect, 'finite'):
        arr[1:-1]['c'] = np.isfinite(x) & np.isfinite(y)
    elif isinstance(connect, np.ndarray):
        arr[1:-1]['c'] = connect
    else:
        raise Exception('connect argument must be "all", "pairs", "finite", or array')

    #profiler('fill array')
    # write last 0
    # (lastInd is the byte offset of the final padding record)
    lastInd = 20*(n+1)
    byteview.data[lastInd:lastInd+4] = struct.pack('>i', 0)
    #profiler('footer')
    # create datastream object and stream into path

    ## Avoiding this method because QByteArray(str) leaks memory in PySide
    #buf = QtCore.QByteArray(arr.data[12:lastInd+4])  # I think one unnecessary copy happens here

    # the buffer is stored as an attribute of the path so that it stays
    # referenced for as long as the path object does
    path.strn = byteview.data[12:lastInd+4] # make sure data doesn't run away
    try:
        buf = QtCore.QByteArray.fromRawData(path.strn)
    except TypeError:
        # fall back to copying the data into a bytes object when
        # fromRawData rejects the buffer type
        buf = QtCore.QByteArray(bytes(path.strn))
    #profiler('create buffer')
    ds = QtCore.QDataStream(buf)

    ds >> path
    #profiler('load')

    return path
编辑:
深入研究 Qt 之后发现,C++ 中的 QDataStream >> 运算符本身就相当慢。它慢到这样的程度:直接覆盖已有 QtGui.QPainterPath() 中各元素的位置,比重新创建这些元素还要快:
import timeit
import struct
import numpy as np
from PyQt5 import QtGui,QtCore
no_trys = 1000


def test(pass_data, samples = 10000):
    """Time refilling a QPainterPath via QDataStream against updating it in place.

    Args:
        pass_data: When true, the buffer is streamed into the path once before
            timing so that the path already contains elements. When false the
            path is empty and the in-place variant has nothing to update.
        samples: Number of vertices encoded in the zero-filled buffer.

    Prints two timings, each for ``no_trys`` runs: first the QDataStream
    variant, then the ``setElementPositionAt`` variant.
    """
    path = QtGui.QPainterPath()
    n = samples
    # Zero-filled record array (20 bytes per vertex), padded on either end.
    arr = np.zeros(n + 2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # Header: vertex count followed by a 0, big endian.
    byteview = arr.view(dtype=np.ubyte)
    byteview.data[12:20] = struct.pack('>ii', n, 0)
    # The trailing 0 needs no explicit write because np.zeros already
    # cleared the buffer.
    lastInd = 20 * (n + 1)
    # Keep the raw buffer referenced from the path so it stays alive.
    path.strn = byteview.data[12:lastInd + 4]
    buf = QtCore.QByteArray.fromRawData(path.strn)
    ds = QtCore.QDataStream(buf)
    path.reserve(n)
    if pass_data:
        ds >> path

    # Built once, outside the timed code, and limited to indices that
    # actually exist in the path.
    values = [(i, i, i) for i in range(min(samples, path.elementCount()))]

    def func1():
        stream = QtCore.QDataStream(buf)
        stream >> path

    def func2():
        # Explicit loop on purpose: on Python 3 ``map`` is lazy, so the former
        # ``map(path.setElementPositionAt, values)`` never performed a single
        # call (and would have passed each tuple as one argument).
        for index, x_pos, y_pos in values:
            path.setElementPositionAt(index, x_pos, y_pos)

    print(timeit.timeit(func1, number=no_trys))
    print(timeit.timeit(func2, number=no_trys))


test(True)
QDataStream(数据流)方式的耗时为 1.32 秒,map(path.setElementPositionAt, values) 方式的耗时为 0.9 秒。
在我的机器上对以下 C++ 片段进行计时,耗时超过 8 秒:
#include <QtCore/QDataStream>
#include <QtGui/QPainterPath>
// Benchmark helper: reads `samples` (type, x, y) records through a QDataStream
// from an in-memory buffer and returns the sum of the last record read
// (0 when no record is read).
int function2(const int samples)
{
    // Same byte count as the stream built by the Python benchmark:
    // 8 + 20 per sample + 4.
    const int size = 8 + samples * 20 + 4;
    std::vector<char> data(size, 0);
    memcpy(data.data(), &samples, 4);
    QByteArray buf(QByteArray::fromRawData(data.data(), size));
    QDataStream ds(buf);
    // Initialised so that samples <= 0 yields 0 rather than an
    // indeterminate value.
    float ret = 0.0f;
    for (int counter = 0; counter < samples; counter++)
    {
        int type = 1;
        double x = 0, y = 0;
        ds >> type >> x >> y;
        ret = static_cast<float>(type + x + y);
    }
    // Make the float -> int narrowing explicit.
    return static_cast<int>(ret);
}
// Calls function2 `tries` times and prints the elapsed wall-clock time
// followed by the accumulated return values.
int main()
{
    using bench_clock = std::chrono::high_resolution_clock;
    constexpr int samples = 10000;
    constexpr int tries = 10000;

    int total = 0;
    const auto began = bench_clock::now();
    for (int run = 0; run < tries; ++run)
    {
        total += function2(samples);
    }
    const std::chrono::duration<double> elapsed = bench_clock::now() - began;

    std::cout << "done\n"
              << "Elapsed time: " << elapsed.count() << " s\n"
              << total;
    return 0;
}
最简单的解决方案是启用 OpenGL 模式,即安装 PyOpenGL 和 PyOpenGL-accelerate 模块并开启 OpenGL 支持。这样就完全绕过了 createPath 部分。我只是在我的应用程序中添加了以下代码块:
try:
    # The import only checks that PyOpenGL is available before the OpenGL
    # options are switched on.
    import OpenGL
    pg.setConfigOptions(useOpenGL=True, enableExperimental=True)
except Exception as e:
    # Best effort: report the problem and carry on without OpenGL.
    failure_note = f"Enabling OpenGL failed with {e}. Will result in slow rendering. Try installing PyOpenGL."
    print(failure_note)
有了它,我的 PC 可以毫不费力地用 30000 个数据点绘制 64 条轨迹。