为 Pandas 模型优化 PyQt5 QAbstractView
Optimize PyQt5 QAbstractView for Pandas Model
我目前正在通过Python编写一个SQL查询接口。
在将数据写入QTableView时,我使用QAbstractTableModel来写入查询结果。
这适用于较小的查询,但在尝试显示许多行和列时会变得非常慢。有没有办法提高数据帧加载到 QTableView 的速度?
这是我的 QAbtractTableModel 的代码 class:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = df.copy()
self.original_df = df
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
# setup menu options
def toDataFrame(self):
return self._df.copy()
def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
if role != QtCore.Qt.DisplayRole:
return QtCore.QVariant()
if orientation == QtCore.Qt.Horizontal:
try:
return self._df.columns.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
elif orientation == QtCore.Qt.Vertical:
try:
# return self.df.index.tolist()
return self._df.index.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
def data(self, index, role=QtCore.Qt.DisplayRole):
if not index.isValid():
return QtCore.QVariant()
if role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
#print(type(self._df.iloc[index.row(), index.column()]))
if isinstance(self._df.iloc[index.row(), index.column()], bytes):
return QtCore.QVariant('(BLOB)')
else:
return QtCore.QVariant(str(self._df.iloc[index.row(), index.column()]))
def setData(self, index, value, role):
row = self._df.index[index.row()]
col = self._df.columns[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df[col].dtype
if dtype != object:
if ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and value.isnumeric():
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 1',row , col)
self.conSig.dataChanged.emit(row, col, value)
return True
elif ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and not(value.isnumeric()):
return False
else:
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 2', row, col)
self.conSig.dataChanged.emit(row, col, value)
return True
else:
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 3', row, col)
self.original_df.at[row, col] = value
#print(self.original_df)
self.conSig.dataChanged.emit(row, col, self.original_df)
return True
def rowCount(self, parent=QtCore.QModelIndex()):
return len(self._df.index)
def columnCount(self, parent=QtCore.QModelIndex()):
return len(self._df.columns)
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
如果我能在提高数据帧加载速度方面得到任何帮助,我将不胜感激。
谢谢。
好吧,我回答了我自己的问题。对于那些将来 运行 遇到问题的人,您正在尝试使用具有大型数据集的 QAbstractTableModel 来填充 QTableViews ...在这里你去:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = np.array(df.values)
self.original_df = df.copy()
self._cols = df.columns
self.r, self.c = np.shape(self._df)
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
def rowCount(self, parent=None):
return self.r
def columnCount(self, parent=None):
return self.c
def data(self, index, role=QtCore.Qt.DisplayRole):
if index.isValid():
if role == QtCore.Qt.DisplayRole:
if isinstance(self._df[index.row(), index.column()], bytes):
row = self._df[index.row()][0] - 1
col = self._df[index.column()][0] -1
self._df[row, col] = '(BLOB)'
return str('(BLOB)')
return str(self._df[index.row(),index.column()])
return None
def setData(self, index, value, role):
row = self._df[index.row()]
col = self._df[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df.dtype
if dtype != object:
value = None if value == '' else dtype.type(value)
table_row = row[0]-1
table_col = col[0]-1
self._df[table_row, table_col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
column_name = self.original_df.columns[table_col]
self.original_df.loc[table_row ,column_name] = value
my_df = pd.DataFrame(self._df)
my_df.columns = self.original_df.columns
self.conSig.dataChanged.emit(table_row, table_col, my_df)
return True
def headerData(self, p_int, orientation, role):
if role == QtCore.Qt.DisplayRole:
if orientation == QtCore.Qt.Horizontal:
return self._cols[p_int]
elif orientation == QtCore.Qt.Vertical:
return p_int
return None
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
我将 pandas df 转换为一个 numpy 数组,这确实加快了速度。最后,当我需要打印时,我将 numpy 转换回 pandas。这大大提高了速度。
Pandas 12 万行和 10 列的加载时间:~280 秒。
120k 行和 10 列的 Numpy 加载时间:6.634 秒。
我目前正在通过Python编写一个SQL查询接口。 在将数据写入QTableView时,我使用QAbstractTableModel来写入查询结果。
这适用于较小的查询,但在尝试显示许多行和列时会变得非常慢。有没有办法提高数据帧加载到 QTableView 的速度?
这是我的 QAbtractTableModel 的代码 class:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = df.copy()
self.original_df = df
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
# setup menu options
def toDataFrame(self):
return self._df.copy()
def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
if role != QtCore.Qt.DisplayRole:
return QtCore.QVariant()
if orientation == QtCore.Qt.Horizontal:
try:
return self._df.columns.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
elif orientation == QtCore.Qt.Vertical:
try:
# return self.df.index.tolist()
return self._df.index.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
def data(self, index, role=QtCore.Qt.DisplayRole):
if not index.isValid():
return QtCore.QVariant()
if role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
#print(type(self._df.iloc[index.row(), index.column()]))
if isinstance(self._df.iloc[index.row(), index.column()], bytes):
return QtCore.QVariant('(BLOB)')
else:
return QtCore.QVariant(str(self._df.iloc[index.row(), index.column()]))
def setData(self, index, value, role):
row = self._df.index[index.row()]
col = self._df.columns[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df[col].dtype
if dtype != object:
if ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and value.isnumeric():
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 1',row , col)
self.conSig.dataChanged.emit(row, col, value)
return True
elif ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and not(value.isnumeric()):
return False
else:
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 2', row, col)
self.conSig.dataChanged.emit(row, col, value)
return True
else:
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 3', row, col)
self.original_df.at[row, col] = value
#print(self.original_df)
self.conSig.dataChanged.emit(row, col, self.original_df)
return True
def rowCount(self, parent=QtCore.QModelIndex()):
return len(self._df.index)
def columnCount(self, parent=QtCore.QModelIndex()):
return len(self._df.columns)
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
如果我能在提高数据帧加载速度方面得到任何帮助,我将不胜感激。 谢谢。
好吧,我回答了我自己的问题。对于那些将来 运行 遇到问题的人,您正在尝试使用具有大型数据集的 QAbstractTableModel 来填充 QTableViews ...在这里你去:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = np.array(df.values)
self.original_df = df.copy()
self._cols = df.columns
self.r, self.c = np.shape(self._df)
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
def rowCount(self, parent=None):
return self.r
def columnCount(self, parent=None):
return self.c
def data(self, index, role=QtCore.Qt.DisplayRole):
if index.isValid():
if role == QtCore.Qt.DisplayRole:
if isinstance(self._df[index.row(), index.column()], bytes):
row = self._df[index.row()][0] - 1
col = self._df[index.column()][0] -1
self._df[row, col] = '(BLOB)'
return str('(BLOB)')
return str(self._df[index.row(),index.column()])
return None
def setData(self, index, value, role):
row = self._df[index.row()]
col = self._df[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df.dtype
if dtype != object:
value = None if value == '' else dtype.type(value)
table_row = row[0]-1
table_col = col[0]-1
self._df[table_row, table_col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
column_name = self.original_df.columns[table_col]
self.original_df.loc[table_row ,column_name] = value
my_df = pd.DataFrame(self._df)
my_df.columns = self.original_df.columns
self.conSig.dataChanged.emit(table_row, table_col, my_df)
return True
def headerData(self, p_int, orientation, role):
if role == QtCore.Qt.DisplayRole:
if orientation == QtCore.Qt.Horizontal:
return self._cols[p_int]
elif orientation == QtCore.Qt.Vertical:
return p_int
return None
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
我将 pandas df 转换为一个 numpy 数组,这确实加快了速度。最后,当我需要打印时,我将 numpy 转换回 pandas。这大大提高了速度。
Pandas 12 万行和 10 列的加载时间:~280 秒。 120k 行和 10 列的 Numpy 加载时间:6.634 秒。