PySide GUI freezes during filtering using QSortFilterProxyModel

61 Views Asked by At

I'm developing a PySide6/Python3 application, that contains a QTableView with a custom model DataFrameTableModel. I want to support filtering the rendered table. Hence, I'm using QSortFilterProxyModel in addition.

One requirement is to filter based on columns with different operators, e.g. filter all rows with a value >= 5 in column x. For representing a filter I implemented the class DataFrameFilter which basically just stores something like {column: 'Price', operator: 'eq', value: 12}. To apply the custom filter format, I created a class DataFrameSortFilterProxyModel that is inheriting from QSortFilterProxyModel.

from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QSortFilterProxyModel, QModelIndex, Qt
import pandas as pd

class DataFrameTableModel(QAbstractTableModel):
    def __init__(self, df: pd.DataFrame = None):
        super(DataFrameTableModel, self).__init__()

        self._df: pd.DataFrame = df

    def rowCount(self, parent: QModelIndex = ...) -> int:
        if parent.isValid() or self._df is None:
            return 0
        
        return self._df.shape[0]
        

    def columnCount(self, parent: QModelIndex = ...) -> int:
        if parent.isValid() or self._df is None:
            return 0

        return self._df.shape[1]

    def data(self, index: QModelIndex, role: int = ...) -> object:
        if index.isValid() and self._df is not None:
            value = self._df.iloc[index.row(), index.column()]

            if role == Qt.ItemDataRole.DisplayRole:
                return str(value)
            elif role == Qt.ItemDataRole.UserRole:
                return value

    def headerData(self, section: int, orientation: Qt.Orientation, role: int = ...) -> object:
        if self._df is not None:
            if role == Qt.ItemDataRole.DisplayRole:
                if orientation == Qt.Orientation.Horizontal:
                    return str(self._df.columns[section])
                else:
                    return str(self._df.index[section])
            elif role == Qt.ItemDataRole.UserRole:
                if orientation == Qt.Orientation.Horizontal:
                    return self._df.columns[section]
                else:
                    return self._df.index[section]

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled
    
    @property
    def df(self) -> pd.DataFrame:
        return self._df
    
    @df.setter
    def df(self, value: pd.DataFrame):
        self._df = value
        self.layoutChanged.emit()

class DataFrameFilterOperation(Enum):
    EQUAL = "eq"
    NOT_EQUAL = "ne"
    GREATER_THAN = "gt"
    GREATER_THAN_OR_EQUAL = "ge"
    LESS_THAN = "lt"
    LESS_THAN_OR_EQUAL = "le"

class DataFrameFilter:
    def __init__(self, column: str, column_index: int, operation: DataFrameFilterOperation, value):
        self._column = column
        self._column_index = column_index
        self._operation = operation
        self._value = value

    @property
    def column(self) -> str:
        return self._column
    
    @property
    def column_index(self) -> int:
        return self._column_index

    @property
    def operation(self) -> DataFrameFilterOperation:
        return self._operation

    @property
    def value(self):
        return self._value
    
    def __eq__(self, value: object) -> bool:
        if not isinstance(value, DataFrameFilter):
            return False
        
        return self._column == value.column and self._column_index == value.column_index and self._operation == value.operation and self._value == value.value
    
    def __ne__(self, __value: object) -> bool:
        return not self.__eq__(__value)

class DataFrameSortFilterProxyModel(QSortFilterProxyModel):
    OPERATIONS = {
        DataFrameFilterOperation.EQUAL: lambda x, y: x == y,
        DataFrameFilterOperation.NOT_EQUAL: lambda x, y: x != y,
        DataFrameFilterOperation.GREATER_THAN: lambda x, y: x > y,
        DataFrameFilterOperation.GREATER_THAN_OR_EQUAL: lambda x, y: x >= y,
        DataFrameFilterOperation.LESS_THAN: lambda x, y: x < y,
        DataFrameFilterOperation.LESS_THAN_OR_EQUAL: lambda x, y: x <= y
    }

    def __init__(self):
        super(DataFrameSortFilterProxyModel, self).__init__()

        self._filters = []

    def filterAcceptsRow(self, source_row: int, source_parent: QModelIndex) -> bool:
        result = []

        for filter in self._filters:
            value = self.sourceModel().index(source_row, filter.column_index, source_parent).data(Qt.ItemDataRole.UserRole)
            result.append(self.OPERATIONS[filter.operation](value, filter.value))
        
        return all(result)
    
    def lessThan(self, left: QModelIndex, right: QModelIndex) -> bool:
        left_value = left.data(Qt.ItemDataRole.UserRole)
        right_value = right.data(Qt.ItemDataRole.UserRole)

        return left_value < right_value
    
    def add_filter(self, filter: DataFrameFilter):
        self._filters.append(filter)
        self.invalidate()

    def remove_filter(self, filter: DataFrameFilter):
        self._filters.remove(filter)
        self.invalidate()
    
    def clear_filters(self):
        self._filters.clear()
        self.invalidate()

Problem: Basically, everything works wonderfully for small data sets. The problem is that for larger data sets (~60000 rows) filtering obviously takes so long that the GUI freezes for a couple of seconds. I thought about moving the filtering logic to a second thread (QThread), but the UI should only be manipulated in the GUI thread, and since editing the model also changes the UI, I cannot adapt the model from a second thread.

It's not a problem if the filtering takes a few seconds, it's just that the UI shouldn't freeze during this time so that you can display a progress bar or something like that. Any suggesstions or solutions?

EDIT 03/11/24

I came up with a custom solution by implementing a custom QTableModel without using a QSortFilterProxyModel at alll. The idea is to outsource filtering to a second QThread controlled by a method of the custom model. The model itself isn't altered until filtering is finished. The separate thread will return the finally filtered DataFrame and it will be applied to the model instance that has started the thread. This means the UI no longer freezes and loading animations can be displayed and controlled via Qt Signals.

model.py:

from enum import Enum
from PySide6.QtCore import QAbstractTableModel, QModelIndex, Qt, QThreadPool, Signal
import pandas as pd
from thread import DataFrameFilterTask

class DataFrameTableModel(QAbstractTableModel):
    beginFiltering = Signal()
    endFiltering = Signal()
    beginSorting = Signal()
    endSorting = Signal()
    beginTransforming = Signal()
    endTransforming = Signal()

    def __init__(self, base_df: pd.DataFrame = None):
        super(DataFrameTableModel, self).__init__()

        self._base_df: pd.DataFrame = base_df
        self._transformed_df: pd.DataFrame = None
        self._filters: list[DataFrameFilter] = []
        self._is_filtering = False
        self._is_sorting = False
    
    def rowCount(self, parent: QModelIndex = ...) -> int:
        if parent.isValid() or self._current_df is None:
            return 0
        
        return self._current_df.shape[0]

    def columnCount(self, parent: QModelIndex = ...) -> int:
        if parent.isValid() or self._current_df is None:
            return 0

        return self._current_df.shape[1]

    def data(self, index: QModelIndex, role: int = ...) -> object:
        if index.isValid() and self._current_df is not None:
            value = self._current_df.iloc[index.row(), index.column()]

            if role == Qt.ItemDataRole.DisplayRole:
                return str(value)
            elif role == Qt.ItemDataRole.UserRole:
                return value

    def headerData(self, section: int, orientation: Qt.Orientation, role: int = ...) -> object:
        if self._current_df is not None:
            if role == Qt.ItemDataRole.DisplayRole:
                if orientation == Qt.Orientation.Horizontal:
                    return str(self._current_df.columns[section])
                else:
                    return str(self._current_df.index[section])
            elif role == Qt.ItemDataRole.UserRole:
                if orientation == Qt.Orientation.Horizontal:
                    return self._current_df.columns[section]
                else:
                    return self._current_df.index[section]

    def flags(self, index: QModelIndex) -> Qt.ItemFlag:
        return Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled

    @property
    def base_df(self) -> pd.DataFrame:
        return self._base_df
    
    @base_df.setter
    def base_df(self, value: pd.DataFrame):
        self._base_df = value
        self._transformed_df = None
        self.layoutChanged.emit()

    @property
    def transformed_df(self) -> pd.DataFrame:
        return self._transformed_df
    
    @property
    def filters(self) -> list[DataFrameFilter]:
        return self._filters
    
    @property
    def is_filtering(self) -> bool:
        return self._is_filtering
    
    @property
    def is_sorting(self) -> bool:
        return self._is_sorting
    
    @property
    def is_transforming(self) -> bool:
        return self._is_filtering or self._is_sorting
    
    @property
    def _current_df(self) -> pd.DataFrame:
        return self._base_df if self._transformed_df is None else self._transformed_df
    
    def add_filter(self, filter: DataFrameFilter):
        self._filters.append(filter)
        self._apply_filters()

    def remove_filter(self, filter: DataFrameFilter):
        self._filters.remove(filter)
        self._apply_filters()
    
    def clear_filters(self):
        self._filters.clear()
        self._apply_filters()
    
    def _apply_filters(self):
        self.beginFiltering.emit()
        self._is_filtering = True

        task = DataFrameFilterTask(self._base_df.copy(deep=True), self._filters)
        task.signals.data.connect(self._on_filter_task_data)
        task.signals.finished.connect(self._on_filter_task_finished)
        task.signals.error.connect(self._on_filter_task_error)

        QThreadPool.globalInstance().start(task)
    
    def _on_filter_task_data(self, df: pd.DataFrame):
        self.beginResetModel()
        self._transformed_df = df
        self.endResetModel()
    
    def _on_filter_task_finished(self):
        self._is_filtering = False
        self.endFiltering.emit()
    
    def _on_filter_task_error(self, error: tuple[Exception, type, str]):
        raise error[0]

thread.py:

import sys
import traceback
from PySide6.QtCore import QRunnable, Signal, QObject
import pandas as pd

class DataFrameFilterTaskSignals(QObject):
    finished = Signal()
    error = Signal(Exception)
    data = Signal(pd.DataFrame)

class DataFrameFilterTask(QRunnable):
    OPERATIONS = {
        "eq": lambda x, y: x == y,
        "ne": lambda x, y: x != y,
        "lt": lambda x, y: x < y,
        "le": lambda x, y: x <= y,
        "gt": lambda x, y: x > y,
        "ge": lambda x, y: x >= y
    }

    def __init__(self, df: pd.DataFrame, filters: list):
        super(DataFrameFilterTask, self).__init__()

        self.signals = DataFrameFilterTaskSignals()
        self._df = df
        self._filters = filters
    
    def run(self):
        try:
            for filter in self._filters:
                if self._df[filter.column].dtype != type(filter.value):
                    self._df = self._df[self._df[filter.column].apply(lambda x: type(x) == type(filter.value))]
                    self._df = self._df[self.OPERATIONS[filter.operation.value](self._df[filter.column], filter.value)]
                else:
                    self._df = self._df[self.OPERATIONS[filter.operation.value](self._df[filter.column], filter.value)]

            self.signals.data.emit(self._df)
        except:
            traceback.print_exc()
            exctype, value = sys.exc_info()[:2]
            self.signals.error.emit((value, exctype, traceback.format_exc()))
        finally:
            self.signals.finished.emit()

Any comments on my solution? It does work without any freezing issues, but I'm not sure if this is common practice or more a quick and dirty solution... Is it better practice to keep DataFrameTableModel untouched and create a second DataFrameSortFilterProxyModel inheriting from QAbstractProxyModel that contains the thread handling logic and updates the sourceModel() (a DataFrameTableModel instance) from outside of the class by setting the filtered DataFrame to fulfill the separation of concerns principle?

1

There are 1 best solutions below

6
Nedim Memišević On

In Qt, main thread is reserved for GUI only. As you mentioned before for small datasets, freezing doesn't happen, it is because it operates so fast, you can't see the freezing itself but the same thing happens. You only notice once the dataset is huge so it requires more time to calculate it. Your only, and proper, way out is to move calculations to different thread.

What you do actually is you create a Worker class, a class where you will instantiate your class which does the calculations work. I will imitate your working class and GUI.

Let's say your class that does calculation is DataFrameSortFilterProxyModel and you don't want to touch or change actual inheritance of the class

class DataFrameSortFilterProxyModel():
      progress = QtCore.Signal(int)
      finished = QtCore.Signal()
      def filtering(self):
          datasetLength = 1000000000
          for i in range(datasetLength):
              # do the actual work
              emit progress(i/datasetLength)
          emit finished()

Now in your worker class you instantiate your class:

class Worker(QObject):
    finished = QtCore.Signal()
    progress = QtCore.Signal(int)

    def __init__(self):
        self.mySortingModel = DataFrameSortFilterProxyModel()
        self.mySortingModel.progress.connect(self.updateProgress)
        self.mySortingModel.finished.connect(self.onFinished)
        self.mySortingMode.filtering()

    def updateProgress(self, numProgress):
        emit progress(numProgress)

    def onFinished(self):
        emit finished()

Now let's go back to your UI class where you said it should have some kind of progress bar, for simplicity I will use QLabel, but you are free to use QProgressBar or maybe a widget with some GIF animation:

    from PySide2.QtCore import QThread
    ...
    ...
    self.progressText = QLabel("0%")
    ...
    def calculate():
        # instantiate different thread
        self.thread = QThread()
        # create a worker
        self.worker = Worker()
        # move worker to the thread
        self.worker.moveToThread(self.thread)
        # connect all signals and slots
        self.thread.started.connect(self.worker.run)
        self.worker.finished.connect(self.thread.quit)
        self.worker.finished.connect(self.worker.deleteLater)
        self.thread.finished.connect(self.thread.deleteLater)
        self.worker.progress.connect(self.updateProgress)
        
        # start the thread so the actual work is started
        self.thread.start()

        # if you don't want to calculations be bullied by user you can disable action that leads to calculations for example if you have button you would say
        self.calculateBtn.setEnabled(False)
    
        #connect signals what happens in between and after the calcs are over
        self.thread.finished.connect(
            lambda: self.calculateBtn.setEnabled(True)
        )
        self.thread.finished.connect(
            lambda: self.progressText.setText("0%")
        )

def updateProgress(self, progressNum):
    self.progressText.setText(f'{progressNum}%')

This should solve your GUI freezing problem, but unfortunately you will need to restructure your calculations class to enable getting info about the progress from it. And don't forget to update your actual graph from self.thread.finished to actually display data.