For classification purposes I am using a python class that holds multiple sklearn pipelines and implements the sklearn api.

To store the model after fitting, I want to use the ONNX format. I have implemented a shape calculator function and a converter function, as it is described in the onnx tutorial. But this doesn't seem to work under the given circumstances.

In the following code snippet I have provided a minimal example of how the model looks like. It is basically a wrapper class, that holds multiple sklearn pipelines, that contain classifiers. I have left out code details.

from sklearn.pipeline import Pipeline
from sklearn.base import ClassifierMixin, BaseEstimator
from skl2onnx.algebra.onnx_operator import OnnxSubEstimator
from skl2onnx import to_onnx, update_registered_converter, get_model_alias
from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
from skl2onnx import convert_sklearn
from sklearn.datasets import load_iris
from sklearn import svm


class MyPipelineWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self):
        BaseEstimator.__init__(self)
        ClassifierMixin.__init__(self)
        self.pipeline = Pipeline(
            steps=[
                ("classifier", svm.SVC(kernel="rbf", gamma="scale")),
            ]
        )

    def fit(self, X, y):
        self.pipeline.fit(X, y)

    def predict(self, X):
        y = self.pipeline.predict(X)
        return y


def pipe_wrapper_shape_calculator(operator):
    """
    Calculate the shape of the outputs based on the inputs
    """

    # inputs in ONNX graph
    input = operator.inputs[0]
    outputs = operator.outputs

    n_measurements = input.type.shape[0]  # number of samples

    # new output definition
    outputs[0].type = Int64TensorType([n_measurements])


def pipe_wrapper_converter(scope, operator, container):
    """
    https://onnx.ai/sklearn-onnx/auto_tutorial/plot_abegin_convert_pipeline.html
    https://onnx.ai/sklearn-onnx/auto_tutorial/plot_kcustom_converter_wrapper.html#conversion-into-onnx
    https://onnx.ai/sklearn-onnx/auto_examples/plot_custom_parser_alternative.html#custom-converter
    """
    op = operator.raw_operator  # scikit-learn model (must be fitted)
    opv = container.target_opset
    outputs = operator.outputs

    input = operator.inputs[0]
    print(input)

    Y = OnnxSubEstimator(op.pipeline, input, output_names=outputs, op_version=opv, options={"zipmap": False})
    Y.add_to(scope, container, operator=operator, run_converters=True)


def pipe_wrapper_parser(scope, model, inputs, custom_parsers=None):
    alias = get_model_alias(type(model))
    this_operator = scope.declare_local_operator(alias, model)

    # inputs
    this_operator.inputs.append(inputs[0])

    # outputs
    y_pred = scope.declare_local_variable("y_pred", Int64TensorType([None]))
    this_operator.outputs.append(y_pred)

    return this_operator.outputs


update_registered_converter(
    model=MyPipelineWrapper,
    alias="MyPipelineWrapper",
    shape_fct=pipe_wrapper_shape_calculator,
    convert_fct=pipe_wrapper_converter,
    parser=pipe_wrapper_parser
)


model = MyPipelineWrapper()

data = load_iris()
X, y = data.data, data.target

model.fit(X, y)

onx = to_onnx(model, initial_types=[('X', Int64TensorType([None, 4]))])

When I run this, I bump into this: RuntimeError: Mismatch between pipeline output 2 and last step outputs 1.

After a long time of trial and error and debugging, I still don't know how I can save this problem. I don't know why the pipeline has 2 outputs and this mismatch occurs.

Also, if I do the same with a regressor instead of a classifier inside of a pipeline inside of a wrapper, it works. The pipeline containing the classifier without the wrapper class also works.

0

There are 0 best solutions below