Source code for ads.model.serde.model_serializer

#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import cloudpickle
import numpy as np
import pandas as pd
from ads.model.serde.common import Serializer, Deserializer
from ads.common.decorator.runtime_dependency import (
    runtime_dependency,
    OptionalDependency,
)
from ads.common import logger
from pandas.api.types import is_numeric_dtype, is_string_dtype
from typing import Any, Dict, List, Optional, Tuple, Union
from joblib import dump


# String identifiers of the model serialization formats known to this module.
MODEL_SERIALIZATION_TYPE_ONNX = "onnx"
MODEL_SERIALIZATION_TYPE_CLOUDPICKLE = "cloudpickle"
MODEL_SERIALIZATION_TYPE_TORCHSCRIPT = "torchscript"
# Historical misspelling of the name above; kept as an alias so existing
# references to it keep working.
MODEL_SERIALIZATION_TYPE_TORHCSCRIPT = MODEL_SERIALIZATION_TYPE_TORCHSCRIPT
MODEL_SERIALIZATION_TYPE_TORCH = "torch"
MODEL_SERIALIZATION_TYPE_TORCH_ONNX = "torch_onnx"
MODEL_SERIALIZATION_TYPE_TF = "tf"
MODEL_SERIALIZATION_TYPE_TF_ONNX = "tf_onnx"
MODEL_SERIALIZATION_TYPE_JOBLIB = "joblib"
MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX = "sklearn_onnx"
MODEL_SERIALIZATION_TYPE_LIGHTGBM = "lightgbm"
MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX = "lightgbm_onnx"
MODEL_SERIALIZATION_TYPE_XGBOOST = "xgboost"
MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ = "xgboost_ubj"
MODEL_SERIALIZATION_TYPE_XGBOOST_TXT = "xgboost_txt"
MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX = "xgboost_onnx"
MODEL_SERIALIZATION_TYPE_SPARK = "spark"
MODEL_SERIALIZATION_TYPE_HUGGINGFACE = "huggingface"


# Serialization types listed as supported.
# NOTE(review): the `xgboost_ubj` and `xgboost_txt` identifiers are defined
# above but are not part of this list -- confirm whether that is intentional.
SUPPORTED_MODEL_SERIALIZERS = [
    MODEL_SERIALIZATION_TYPE_ONNX,
    MODEL_SERIALIZATION_TYPE_CLOUDPICKLE,
    MODEL_SERIALIZATION_TYPE_TORCHSCRIPT,
    MODEL_SERIALIZATION_TYPE_TORCH,
    MODEL_SERIALIZATION_TYPE_TORCH_ONNX,
    MODEL_SERIALIZATION_TYPE_TF,
    MODEL_SERIALIZATION_TYPE_TF_ONNX,
    MODEL_SERIALIZATION_TYPE_JOBLIB,
    MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX,
    MODEL_SERIALIZATION_TYPE_LIGHTGBM,
    MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX,
    MODEL_SERIALIZATION_TYPE_XGBOOST,
    MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX,
    MODEL_SERIALIZATION_TYPE_SPARK,
    MODEL_SERIALIZATION_TYPE_HUGGINGFACE,
]



[docs]
class ModelSerializerType:
    """Namespace grouping the `cloudpickle` and `onnx` serialization type identifiers."""

    CLOUDPICKLE = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE
    ONNX = MODEL_SERIALIZATION_TYPE_ONNX




[docs]
class PyTorchModelSerializerType:
    """Namespace grouping the `torch`, `torchscript` and `torch_onnx` serialization type identifiers."""

    TORCH = MODEL_SERIALIZATION_TYPE_TORCH
    TORCHSCRIPT = MODEL_SERIALIZATION_TYPE_TORHCSCRIPT
    ONNX = MODEL_SERIALIZATION_TYPE_TORCH_ONNX




[docs]
class TensorflowModelSerializerType:
    """Namespace grouping the `tf` and `tf_onnx` serialization type identifiers."""

    TENSORFLOW = MODEL_SERIALIZATION_TYPE_TF
    ONNX = MODEL_SERIALIZATION_TYPE_TF_ONNX




[docs]
class LightGBMModelSerializerType:
    """Namespace grouping the `lightgbm` and `lightgbm_onnx` serialization type identifiers."""

    LIGHTGBM = MODEL_SERIALIZATION_TYPE_LIGHTGBM
    ONNX = MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX




[docs]
class SklearnModelSerializerType:
    """Namespace grouping the `joblib`, `cloudpickle` and `sklearn_onnx` serialization type identifiers."""

    JOBLIB = MODEL_SERIALIZATION_TYPE_JOBLIB
    CLOUDPICKLE = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE
    ONNX = MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX




[docs]
class XgboostModelSerializerType:
    """Namespace grouping the `xgboost` and `xgboost_onnx` serialization type identifiers."""

    XGBOOST = MODEL_SERIALIZATION_TYPE_XGBOOST
    ONNX = MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX




[docs]
class SparkModelSerializerType:
    """Namespace holding the `spark` serialization type identifier."""

    SPARK = MODEL_SERIALIZATION_TYPE_SPARK




[docs]
class HuggingFaceSerializerType:
    """Namespace holding the `huggingface` serialization type identifier."""

    HUGGINGFACE = MODEL_SERIALIZATION_TYPE_HUGGINGFACE




[docs]
class ModelSerializer(Serializer):
    """Base class for creation of new model serializers.

    Each instance remembers the file suffix passed to the constructor in
    `model_file_suffix`.
    """

    def __init__(self, model_file_suffix):
        """Initialize the base serializer and store `model_file_suffix` on the instance.

        Args:
            model_file_suffix: Suffix recorded as `self.model_file_suffix`.
        """
        super().__init__()
        self.model_file_suffix = model_file_suffix




[docs]
class ModelDeserializer(Deserializer):
    """Base class for creation of new model deserializers."""

    def deserialize(self, **kwargs):
        """Placeholder that concrete deserializers are expected to override.

        Args:
            kwargs: Accepted for signature compatibility; not used here.

        Raises:
            NotImplementedError: Always, since the base class does no loading.
        """
        raise NotImplementedError()





[docs]
class CloudPickleModelSerializer(ModelSerializer):
    """Serializer that writes a model to disk with `cloudpickle`."""

    def __init__(self, model_file_suffix="pkl"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Pickle `estimator` into the file at `model_path` via `cloudpickle.dump`.

        See https://docs.python.org/3/library/pickle.html#pickle.dump for more details.

        Args:
            estimator: The model to be saved.
            model_path: Path of the target file; it is opened in binary write mode.
            kwargs:
                model_save: (dict, optional).
                    Extra keyword options forwarded to `cloudpickle.dump`.
        """
        dump_options = kwargs.get("model_save", {})
        with open(model_path, "wb") as model_file:
            cloudpickle.dump(estimator, model_file, **dump_options)





[docs]
class JobLibModelSerializer(ModelSerializer):
    """Serializer that writes a model with `joblib`."""

    def __init__(self, model_file_suffix="joblib"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Persist `estimator` through `joblib.dump`.

        See https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html for more details.

        Args:
            estimator: The model to be saved.
            model_path: Destination passed to `joblib.dump` as its second argument.
            kwargs:
                model_save: (dict, optional).
                    Extra keyword options forwarded to `joblib.dump`.
        """
        dump_options = kwargs.get("model_save", {})
        dump(estimator, model_path, **dump_options)





[docs]
class SparkModelSerializer(ModelSerializer):
    """Save Spark Model."""

    def __init__(self, model_file_suffix=""):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Write `estimator` to `model_path` with its own writer in overwrite mode.

        Args:
            estimator: The model to be saved; must provide `write()`.
            model_path: Location handed to the writer's `save`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        overwriting_writer = estimator.write().overwrite()
        overwriting_writer.save(model_path)





[docs]
class PyTorchModelSerializer(ModelSerializer):
    """Save PyTorch Model using torch.save(). See https://pytorch.org/docs/stable/generated/torch.save.html for more details."""

    def __init__(self, model_file_suffix="pt"):
        super().__init__(model_file_suffix)

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(self, estimator, model_path, **kwarg):
        """Save the `state_dict()` of `estimator` to `model_path` with `torch.save`.

        Only the state dictionary is written, not the estimator object itself.

        Args:
            estimator: The model to be saved; must provide `state_dict()`.
            model_path: Destination handed to `torch.save`.
            kwarg: Accepted for signature compatibility; not used here.
        """
        weights = estimator.state_dict()
        torch.save(weights, model_path)





[docs]
class TorchScriptModelSerializer(ModelSerializer):
    """Save PyTorch Model using torchscript. See https://pytorch.org/tutorials/beginner/saving_loading_models.html#export-load-model-in-torchscript-format for more details."""

    def __init__(self, model_file_suffix="pt"):
        super().__init__(model_file_suffix)

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(self, estimator, model_path, **kwargs):
        """Script `estimator` with `torch.jit.script` and save the result.

        Args:
            estimator: The model to be scripted and saved.
            model_path: Destination handed to `torch.jit.save`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        torch.jit.save(torch.jit.script(estimator), model_path)





[docs]
class LightGBMModelSerializer(ModelSerializer):
    """Save LightGBM Model through save_model into txt."""

    def __init__(self, model_file_suffix="txt"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate saving to the estimator's own `save_model`.

        Args:
            estimator: The model to be saved; must provide `save_model`.
            model_path: Destination handed to `estimator.save_model`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        save_model = estimator.save_model
        save_model(model_path)





[docs]
class XgboostJsonModelSerializer(ModelSerializer):
    """Save Xgboost Model through xgboost.save_model into JSON."""

    def __init__(self, model_file_suffix="json"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate saving to the estimator's own `save_model`.

        See
        https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must provide `save_model`.
            model_path: Destination handed to `estimator.save_model`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        # NOTE(review): the on-disk format is presumably selected by xgboost
        # from the suffix of `model_path` -- confirm against the caller.
        save_model = estimator.save_model
        save_model(model_path)





[docs]
class XgboostTxtModelSerializer(ModelSerializer):
    """Save Xgboost Model through xgboost.save_model into txt."""

    def __init__(self, model_file_suffix="txt"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate saving to the estimator's own `save_model`.

        See
        https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must provide `save_model`.
            model_path: Destination handed to `estimator.save_model`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        # NOTE(review): the on-disk format is presumably selected by xgboost
        # from the suffix of `model_path` -- confirm against the caller.
        save_model = estimator.save_model
        save_model(model_path)





[docs]
class XgboostUbjModelSerializer(ModelSerializer):
    """Save Xgboost Model through xgboost.save_model into binary JSON."""

    def __init__(self, model_file_suffix="ubj"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate saving to the estimator's own `save_model`.

        See
        https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must provide `save_model`.
            model_path: Destination handed to `estimator.save_model`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        # NOTE(review): the on-disk format is presumably selected by xgboost
        # from the suffix of `model_path` -- confirm against the caller.
        save_model = estimator.save_model
        save_model(model_path)





[docs]
class TensorFlowModelSerializer(ModelSerializer):
    """Save Tensorflow Model."""

    def __init__(self, model_file_suffix="h5"):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate saving to the estimator's own `save`.

        Args:
            estimator: The model to be saved; must provide `save`.
            model_path: Destination handed to `estimator.save`.
            kwargs: Accepted for signature compatibility; not used here.
        """
        save = estimator.save
        save(model_path)





[docs]
class HuggingFaceModelSerializer(ModelSerializer):
    """Save HuggingFace Pipeline."""

    def __init__(self, model_file_suffix=""):
        super().__init__(model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Save the pipeline, then re-save its model config with the backbone flag off.

        Args:
            estimator: The pipeline to be saved; must provide `save_pretrained`
                and a `model.config` object.
            model_path: Directory passed as `save_directory` to both save calls.
            kwargs: Accepted for signature compatibility; not used here.
        """
        target_dir = model_path
        estimator.save_pretrained(save_directory=target_dir)
        # The config is written a second time, after `use_pretrained_backbone`
        # has been switched off on it.
        estimator.model.config.use_pretrained_backbone = False
        estimator.model.config.save_pretrained(save_directory=target_dir)





[docs]
class OnnxModelSerializer(ModelSerializer):
    """Base class for creation of onnx converter for each model framework."""

    def __init__(self, model_file_suffix="onnx"):
        super().__init__(model_file_suffix)

    def serialize(
        self,
        estimator,
        model_path,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """Convert `estimator` with `_to_onnx` and write the result to `model_path`.

        The estimator is stored on `self.estimator` before conversion so that
        `_to_onnx` implementations can read it from the instance.

        Args:
            estimator: The model to be saved.
            model_path: Path of the target file; it is opened in binary write mode.
            initial_types: (List[Tuple], optional)
                A python list. Each element is a tuple of a variable name and a data type.
            X_sample: (any, optional). Defaults to None.
                Contains model inputs such that model(X_sample) is a valid
                invocation of the model, used to validate the model input type.
            kwargs: Forwarded unchanged to `_to_onnx`.
        """
        self.estimator = estimator
        conversion_args = dict(kwargs, initial_types=initial_types, X_sample=X_sample)
        onnx_model = self._to_onnx(**conversion_args)
        with open(model_path, "wb") as model_file:
            model_file.write(onnx_model.SerializeToString())

    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """Conversion hook; framework-specific subclasses provide the implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()




[docs]
class SklearnOnnxModelSerializer(OnnxModelSerializer):
    """Converts Sklearn Model into Onnx."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def _version_tuple(version) -> Tuple[int, ...]:
        """Return the leading numeric components (at most three) of a version string.

        Parsing stops at the first component that does not start with a digit,
        so "1.9.2" gives (1, 9, 2) and "1.9.2rc1" gives (1, 9, 2).
        """
        numbers = []
        for piece in str(version).split(".")[:3]:
            digits = ""
            for char in piece:
                if not char.isdigit():
                    break
                digits += char
            if not digits:
                break
            numbers.append(int(digits))
        return tuple(numbers)

    @runtime_dependency(module="onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="xgboost", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(module="lightgbm", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(module="skl2onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="onnxmltools", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="onnxmltools.convert.xgboost.operator_converters.XGBoost",
        object="convert_xgboost",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="onnxmltools.convert.lightgbm.operator_converters.LightGbm",
        object="convert_lightgbm",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given scikit-learn model.

        For a scikit-learn pipeline, converters for the XGBoost / LightGBM
        classifier and regressor steps found in it are registered with
        skl2onnx before the conversion. Any other estimator is converted
        through `onnxmltools.convert_sklearn`.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.
        kwargs:
            Forwarded to the converter. `options` is consumed by the first
            converter registration that accepts options.

        Returns
        -------
        onnx.onnx_ml_pb2.ModelProto
            An ONNX model (type: ModelProto) which is equivalent to the input scikit-learn model.

        Raises
        ------
        ValueError
            If neither `initial_types` nor `X_sample` is given, or if the
            conversion with auto-generated initial types fails.
        """
        auto_generated_initial_types = None
        if not initial_types:
            if X_sample is None:
                raise ValueError(
                    " At least one of `X_sample` or `initial_types` must be provided."
                )
            auto_generated_initial_types = self._generate_initial_types(X_sample)

        if str(type(self.estimator)).startswith("<class 'sklearn.pipeline"):
            model_types = [type(val[1]) for val in self.estimator.steps]
            if xgboost.sklearn.XGBClassifier in model_types:
                skl2onnx.update_registered_converter(
                    xgboost.XGBClassifier,
                    "XGBoostXGBClassifier",
                    skl2onnx.common.shape_calculator.calculate_linear_classifier_output_shapes,
                    convert_xgboost,
                    options=kwargs.pop(
                        "options", {"nocl": [True, False], "zipmap": [True, False]}
                    ),
                )

            if xgboost.sklearn.XGBRegressor in model_types:
                skl2onnx.update_registered_converter(
                    xgboost.XGBRegressor,
                    "XGBoostXGBRegressor",
                    skl2onnx.common.shape_calculator.calculate_linear_regressor_output_shapes,
                    convert_xgboost,
                )

            if lightgbm.sklearn.LGBMClassifier in model_types:
                skl2onnx.update_registered_converter(
                    lightgbm.LGBMClassifier,
                    "LightGbmLGBMClassifier",
                    skl2onnx.common.shape_calculator.calculate_linear_classifier_output_shapes,
                    convert_lightgbm,
                    options=kwargs.pop(
                        "options",
                        {"nocl": [True, False], "zipmap": [True, False, "columns"]},
                    ),
                )

            if lightgbm.sklearn.LGBMRegressor in model_types:

                def skl2onnx_convert_lightgbm(scope, operator, container):
                    # Wraps `convert_lightgbm` so the `split` option is copied
                    # onto the operator before conversion.
                    options = scope.get_options(operator.raw_operator)
                    if "split" in options:
                        installed = onnxmltools.__version__
                        if self._version_tuple(installed) < (1, 9, 2):
                            logger.warning(
                                "Option split was released in version 1.9.2 but %s is "
                                "installed. It will be ignored.",
                                installed,
                            )
                        operator.split = options["split"]
                    else:
                        operator.split = None
                    convert_lightgbm(scope, operator, container)

                skl2onnx.update_registered_converter(
                    lightgbm.LGBMRegressor,
                    "LightGbmLGBMRegressor",
                    skl2onnx.common.shape_calculator.calculate_linear_regressor_output_shapes,
                    skl2onnx_convert_lightgbm,
                    options=kwargs.pop("options", {"split": None}),
                )

            if initial_types:
                return skl2onnx.convert_sklearn(
                    self.estimator, initial_types=initial_types, **kwargs
                )

            # Pop `target_opset` so a caller-supplied value is honoured instead
            # of colliding with the explicit keyword below.
            target_opset = kwargs.pop("target_opset", None)
            try:
                return skl2onnx.convert_sklearn(
                    self.estimator,
                    initial_types=auto_generated_initial_types,
                    target_opset=target_opset,
                    **kwargs,
                )
            except Exception as e:
                raise ValueError(
                    "`initial_types` can not be autodetected. Please directly pass `initial_types`."
                ) from e

        if initial_types:
            return onnxmltools.convert_sklearn(
                self.estimator,
                initial_types=initial_types,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )

        try:
            return onnxmltools.convert_sklearn(
                self.estimator,
                initial_types=auto_generated_initial_types,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        except Exception as e:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            ) from e

    @runtime_dependency(module="skl2onnx", install_from=OptionalDependency.ONNX)
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate initial types.

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types, or None when the skl2onnx type guess fails for
            data that is neither an all-numerical array/dataframe nor a
            numerical-or-string dataframe.

        Raises
        ------
        ValueError
            If the data is all numerical but the number of columns can be
            read neither from `X_sample` nor from the estimator.
        """
        if self._is_all_numerical_array_dataframe(X_sample):
            # All-numerical dataframe or numpy array: one float tensor whose
            # width comes from the sample, or from the fitted estimator when
            # the sample has fewer than two dimensions.
            if hasattr(X_sample, "shape") and len(X_sample.shape) >= 2:
                n_cols = X_sample.shape[1]
            elif hasattr(self.estimator, "n_features_in_"):
                n_cols = self.estimator.n_features_in_
            else:
                raise ValueError(
                    "`initial_types` can not be detected. Please directly pass initial_types."
                )
            return [
                (
                    "input",
                    skl2onnx.common.data_types.FloatTensorType([None, n_cols]),
                )
            ]

        if self.is_either_numerical_or_string_dataframe(X_sample):
            # Mixed numerical/string dataframe: one single-column input per
            # dataframe column, typed as float or string.
            auto_generated_initial_types = []
            for _, col in X_sample.items():
                if is_numeric_dtype(col.dtypes):
                    tensor_type = skl2onnx.common.data_types.FloatTensorType(
                        [None, 1]
                    )
                else:
                    tensor_type = skl2onnx.common.data_types.StringTensorType(
                        [None, 1]
                    )
                auto_generated_initial_types.append((col.name, tensor_type))
            return auto_generated_initial_types

        try:
            return skl2onnx.common.data_types.guess_data_type(
                np.array(X_sample) if isinstance(X_sample, list) else X_sample
            )
        except Exception:
            # Best effort: the caller turns a failed conversion into a
            # ValueError asking for explicit `initial_types`.
            return None

    @staticmethod
    def _is_all_numerical_array_dataframe(
        data: Union[pd.DataFrame, np.ndarray]
    ) -> bool:
        """Check whether all the columns are numerical for numpy array and dataframe.
        For data with any other data types, it will return False.

        Parameters
        ----------
        data: Union[pd.DataFrame, np.ndarray]

        Returns
        -------
        bool
            Whether all the columns in a pandas dataframe or numpy array are all numerical.
        """
        if isinstance(data, pd.DataFrame):
            return all(is_numeric_dtype(dtype) for dtype in data.dtypes)
        return isinstance(data, np.ndarray) and is_numeric_dtype(data.dtype)

    @staticmethod
    def is_either_numerical_or_string_dataframe(data: pd.DataFrame) -> bool:
        """Check whether all the columns are either numerical or string for dataframe."""
        return isinstance(data, pd.DataFrame) and all(
            is_numeric_dtype(col.dtypes) or is_string_dtype(col.dtypes)
            for _, col in data.items()
        )





[docs]
class LightGBMOnnxModelSerializer(OnnxModelSerializer):
    """Converts LightGBM model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="onnxmltools.convert",
        object="convert_lightgbm",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given LightGBM model.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.
        kwargs:
            target_opset: (optional). Defaults to None.
                Passed to `convert_lightgbm` as `target_opset`.
            Remaining entries are forwarded to `convert_lightgbm` unchanged.

        Returns
        -------
            An ONNX model (type: ModelProto) which is equivalent to the input LightGBM model.

        Raises
        ------
        ValueError
            If the initial types cannot be generated, or if the conversion
            with auto-generated initial types fails.
        """
        target_opset = kwargs.pop("target_opset", None)
        if initial_types:
            return convert_lightgbm(
                self.estimator,
                initial_types=initial_types,
                target_opset=target_opset,
                **kwargs,
            )

        auto_generated_initial_types = self._generate_initial_types(X_sample)
        try:
            return convert_lightgbm(
                self.estimator,
                initial_types=auto_generated_initial_types,
                target_opset=target_opset,
                **kwargs,
            )
        except Exception as e:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            ) from e

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate initial types.

        The number of input columns is taken from `X_sample` when it has at
        least two dimensions, otherwise from the estimator's `num_feature()`
        or `n_features_in_`.

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types.

        Raises
        ------
        ValueError
            If the number of columns cannot be determined.
        """
        if (
            X_sample is not None
            and hasattr(X_sample, "shape")
            and len(X_sample.shape) >= 2
        ):
            # A sample with fewer than two dimensions has no column count, so
            # it falls through to the estimator-based detection below.
            n_cols = X_sample.shape[1]
        elif hasattr(self.estimator, "num_feature"):
            n_cols = self.estimator.num_feature()
        elif hasattr(self.estimator, "n_features_in_"):
            n_cols = self.estimator.n_features_in_
        else:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            )
        return [("input", FloatTensorType([None, n_cols]))]




[docs]
class XgboostOnnxModelSerializer(OnnxModelSerializer):
    """Converts Xgboost model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="xgboost", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(
        module="skl2onnx",
        object="convert_sklearn",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx",
        object="update_registered_converter",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.shape_calculator",
        object="calculate_linear_classifier_output_shapes",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.shape_calculator",
        object="calculate_linear_regressor_output_shapes",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(module="onnxmltools", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="onnxmltools.convert.xgboost.operator_converters.XGBoost",
        object="convert_xgboost",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Union[list, tuple, pd.DataFrame, pd.Series, np.ndarray] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given Xgboost model.

        Estimators whose type lives under `xgboost.sklearn` are converted with
        `convert_sklearn`, after registering the XGBoost converter when the
        estimator is exactly an `XGBClassifier` or `XGBRegressor`. Any other
        estimator is converted with `onnxmltools.convert_xgboost`.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.
        kwargs:
            Forwarded to the converter. For the non-sklearn path,
            `target_opset` is passed as the converter's `target_opset`.

        Returns
        -------
        onnx.onnx_ml_pb2.ModelProto
            An ONNX model (type: ModelProto) which is equivalent to the input xgboost model.

        Raises
        ------
        ValueError
            If the initial types cannot be generated, or if the conversion
            with auto-generated initial types fails.
        """
        auto_generated_initial_types = None
        if not initial_types:
            auto_generated_initial_types = self._generate_initial_types(X_sample)

        estimator_type = type(self.estimator)
        if str(estimator_type).startswith("<class 'xgboost.sklearn."):
            # sklearn api
            if estimator_type is xgboost.sklearn.XGBClassifier:
                update_registered_converter(
                    xgboost.XGBClassifier,
                    "XGBoostXGBClassifier",
                    calculate_linear_classifier_output_shapes,
                    convert_xgboost,
                    options={"nocl": [True, False], "zipmap": [True, False]},
                )
            elif estimator_type is xgboost.sklearn.XGBRegressor:
                update_registered_converter(
                    xgboost.XGBRegressor,
                    "XGBoostXGBRegressor",
                    calculate_linear_regressor_output_shapes,
                    convert_xgboost,
                )

            if initial_types:
                return convert_sklearn(
                    self.estimator, initial_types=initial_types, **kwargs
                )
            try:
                return convert_sklearn(
                    self.estimator,
                    initial_types=auto_generated_initial_types,
                    **kwargs,
                )
            except Exception as e:
                raise ValueError(
                    "`initial_types` can not be autodetected. Please directly pass `initial_types`."
                ) from e

        # xgboost api
        target_opset = kwargs.pop("target_opset", None)
        if initial_types:
            return onnxmltools.convert_xgboost(
                self.estimator,
                initial_types=initial_types,
                target_opset=target_opset,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        try:
            return onnxmltools.convert_xgboost(
                self.estimator,
                initial_types=auto_generated_initial_types,
                target_opset=target_opset,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        except Exception as e:
            raise ValueError(
                "`initial_types` can not be autodetected. Please directly pass `initial_types`."
            ) from e

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate initial types.

        The number of input columns is read from the estimator when it
        exposes `n_features_in_` or a non-empty `feature_names`; otherwise it
        is read from `X_sample`.

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types.

        Raises
        ------
        ValueError
            If the estimator carries no feature count and `X_sample` is
            missing or has fewer than two dimensions.
        """
        if hasattr(self.estimator, "n_features_in_"):
            # sklearn api
            n_cols = self.estimator.n_features_in_
        elif hasattr(self.estimator, "feature_names") and self.estimator.feature_names:
            # xgboost learning api
            n_cols = len(self.estimator.feature_names)
        elif X_sample is None:
            raise ValueError(
                " At least one of `X_sample` or `initial_types` must be provided."
            )
        elif hasattr(X_sample, "shape") and len(X_sample.shape) >= 2:
            n_cols = X_sample.shape[1]
        else:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            )
        return [("input", FloatTensorType([None, n_cols]))]




[docs]
class PytorchOnnxModelSerializer(OnnxModelSerializer):
    """Converts Pytorch model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(
        self,
        estimator,
        model_path: str,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Exports the given Pytorch model into ONNX format.

        Parameters
        ----------
        estimator:
            The Pytorch model passed to `torch.onnx.export`.
        model_path: str
            Path to save the serialized model.
        X_sample: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]. Defaults to None.
            A sample of input data that will be used to generate input schema and detect onnx_args.
            It is used as `onnx_args` when `onnx_args` is not supplied.
        kwargs:
            onnx_args: (tuple or torch.Tensor), default to None
                Contains model inputs such that model(onnx_args) is a valid
                invocation of the model. Can be structured either as: 1) ONLY A
                TUPLE OF ARGUMENTS; 2) A TENSOR; 3) A TUPLE OF ARGUMENTS ENDING
                WITH A DICTIONARY OF NAMED ARGUMENTS
            input_names: (List[str], optional). Defaults to ["input"].
                Names to assign to the input nodes of the graph, in order.
            output_names: (List[str], optional). Defaults to ["output"].
                Names to assign to the output nodes of the graph, in order.
            dynamic_axes: (dict, optional). Defaults to None.
                Specify axes of tensors as dynamic (i.e. known only at run-time).

        Returns
        -------
        None
            Nothing

        Raises
        ------
        AssertionError
            if onnx module is not supported by the current version of torch
        ValueError
            if neither `onnx_args` nor `X_sample` is provided
            if `model_path` is not provided
        """
        onnx_args = kwargs.get("onnx_args", None)
        input_names = kwargs.get("input_names", ["input"])
        output_names = kwargs.get("output_names", ["output"])
        dynamic_axes = kwargs.get("dynamic_axes", None)

        # Raised explicitly (rather than via an `assert` statement) so the
        # check still runs when Python is started with -O.
        if not hasattr(torch, "onnx"):
            raise AssertionError(
                f"This version of pytorch {torch.__version__} does not appear to support onnx "
                "conversion."
            )

        if onnx_args is None:
            if X_sample is None:
                raise ValueError(
                    "`onnx_args` can not be detected. The parameter `onnx_args` must be provided to export pytorch model as onnx."
                )
            logger.warning(
                "Since `onnx_args` is not provided, `onnx_args` is "
                "detected from `X_sample` to export pytorch model as onnx."
            )
            onnx_args = X_sample

        if not model_path:
            raise ValueError(
                "The parameter `model_path` must be provided to save the model file."
            )

        torch.onnx.export(
            estimator,
            args=onnx_args,
            f=model_path,
            input_names=input_names,
            output_names=output_names,
            dynamic_axes=dynamic_axes,
        )





[docs]
class TensorFlowOnnxModelSerializer(OnnxModelSerializer):
    """Converts Tensorflow model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="tf2onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="tensorflow",
        short_name="tf",
        install_from=OptionalDependency.TENSORFLOW,
    )
    def serialize(
        self,
        estimator,
        model_path: str = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Exports the given Tensorflow model into ONNX format.

        When `input_signature` is not supplied it is detected, in order, from
        the estimator's `input_shape`/`input` attributes or from `X_sample`.

        Parameters
        ----------
        estimator:
            The model passed to `tf2onnx.convert.from_keras`.
        model_path: str, default to None
            Path to save the serialized model.
        X_sample: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]. Defaults to None.
            A sample of input data that will be used to generate input schema and detect input_signature.
        kwargs:
            opset_version: (optional). Defaults to None.
                Forwarded to `tf2onnx.convert.from_keras` as `opset`.
            input_signature: (optional). Defaults to None.
                Forwarded to `tf2onnx.convert.from_keras` as-is; skips
                auto-detection when provided.

        Returns
        -------
        None
            Nothing

        Raises
        ------
        ValueError
            if model_path is not provided
            if `input_signature` is not provided and can not be detected
            if conversion with the detected `input_signature` fails
        """
        opset_version = kwargs.get("opset_version", None)
        input_signature = kwargs.get("input_signature", None)

        if not model_path:
            raise ValueError(
                "The parameter `model_path` must be provided to save the model file."
            )

        if input_signature is not None:
            # Caller-supplied signature: conversion errors propagate unchanged.
            tf2onnx.convert.from_keras(
                estimator,
                input_signature=input_signature,
                opset=opset_version,
                output_path=model_path,
            )
            return

        if hasattr(estimator, "input_shape"):
            if not isinstance(estimator.input, list):
                # single input
                detected_input_signature = (
                    tf.TensorSpec(
                        estimator.input_shape,
                        dtype=estimator.input.dtype,
                        name="input",
                    ),
                )
            else:
                # multiple input: one spec per (shape, tensor) pair
                detected_input_signature = [
                    tf.TensorSpec(shape, dtype=tensor.dtype)
                    for shape, tensor in zip(estimator.input_shape, estimator.input)
                ]
        elif X_sample is not None and hasattr(X_sample, "shape"):
            logger.warning(
                "Since `input_signature` is not provided, `input_signature` is "
                "detected from `X_sample` to export tensorflow model as "
                "onnx."
            )
            # Leave the leading (batch) dimension unspecified.
            X_sample_shape = list(X_sample.shape)
            X_sample_shape[0] = None
            # NOTE(review): relies on `X_sample.dtype`; an object exposing
            # `shape` but no `dtype` (e.g. a pandas DataFrame) raises
            # AttributeError here — confirm callers pass array-likes.
            detected_input_signature = (
                tf.TensorSpec(X_sample_shape, dtype=X_sample.dtype, name="input"),
            )
        else:
            raise ValueError(
                "The parameter `input_signature` must be provided to export "
                "tensorflow model as onnx."
            )

        try:
            tf2onnx.convert.from_keras(
                estimator,
                input_signature=detected_input_signature,
                opset=opset_version,
                output_path=model_path,
            )
        except Exception as e:
            # Keep the original failure attached as the cause for debugging.
            raise ValueError(
                "`input_signature` can not be autodetected. The parameter `input_signature` must be provided to export "
                "tensorflow model as onnx."
            ) from e





[docs]
class OnnxModelSaveSERDE(OnnxModelSerializer, ModelDeserializer):
    """SERDE combining `OnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_ONNX




[docs]
class CloudpickleModelSaveSERDE(CloudPickleModelSerializer, ModelDeserializer):
    """SERDE combining `CloudPickleModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("cloudpickle") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE




[docs]
class JoblibModelSaveSERDE(JobLibModelSerializer, ModelDeserializer):
    """SERDE combining `JobLibModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("joblib") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_JOBLIB




[docs]
class SparkModelSaveSERDE(SparkModelSerializer, ModelDeserializer):
    """SERDE combining `SparkModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("spark") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_SPARK




[docs]
class HuggingFacePipelineSaveSERDE(HuggingFaceModelSerializer, ModelDeserializer):
    """SERDE combining `HuggingFaceModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("huggingface") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_HUGGINGFACE




[docs]
class TorchScriptModelSaveSERDE(TorchScriptModelSerializer, ModelDeserializer):
    """SERDE combining `TorchScriptModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("torchscript") used to look this class up in
    # ModelSerializerFactory. The constant's name is spelled "TORHCSCRIPT"
    # where it is defined at module level.
    name = MODEL_SERIALIZATION_TYPE_TORHCSCRIPT




[docs]
class PyTorchModelSaveSERDE(PyTorchModelSerializer, ModelDeserializer):
    """SERDE combining `PyTorchModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("torch") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_TORCH




[docs]
class PyTorchOnnxModelSaveSERDE(PytorchOnnxModelSerializer, ModelDeserializer):
    """SERDE combining `PytorchOnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("torch_onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_TORCH_ONNX




[docs]
class TensorFlowModelSaveSERDE(TensorFlowModelSerializer, ModelDeserializer):
    """SERDE combining `TensorFlowModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("tf") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_TF




[docs]
class TensorFlowOnnxModelSaveSERDE(TensorFlowOnnxModelSerializer, ModelDeserializer):
    """SERDE combining `TensorFlowOnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("tf_onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_TF_ONNX




[docs]
class SklearnOnnxModelSaveSERDE(SklearnOnnxModelSerializer, ModelDeserializer):
    """SERDE combining `SklearnOnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("sklearn_onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX




[docs]
class LightGBMModelSaveSERDE(LightGBMModelSerializer, ModelDeserializer):
    """SERDE combining `LightGBMModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("lightgbm") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_LIGHTGBM




[docs]
class LightGBMOnnxModelSaveSERDE(LightGBMOnnxModelSerializer, ModelDeserializer):
    """SERDE combining `LightGBMOnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("lightgbm_onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX




[docs]
class XgboostJsonModelSaveSERDE(XgboostJsonModelSerializer, ModelDeserializer):
    """SERDE combining `XgboostJsonModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("xgboost") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_XGBOOST




[docs]
class XgboostUbjModelSaveSERDE(XgboostUbjModelSerializer, ModelDeserializer):
    """SERDE combining `XgboostUbjModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("xgboost_ubj") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ




[docs]
class XgboostTxtModelSaveSERDE(XgboostTxtModelSerializer, ModelDeserializer):
    """SERDE combining `XgboostTxtModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("xgboost_txt") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_XGBOOST_TXT




[docs]
class XgboostOnnxModelSaveSERDE(XgboostOnnxModelSerializer, ModelDeserializer):
    """SERDE combining `XgboostOnnxModelSerializer` with `ModelDeserializer`."""
    # Serialization type key ("xgboost_onnx") used to look this class up in ModelSerializerFactory.
    name = MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX




[docs]
class ModelSerializerFactory:
    """Model Serializer Factory.

    Keeps a registry that maps each serialization type name to its SERDE
    class and instantiates the matching class on request.

    Returns
    -------
    model_save_serde: Instance of `ads.model.SERDE`.
    """

    # Registry: serialization type name -> SERDE class.
    _factory = {
        MODEL_SERIALIZATION_TYPE_CLOUDPICKLE: CloudpickleModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_ONNX: OnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORHCSCRIPT: TorchScriptModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORCH: PyTorchModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORCH_ONNX: PyTorchOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TF: TensorFlowModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TF_ONNX: TensorFlowOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_JOBLIB: JoblibModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX: SklearnOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_LIGHTGBM: LightGBMModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX: LightGBMOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST: XgboostJsonModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ: XgboostUbjModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_TXT: XgboostTxtModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX: XgboostOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_SPARK: SparkModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_HUGGINGFACE: HuggingFacePipelineSaveSERDE,
    }

    @classmethod
    def get(cls, se: str):
        """Return a new SERDE instance for the given serialization type.

        Parameters
        ----------
        se: str
            Serialization type name; must be a key of the factory registry.

        Returns
        -------
        A new instance of the SERDE class registered under `se`.

        Raises
        ------
        ValueError
            If `se` is not a registered serialization type.
        """
        serde = cls._factory.get(se)
        if serde is None:
            raise ValueError(
                f"This {se} format is not supported. "
                f"Currently support the following format: {SUPPORTED_MODEL_SERIALIZERS}."
            )
        return serde()