Source code for ads.model.serde.model_serializer

#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import cloudpickle
import numpy as np
import pandas as pd
from ads.model.serde.common import Serializer, Deserializer
from ads.common.decorator.runtime_dependency import (
    runtime_dependency,
    OptionalDependency,
)
from ads.common import logger
from pandas.api.types import is_numeric_dtype, is_string_dtype
from typing import Any, Dict, List, Optional, Tuple, Union
from joblib import dump


# String identifiers of the model serialization formats known to this module.
# They are the keys under which `ModelSerializerFactory` registers its
# serializers.
MODEL_SERIALIZATION_TYPE_ONNX = "onnx"
MODEL_SERIALIZATION_TYPE_CLOUDPICKLE = "cloudpickle"
# NOTE: "TORHCSCRIPT" is a historical misspelling. The name is kept because
# existing code refers to it; new code should use the correctly spelled alias
# defined right below.
MODEL_SERIALIZATION_TYPE_TORHCSCRIPT = "torchscript"
MODEL_SERIALIZATION_TYPE_TORCHSCRIPT = MODEL_SERIALIZATION_TYPE_TORHCSCRIPT
MODEL_SERIALIZATION_TYPE_TORCH = "torch"
MODEL_SERIALIZATION_TYPE_TORCH_ONNX = "torch_onnx"
MODEL_SERIALIZATION_TYPE_TF = "tf"
MODEL_SERIALIZATION_TYPE_TF_ONNX = "tf_onnx"
MODEL_SERIALIZATION_TYPE_JOBLIB = "joblib"
MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX = "sklearn_onnx"
MODEL_SERIALIZATION_TYPE_LIGHTGBM = "lightgbm"
MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX = "lightgbm_onnx"
MODEL_SERIALIZATION_TYPE_XGBOOST = "xgboost"
MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ = "xgboost_ubj"
MODEL_SERIALIZATION_TYPE_XGBOOST_TXT = "xgboost_txt"
MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX = "xgboost_onnx"
MODEL_SERIALIZATION_TYPE_SPARK = "spark"
MODEL_SERIALIZATION_TYPE_HUGGINGFACE = "huggingface"


# Serialization format identifiers advertised as supported. This list is what
# `ModelSerializerFactory.get` prints in its "format is not supported" error.
# NOTE(review): `ModelSerializerFactory` also registers
# MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ and MODEL_SERIALIZATION_TYPE_XGBOOST_TXT,
# which are not listed here -- confirm whether the omission is intentional.
SUPPORTED_MODEL_SERIALIZERS = [
    MODEL_SERIALIZATION_TYPE_ONNX,
    MODEL_SERIALIZATION_TYPE_CLOUDPICKLE,
    MODEL_SERIALIZATION_TYPE_TORHCSCRIPT,
    MODEL_SERIALIZATION_TYPE_TORCH,
    MODEL_SERIALIZATION_TYPE_TORCH_ONNX,
    MODEL_SERIALIZATION_TYPE_TF,
    MODEL_SERIALIZATION_TYPE_TF_ONNX,
    MODEL_SERIALIZATION_TYPE_JOBLIB,
    MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX,
    MODEL_SERIALIZATION_TYPE_LIGHTGBM,
    MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX,
    MODEL_SERIALIZATION_TYPE_XGBOOST,
    MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX,
    MODEL_SERIALIZATION_TYPE_SPARK,
    MODEL_SERIALIZATION_TYPE_HUGGINGFACE,
]


class ModelSerializerType:
    """Namespace of the cloudpickle and ONNX serialization identifiers."""

    CLOUDPICKLE = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE
    ONNX = MODEL_SERIALIZATION_TYPE_ONNX


class PyTorchModelSerializerType:
    """Namespace of the torch, torchscript and torch-ONNX identifiers."""

    TORCH = MODEL_SERIALIZATION_TYPE_TORCH
    TORCHSCRIPT = MODEL_SERIALIZATION_TYPE_TORHCSCRIPT
    ONNX = MODEL_SERIALIZATION_TYPE_TORCH_ONNX


class TensorflowModelSerializerType:
    """Namespace of the tensorflow and tensorflow-ONNX identifiers."""

    TENSORFLOW = MODEL_SERIALIZATION_TYPE_TF
    ONNX = MODEL_SERIALIZATION_TYPE_TF_ONNX


class LightGBMModelSerializerType:
    """Namespace of the lightgbm and lightgbm-ONNX identifiers."""

    LIGHTGBM = MODEL_SERIALIZATION_TYPE_LIGHTGBM
    ONNX = MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX


class SklearnModelSerializerType:
    """Namespace of the joblib, cloudpickle and sklearn-ONNX identifiers."""

    JOBLIB = MODEL_SERIALIZATION_TYPE_JOBLIB
    CLOUDPICKLE = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE
    ONNX = MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX


class XgboostModelSerializerType:
    """Namespace of the xgboost and xgboost-ONNX identifiers."""

    XGBOOST = MODEL_SERIALIZATION_TYPE_XGBOOST
    ONNX = MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX


class SparkModelSerializerType:
    """Namespace holding the spark identifier."""

    SPARK = MODEL_SERIALIZATION_TYPE_SPARK


class HuggingFaceSerializerType:
    """Namespace holding the huggingface identifier."""

    HUGGINGFACE = MODEL_SERIALIZATION_TYPE_HUGGINGFACE
class ModelSerializer(Serializer):
    """Parent class from which new model serializers are derived."""

    def __init__(self, model_file_suffix):
        """Initialize the base serializer and record `model_file_suffix`.

        Args:
            model_file_suffix: Suffix value stored on the instance as
                `self.model_file_suffix`.
        """
        super().__init__()
        self.model_file_suffix = model_file_suffix
class ModelDeserializer(Deserializer):
    """Parent class from which new model deserializers are derived."""

    def deserialize(self, **kwargs):
        """Not implemented at this level; always raises NotImplementedError."""
        raise NotImplementedError()
class CloudPickleModelSerializer(ModelSerializer):
    """Model serializer backed by `cloudpickle`."""

    def __init__(self, model_file_suffix="pkl"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Write `estimator` to `model_path` with `cloudpickle.dump`.

        See https://docs.python.org/3/library/pickle.html#pickle.dump for more details.

        Args:
            estimator: The model to be saved.
            model_path: Path of the file to write; it is opened in binary
                write mode.
            kwargs:
                model_save: (dict, optional).
                    Extra keyword options forwarded to `cloudpickle.dump`.
        """
        dump_options = kwargs.pop("model_save", {})
        with open(model_path, "wb") as model_file:
            cloudpickle.dump(estimator, model_file, **dump_options)
class JobLibModelSerializer(ModelSerializer):
    """Model serializer backed by `joblib`."""

    def __init__(self, model_file_suffix="joblib"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Write `estimator` to `model_path` with `joblib.dump`.

        See https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html for more details.

        Args:
            estimator: The model to be saved.
            model_path: Destination handed to `joblib.dump` as-is.
            kwargs:
                model_save: (dict, optional).
                    Extra keyword options forwarded to `joblib.dump`.
        """
        dump_options = kwargs.pop("model_save", {})
        dump(estimator, model_path, **dump_options)
class SparkModelSerializer(ModelSerializer):
    """Model serializer for Spark models."""

    def __init__(self, model_file_suffix=""):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Save `estimator` to `model_path` through its writer in overwrite mode.

        Args:
            estimator: The model to be saved; must expose `write()`.
            model_path: Destination passed to the writer's `save`.
        """
        writer = estimator.write()
        overwriting_writer = writer.overwrite()
        overwriting_writer.save(model_path)
class PyTorchModelSerializer(ModelSerializer):
    """Model serializer that stores a PyTorch model with torch.save().

    See https://pytorch.org/docs/stable/generated/torch.save.html for more details.
    """

    def __init__(self, model_file_suffix="pt"):
        super().__init__(model_file_suffix=model_file_suffix)

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(self, estimator, model_path, **kwarg):
        """Save the `state_dict()` of `estimator` to `model_path`.

        Args:
            estimator: The model to be saved; must expose `state_dict()`.
            model_path: Destination handed to `torch.save`.
        """
        # Only the state dict is persisted, not the module object itself.
        weights = estimator.state_dict()
        torch.save(weights, model_path)
class TorchScriptModelSerializer(ModelSerializer):
    """Model serializer that stores a PyTorch model as TorchScript.

    See https://pytorch.org/tutorials/beginner/saving_loading_models.html#export-load-model-in-torchscript-format
    for more details.
    """

    def __init__(self, model_file_suffix="pt"):
        super().__init__(model_file_suffix=model_file_suffix)

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(self, estimator, model_path, **kwargs):
        """Script `estimator` with `torch.jit.script` and save it to `model_path`.

        Args:
            estimator: The model to be saved.
            model_path: Destination handed to `torch.jit.save`.
        """
        torch.jit.save(torch.jit.script(estimator), model_path)
class LightGBMModelSerializer(ModelSerializer):
    """Model serializer that stores a LightGBM model via `save_model` (txt)."""

    def __init__(self, model_file_suffix="txt"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate to `estimator.save_model(model_path)`.

        Args:
            estimator: The model to be saved; must expose `save_model`.
            model_path: Destination handed to `save_model`.
        """
        save = estimator.save_model
        save(model_path)
class XgboostJsonModelSerializer(ModelSerializer):
    """Model serializer that stores an Xgboost model via `save_model` (JSON)."""

    def __init__(self, model_file_suffix="json"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate to `estimator.save_model(model_path)`.

        See https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must expose `save_model`.
            model_path: Destination handed to `save_model`.
        """
        save = estimator.save_model
        save(model_path)
class XgboostTxtModelSerializer(ModelSerializer):
    """Model serializer that stores an Xgboost model via `save_model` (txt)."""

    def __init__(self, model_file_suffix="txt"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate to `estimator.save_model(model_path)`.

        See https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must expose `save_model`.
            model_path: Destination handed to `save_model`.
        """
        save = estimator.save_model
        save(model_path)
class XgboostUbjModelSerializer(ModelSerializer):
    """Model serializer that stores an Xgboost model via `save_model` (binary JSON)."""

    def __init__(self, model_file_suffix="ubj"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate to `estimator.save_model(model_path)`.

        See https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.Booster.save_model
        for more details.

        Args:
            estimator: The model to be saved; must expose `save_model`.
            model_path: Destination handed to `save_model`.
        """
        save = estimator.save_model
        save(model_path)
class TensorFlowModelSerializer(ModelSerializer):
    """Model serializer for Tensorflow models."""

    def __init__(self, model_file_suffix="h5"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Delegate to `estimator.save(model_path)`.

        Args:
            estimator: The model to be saved; must expose `save`.
            model_path: Destination handed to `save`.
        """
        save = estimator.save
        save(model_path)
class HuggingFaceModelSerializer(ModelSerializer):
    """Model serializer for a HuggingFace pipeline."""

    def __init__(self, model_file_suffix=""):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(self, estimator, model_path, **kwargs):
        """Save the pipeline, then re-save its model config into `model_path`.

        Args:
            estimator: The pipeline to be saved; must expose
                `save_pretrained` and `model.config`.
            model_path: Directory passed as `save_directory`.
        """
        estimator.save_pretrained(save_directory=model_path)
        # The config is written a second time, after
        # `use_pretrained_backbone` has been switched off.
        config = estimator.model.config
        config.use_pretrained_backbone = False
        config.save_pretrained(save_directory=model_path)
class OnnxModelSerializer(ModelSerializer):
    """Parent class of the per-framework ONNX converters."""

    def __init__(self, model_file_suffix="onnx"):
        super().__init__(model_file_suffix=model_file_suffix)

    def serialize(
        self,
        estimator,
        model_path,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """Convert `estimator` to ONNX and write the result to `model_path`.

        Args:
            estimator: The model to be saved. It is stored on the instance as
                `self.estimator` before the conversion runs.
            model_path: Path of the file to write; it is opened in binary
                write mode after the conversion has produced a model.
            initial_types: (List[Tuple], optional) a python list.
                Each element is a tuple of a variable name and a data type.
            X_sample: (any, optional). Defaults to None.
                Contains model inputs such that model(X_sample) is a valid
                invocation of the model.
            kwargs: Forwarded unchanged to `_to_onnx`.
        """
        self.estimator = estimator
        onnx_model = self._to_onnx(
            initial_types=initial_types, X_sample=X_sample, **kwargs
        )
        payload = onnx_model.SerializeToString()
        with open(model_path, "wb") as model_file:
            model_file.write(payload)

    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """Conversion hook; not implemented here, always raises NotImplementedError."""
        raise NotImplementedError()
class SklearnOnnxModelSerializer(OnnxModelSerializer):
    """Converts a scikit-learn model (or pipeline) into ONNX."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def _version_tuple(version) -> Tuple[int, ...]:
        """Return the leading numeric components of a dotted version string.

        Parsing stops at the first component that does not start with a
        digit, e.g. ``"1.9.2"`` -> ``(1, 9, 2)`` and ``"1.10.0rc1"`` ->
        ``(1, 10, 0)``. Used for simple "older than" comparisons without
        depending on an external version-parsing package.
        """
        components = []
        for piece in str(version).split("."):
            digits = ""
            for char in piece:
                if not char.isdigit():
                    break
                digits += char
            if not digits:
                break
            components.append(int(digits))
        return tuple(components)

    @runtime_dependency(module="onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="xgboost", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(module="lightgbm", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(module="skl2onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="onnxmltools", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="onnxmltools.convert.xgboost.operator_converters.XGBoost",
        object="convert_xgboost",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="onnxmltools.convert.lightgbm.operator_converters.LightGbm",
        object="convert_lightgbm",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given scikit-learn model.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.
        kwargs:
            Forwarded to the underlying converter. For pipelines, `options`
            is consumed by the first matching boosted-tree converter
            registration and `target_opset` is honoured when the initial
            types are auto-generated.

        Returns
        -------
        onnx.onnx_ml_pb2.ModelProto
            An ONNX model (type: ModelProto) which is equivalent to the input scikit-learn model.

        Raises
        ------
        ValueError
            If neither `initial_types` nor `X_sample` is given, or if the
            conversion with auto-generated initial types fails.
        """
        auto_generated_initial_types = None
        if not initial_types:
            if X_sample is None:
                raise ValueError(
                    " At least one of `X_sample` or `initial_types` must be provided."
                )
            auto_generated_initial_types = self._generate_initial_types(X_sample)

        if str(type(self.estimator)).startswith("<class 'sklearn.pipeline"):
            # Pipelines may embed xgboost/lightgbm estimators for which
            # skl2onnx has no converter out of the box; register them first.
            model_types = [type(val[1]) for val in self.estimator.steps]
            if xgboost.sklearn.XGBClassifier in model_types:
                skl2onnx.update_registered_converter(
                    xgboost.XGBClassifier,
                    "XGBoostXGBClassifier",
                    skl2onnx.common.shape_calculator.calculate_linear_classifier_output_shapes,
                    convert_xgboost,
                    options=kwargs.pop(
                        "options", {"nocl": [True, False], "zipmap": [True, False]}
                    ),
                )
            if xgboost.sklearn.XGBRegressor in model_types:
                skl2onnx.update_registered_converter(
                    xgboost.XGBRegressor,
                    "XGBoostXGBRegressor",
                    skl2onnx.common.shape_calculator.calculate_linear_regressor_output_shapes,
                    convert_xgboost,
                )
            if lightgbm.sklearn.LGBMClassifier in model_types:
                skl2onnx.update_registered_converter(
                    lightgbm.LGBMClassifier,
                    "LightGbmLGBMClassifier",
                    skl2onnx.common.shape_calculator.calculate_linear_classifier_output_shapes,
                    convert_lightgbm,
                    options=kwargs.pop(
                        "options",
                        {"nocl": [True, False], "zipmap": [True, False, "columns"]},
                    ),
                )
            if lightgbm.sklearn.LGBMRegressor in model_types:
                version_tuple = self._version_tuple

                def skl2onnx_convert_lightgbm(scope, operator, container):
                    options = scope.get_options(operator.raw_operator)
                    if "split" in options:
                        # Compare versions numerically; the previous code
                        # referenced an un-imported `StrictVersion` and a
                        # non-existent `logger.warnings`.
                        if version_tuple(onnxmltools.__version__) < (1, 9, 2):
                            logger.warning(
                                "Option split was released in version 1.9.2 but %s is "
                                "installed. It will be ignored.",
                                onnxmltools.__version__,
                            )
                        operator.split = options["split"]
                    else:
                        operator.split = None
                    convert_lightgbm(scope, operator, container)

                skl2onnx.update_registered_converter(
                    lightgbm.LGBMRegressor,
                    "LightGbmLGBMRegressor",
                    skl2onnx.common.shape_calculator.calculate_linear_regressor_output_shapes,
                    skl2onnx_convert_lightgbm,
                    options=kwargs.pop("options", {"split": None}),
                )

            if initial_types:
                return skl2onnx.convert_sklearn(
                    self.estimator, initial_types=initial_types, **kwargs
                )
            # Pop `target_opset` so a caller-supplied value is honoured
            # instead of colliding with the explicit keyword below.
            target_opset = kwargs.pop("target_opset", None)
            try:
                return skl2onnx.convert_sklearn(
                    self.estimator,
                    initial_types=auto_generated_initial_types,
                    target_opset=target_opset,
                    **kwargs,
                )
            except Exception as e:
                raise ValueError(
                    "`initial_types` can not be autodetected. Please directly pass `initial_types`."
                ) from e

        if initial_types:
            return onnxmltools.convert_sklearn(
                self.estimator,
                initial_types=initial_types,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        try:
            return onnxmltools.convert_sklearn(
                self.estimator,
                initial_types=auto_generated_initial_types,
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        except Exception as e:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            ) from e

    @runtime_dependency(module="skl2onnx", install_from=OptionalDependency.ONNX)
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate intial types.

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types, or None when the fallback type guessing fails.

        Raises
        ------
        ValueError
            If `X_sample` is all-numerical but neither its shape nor the
            estimator's `n_features_in_` yields a column count.
        """
        data_types = skl2onnx.common.data_types
        if self._is_all_numerical_array_dataframe(X_sample):
            # All-numerical dataframe or ndarray: one float tensor input.
            if hasattr(X_sample, "shape") and len(X_sample.shape) >= 2:
                return [
                    ("input", data_types.FloatTensorType([None, X_sample.shape[1]]))
                ]
            if hasattr(self.estimator, "n_features_in_"):
                n_cols = self.estimator.n_features_in_
                return [("input", data_types.FloatTensorType([None, n_cols]))]
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            )

        if self.is_either_numerical_or_string_dataframe(X_sample):
            # Mixed numerical/string dataframe: one input per column.
            auto_generated_initial_types = []
            for _, col in X_sample.items():
                if is_numeric_dtype(col.dtypes):
                    tensor_type = data_types.FloatTensorType([None, 1])
                else:
                    tensor_type = data_types.StringTensorType([None, 1])
                auto_generated_initial_types.append((col.name, tensor_type))
            return auto_generated_initial_types

        # Best effort for anything else; the caller reports a conversion
        # error if no types could be guessed.
        try:
            return data_types.guess_data_type(
                np.array(X_sample) if isinstance(X_sample, list) else X_sample
            )
        except Exception:
            return None

    @staticmethod
    def _is_all_numerical_array_dataframe(
        data: Union[pd.DataFrame, np.ndarray]
    ) -> bool:
        """Check whether all the columns are numerical for numpy array and dataframe.
        For data with any other data types, it will return False.

        Parameters
        ----------
        data: Union[pd.DataFrame, np.ndarray]

        Returns
        -------
        bool
            Whether all the columns in a pandas dataframe or numpy array are all numerical.
        """
        if isinstance(data, pd.DataFrame):
            if all(is_numeric_dtype(dtype) for dtype in data.dtypes):
                return True
        return isinstance(data, np.ndarray) and is_numeric_dtype(data.dtype)

    @staticmethod
    def is_either_numerical_or_string_dataframe(data: pd.DataFrame) -> bool:
        """Check whether all the columns are either numerical or string for dataframe."""
        return isinstance(data, pd.DataFrame) and all(
            is_numeric_dtype(col.dtypes) or is_string_dtype(col.dtypes)
            for _, col in data.items()
        )
class LightGBMOnnxModelSerializer(OnnxModelSerializer):
    """Converts LightGBM model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="onnxmltools.convert",
        object="convert_lightgbm",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given LightGBM model.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.
        kwargs:
            target_opset: (optional). Defaults to None. Passed to
            `convert_lightgbm`; the remaining entries are forwarded as-is.

        Returns
        ------
        An ONNX model (type: ModelProto) which is equivalent to the input LightGBM model.

        Raises
        ------
        ValueError
            If the initial types can not be derived, or the conversion with
            auto-generated initial types fails.
        """
        target_opset = kwargs.pop("target_opset", None)
        if initial_types:
            return convert_lightgbm(
                self.estimator,
                initial_types=initial_types,
                target_opset=target_opset,
                **kwargs,
            )

        auto_generated_initial_types = self._generate_initial_types(X_sample)
        try:
            return convert_lightgbm(
                self.estimator,
                initial_types=auto_generated_initial_types,
                target_opset=target_opset,
                **kwargs,
            )
        except Exception as e:
            # Chain the converter error so the real cause stays visible.
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            ) from e

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate intial types.

        The column count is taken from `X_sample` when it has at least two
        dimensions, otherwise from the estimator (`num_feature()` or
        `n_features_in_`).

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types.

        Raises
        ------
        ValueError
            If no column count can be determined.
        """
        if (
            X_sample is not None
            and hasattr(X_sample, "shape")
            and len(X_sample.shape) >= 2
        ):
            # Guarding on the rank avoids an IndexError for 1-D samples,
            # which now fall through to the estimator-based detection.
            n_cols = X_sample.shape[1]
        elif hasattr(self.estimator, "num_feature"):
            n_cols = self.estimator.num_feature()
        elif hasattr(self.estimator, "n_features_in_"):
            n_cols = self.estimator.n_features_in_
        else:
            raise ValueError(
                "`initial_types` can not be detected. Please directly pass initial_types."
            )
        return [("input", FloatTensorType([None, n_cols]))]
class XgboostOnnxModelSerializer(OnnxModelSerializer):
    """Converts Xgboost model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(module="xgboost", install_from=OptionalDependency.BOOSTED)
    @runtime_dependency(
        module="skl2onnx",
        object="convert_sklearn",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx",
        object="update_registered_converter",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.shape_calculator",
        object="calculate_linear_classifier_output_shapes",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(
        module="skl2onnx.common.shape_calculator",
        object="calculate_linear_regressor_output_shapes",
        install_from=OptionalDependency.ONNX,
    )
    @runtime_dependency(module="onnxmltools", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="onnxmltools.convert.xgboost.operator_converters.XGBoost",
        object="convert_xgboost",
        install_from=OptionalDependency.ONNX,
    )
    def _to_onnx(
        self,
        initial_types: List[Tuple] = None,
        X_sample: Union[list, tuple, pd.DataFrame, pd.Series, np.ndarray] = None,
        **kwargs,
    ):
        """
        Produces an equivalent ONNX model of the given Xgboost model.

        Parameters
        ----------
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(X_sample) is a valid invocation of the model.
            Used to generate initial_types.

        Returns
        -------
        onnx.onnx_ml_pb2.ModelProto
            An ONNX model (type: ModelProto) which is equivalent to the input xgboost model.

        Raises
        ------
        ValueError
            If the initial types can not be derived, or the conversion with
            auto-generated initial types fails.
        """
        auto_generated_initial_types = None
        if not initial_types:
            auto_generated_initial_types = self._generate_initial_types(X_sample)

        estimator_type = type(self.estimator)
        if str(estimator_type).startswith("<class 'xgboost.sklearn."):
            # xgboost's scikit-learn API: go through skl2onnx, registering
            # the xgboost converter for the exact estimator class first.
            if estimator_type is xgboost.sklearn.XGBClassifier:
                update_registered_converter(
                    xgboost.XGBClassifier,
                    "XGBoostXGBClassifier",
                    calculate_linear_classifier_output_shapes,
                    convert_xgboost,
                    options={"nocl": [True, False], "zipmap": [True, False]},
                )
            elif estimator_type is xgboost.sklearn.XGBRegressor:
                update_registered_converter(
                    xgboost.XGBRegressor,
                    "XGBoostXGBRegressor",
                    calculate_linear_regressor_output_shapes,
                    convert_xgboost,
                )
            if initial_types:
                return convert_sklearn(
                    self.estimator, initial_types=initial_types, **kwargs
                )
            try:
                return convert_sklearn(
                    self.estimator,
                    initial_types=auto_generated_initial_types,
                    **kwargs,
                )
            except Exception as e:
                raise ValueError(
                    "`initial_types` can not be autodetected. Please directly pass `initial_types`."
                ) from e

        # xgboost api
        if initial_types:
            return onnxmltools.convert_xgboost(
                self.estimator,
                initial_types=initial_types,
                target_opset=kwargs.pop("target_opset", None),
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        try:
            return onnxmltools.convert_xgboost(
                self.estimator,
                initial_types=auto_generated_initial_types,
                target_opset=kwargs.pop("target_opset", None),
                targeted_onnx=onnx.__version__,
                **kwargs,
            )
        except Exception as e:
            raise ValueError(
                "`initial_types` can not be autodetected. Please directly pass `initial_types`."
            ) from e

    @runtime_dependency(
        module="skl2onnx.common.data_types",
        object="FloatTensorType",
        install_from=OptionalDependency.ONNX,
    )
    def _generate_initial_types(self, X_sample: Any) -> List:
        """Auto generate intial types.

        The column count is taken from the estimator when available
        (`n_features_in_`, then a non-empty `feature_names`), otherwise
        from the second dimension of `X_sample`.

        Parameters
        ----------
        X_sample: (Any)
            Train data.

        Returns
        -------
        List
            Initial types.

        Raises
        ------
        ValueError
            If the estimator exposes no feature count and `X_sample` is
            missing or does not have at least two dimensions.
        """
        if hasattr(self.estimator, "n_features_in_"):
            # sklearn api
            n_cols = self.estimator.n_features_in_
            return [("input", FloatTensorType([None, n_cols]))]
        if hasattr(self.estimator, "feature_names") and self.estimator.feature_names:
            # xgboost learning api
            n_cols = len(self.estimator.feature_names)
            return [("input", FloatTensorType([None, n_cols]))]

        if X_sample is None:
            raise ValueError(
                " At least one of `X_sample` or `initial_types` must be provided."
            )
        if hasattr(X_sample, "shape") and len(X_sample.shape) >= 2:
            return [("input", FloatTensorType([None, X_sample.shape[1]]))]
        raise ValueError(
            "`initial_types` can not be detected. Please directly pass initial_types."
        )
class PytorchOnnxModelSerializer(OnnxModelSerializer):
    """Converts Pytorch model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="torch", install_from=OptionalDependency.PYTORCH)
    def serialize(
        self,
        estimator,
        model_path: str,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Exports the given Pytorch model into ONNX format.

        Parameters
        ----------
        estimator:
            The Pytorch model to export.
        model_path: str
            Path to save the serialized model.
        X_sample: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]. Defaults to None.
            A sample of input data. It is used as `onnx_args` when
            `onnx_args` is not given.
        kwargs:
            onnx_args: (tuple or torch.Tensor), default to None
                Contains model inputs such that model(onnx_args) is a valid
                invocation of the model. Can be structured either as: 1) ONLY A
                TUPLE OF ARGUMENTS; 2) A TENSOR; 3) A TUPLE OF ARGUMENTS ENDING
                WITH A DICTIONARY OF NAMED ARGUMENTS
            input_names: (List[str], optional). Defaults to ["input"].
                Names to assign to the input nodes of the graph, in order.
            output_names: (List[str], optional). Defaults to ["output"].
                Names to assign to the output nodes of the graph, in order.
            dynamic_axes: (dict, optional). Defaults to None.
                Specify axes of tensors as dynamic (i.e. known only at run-time).

        Returns
        -------
        None
            Nothing

        Raises
        ------
        AssertionError
            if onnx module is not support by the current version of torch
        ValueError
            if neither `onnx_args` nor `X_sample` is provided
            if `model_path` is not provided
        """
        onnx_args = kwargs.get("onnx_args", None)
        input_names = kwargs.get("input_names", ["input"])
        output_names = kwargs.get("output_names", ["output"])
        dynamic_axes = kwargs.get("dynamic_axes", None)

        # Raised explicitly (rather than via `assert`) so the check still
        # runs when Python is started with -O; the exception type is kept.
        if not hasattr(torch, "onnx"):
            raise AssertionError(
                f"This version of pytorch {torch.__version__} does not appear to support onnx "
                "conversion."
            )

        if onnx_args is None:
            if X_sample is None:
                raise ValueError(
                    "`onnx_args` can not be detected. "
                    "The parameter `onnx_args` must be provided to export pytorch model as onnx."
                )
            logger.warning(
                "Since `onnx_args` is not provided, `onnx_args` is "
                "detected from `X_sample` to export pytorch model as onnx."
            )
            onnx_args = X_sample

        if not model_path:
            raise ValueError(
                "The parameter `model_path` must be provided to save the model file."
            )

        torch.onnx.export(
            estimator,
            args=onnx_args,
            f=model_path,
            input_names=input_names,
            output_names=output_names,
            dynamic_axes=dynamic_axes,
        )
class TensorFlowOnnxModelSerializer(OnnxModelSerializer):
    """Converts Tensorflow model into onnx format."""

    def __init__(self):
        super().__init__()

    @runtime_dependency(module="tf2onnx", install_from=OptionalDependency.ONNX)
    @runtime_dependency(
        module="tensorflow",
        short_name="tf",
        install_from=OptionalDependency.TENSORFLOW,
    )
    def serialize(
        self,
        estimator,
        model_path: str = None,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs,
    ):
        """
        Exports the given Tensorflow model into ONNX format.

        Parameters
        ----------
        estimator:
            The Tensorflow (Keras) model to export.
        model_path: str, default to None
            Path to save the serialized model.
        X_sample: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]. Defaults to None.
            A sample of input data that will be used to generate input schema and detect input_signature.
        kwargs:
            opset_version: (optional). Defaults to None. Passed as `opset`
                to `tf2onnx.convert.from_keras`.
            input_signature: (optional). Defaults to None. When omitted it is
                detected from the estimator's `input_shape`, or else from
                the shape and dtype of `X_sample`.

        Returns
        -------
        None
            Nothing

        Raises
        ------
        ValueError
            if model_path is not provided
            if `input_signature` is neither provided nor detectable
            if the conversion with a detected `input_signature` fails
        """
        opset_version = kwargs.get("opset_version", None)
        input_signature = kwargs.get("input_signature", None)

        if not model_path:
            raise ValueError(
                "The parameter `model_path` must be provided to save the model file."
            )

        if input_signature is not None:
            # Explicit signature: convert directly and let errors propagate.
            tf2onnx.convert.from_keras(
                estimator,
                input_signature=input_signature,
                opset=opset_version,
                output_path=model_path,
            )
            return

        if hasattr(estimator, "input_shape"):
            if not isinstance(estimator.input, list):
                # single input
                detected_input_signature = (
                    tf.TensorSpec(
                        estimator.input_shape,
                        dtype=estimator.input.dtype,
                        name="input",
                    ),
                )
            else:
                # multiple input
                detected_input_signature = [
                    tf.TensorSpec(estimator.input_shape[i], dtype=model_input.dtype)
                    for i, model_input in enumerate(estimator.input)
                ]
        elif X_sample is not None and hasattr(X_sample, "shape"):
            logger.warning(
                "Since `input_signature` is not provided, `input_signature` is "
                "detected from `X_sample` to export tensorflow model as "
                "onnx."
            )
            # The leading dimension is left unspecified in the signature.
            X_sample_shape = list(X_sample.shape)
            X_sample_shape[0] = None
            detected_input_signature = (
                tf.TensorSpec(X_sample_shape, dtype=X_sample.dtype, name="input"),
            )
        else:
            raise ValueError(
                "The parameter `input_signature` must be provided to export "
                "tensorflow model as onnx."
            )

        try:
            tf2onnx.convert.from_keras(
                estimator,
                input_signature=detected_input_signature,
                opset=opset_version,
                output_path=model_path,
            )
        except Exception as e:
            # Chain the converter error so the real cause stays visible.
            raise ValueError(
                "`input_signature` can not be autodetected. The parameter `input_signature` must be provided to export "
                "tensorflow model as onnx."
            ) from e
class OnnxModelSaveSERDE(OnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_ONNX


class CloudpickleModelSaveSERDE(CloudPickleModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_CLOUDPICKLE``."""

    name = MODEL_SERIALIZATION_TYPE_CLOUDPICKLE


class JoblibModelSaveSERDE(JobLibModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_JOBLIB``."""

    name = MODEL_SERIALIZATION_TYPE_JOBLIB


class SparkModelSaveSERDE(SparkModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_SPARK``."""

    name = MODEL_SERIALIZATION_TYPE_SPARK


class HuggingFacePipelineSaveSERDE(HuggingFaceModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_HUGGINGFACE``."""

    name = MODEL_SERIALIZATION_TYPE_HUGGINGFACE


class TorchScriptModelSaveSERDE(TorchScriptModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_TORHCSCRIPT``."""

    name = MODEL_SERIALIZATION_TYPE_TORHCSCRIPT


class PyTorchModelSaveSERDE(PyTorchModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_TORCH``."""

    name = MODEL_SERIALIZATION_TYPE_TORCH


class PyTorchOnnxModelSaveSERDE(PytorchOnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_TORCH_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_TORCH_ONNX


class TensorFlowModelSaveSERDE(TensorFlowModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_TF``."""

    name = MODEL_SERIALIZATION_TYPE_TF


class TensorFlowOnnxModelSaveSERDE(TensorFlowOnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_TF_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_TF_ONNX


class SklearnOnnxModelSaveSERDE(SklearnOnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX


class LightGBMModelSaveSERDE(LightGBMModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_LIGHTGBM``."""

    name = MODEL_SERIALIZATION_TYPE_LIGHTGBM


class LightGBMOnnxModelSaveSERDE(LightGBMOnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX


class XgboostJsonModelSaveSERDE(XgboostJsonModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_XGBOOST``."""

    name = MODEL_SERIALIZATION_TYPE_XGBOOST


class XgboostUbjModelSaveSERDE(XgboostUbjModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ``."""

    name = MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ


class XgboostTxtModelSaveSERDE(XgboostTxtModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_XGBOOST_TXT``."""

    name = MODEL_SERIALIZATION_TYPE_XGBOOST_TXT


class XgboostOnnxModelSaveSERDE(XgboostOnnxModelSerializer, ModelDeserializer):
    """SERDE named ``MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX``."""

    name = MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX
class ModelSerializerFactory:
    """Model Serializer Factory.

    Maps each serialization format identifier to the SERDE class that
    implements it.

    Returns
    -------
    model_save_serde: Intance of `ads.model.SERDE`".
    """

    # Registry: serialization format identifier -> SERDE class.
    _factory = {
        MODEL_SERIALIZATION_TYPE_CLOUDPICKLE: CloudpickleModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_ONNX: OnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORHCSCRIPT: TorchScriptModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORCH: PyTorchModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TORCH_ONNX: PyTorchOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TF: TensorFlowModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_TF_ONNX: TensorFlowOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_JOBLIB: JoblibModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_SKLEARN_ONNX: SklearnOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_LIGHTGBM: LightGBMModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_LIGHTGBM_ONNX: LightGBMOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST: XgboostJsonModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_UBJ: XgboostUbjModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_TXT: XgboostTxtModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_XGBOOST_ONNX: XgboostOnnxModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_SPARK: SparkModelSaveSERDE,
        MODEL_SERIALIZATION_TYPE_HUGGINGFACE: HuggingFacePipelineSaveSERDE,
    }

    @classmethod
    def get(cls, se: str):
        """Return a new SERDE instance for the serialization format `se`.

        Parameters
        ----------
        se: str
            Serialization format identifier, one of the registry keys.

        Returns
        -------
        A freshly constructed instance of the SERDE class registered for `se`.

        Raises
        ------
        ValueError
            If `se` is not a registered format. The message lists the
            registered formats, taken from the registry itself so it can not
            drift out of sync with what is actually supported.
        """
        serde = cls._factory.get(se, None)
        if serde:
            return serde()
        raise ValueError(
            f"This {se} format is not supported. "
            f"Currently support the following format: {list(cls._factory)}."
        )