# Source code for ads.model.framework.lightgbm_model

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2022, 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from ads.common import logger
from ads.model.extractor.lightgbm_extractor import LightgbmExtractor
from ads.model.generic_model import FrameworkSpecificModel
from ads.model.model_properties import ModelProperties
from ads.model.serde.model_serializer import LightGBMModelSerializerType
from ads.model.common.utils import DEPRECATE_AS_ONNX_WARNING
from ads.model.serde.common import SERDE


class LightGBMModel(FrameworkSpecificModel):
    """LightGBMModel class for estimators from the LightGBM framework.

    Attributes
    ----------
    algorithm: str
        The algorithm of the model.
    artifact_dir: str
        Artifact directory to store the files needed for deployment.
    auth: Dict
        Default authentication is set using the `ads.set_auth` API. To override the
        default, use the `ads.common.auth.api_keys` or
        `ads.common.auth.resource_principal` to create an authentication signer to
        instantiate an IdentityClient object.
    estimator: Callable
        A trained lightgbm estimator/model using Lightgbm.
    framework: str
        "lightgbm", the framework name of the model.
    hyperparameter: dict
        The hyperparameters of the estimator.
    metadata_custom: ModelCustomMetadata
        The model custom metadata.
    metadata_provenance: ModelProvenanceMetadata
        The model provenance metadata.
    metadata_taxonomy: ModelTaxonomyMetadata
        The model taxonomy metadata.
    model_artifact: ModelArtifact
        This is built by calling prepare.
    model_deployment: ModelDeployment
        A ModelDeployment instance.
    model_file_name: str
        Name of the serialized model.
    model_id: str
        The model ID.
    properties: ModelProperties
        ModelProperties object required to save and deploy model. For more details,
        check https://accelerated-data-science.readthedocs.io/en/latest/ads.model.html#module-ads.model.model_properties.
    runtime_info: RuntimeInfo
        A RuntimeInfo instance.
    schema_input: Schema
        Schema describes the structure of the input data.
    schema_output: Schema
        Schema describes the structure of the output data.
    serialize: bool
        Whether to serialize the model to pkl file by default. If False, you need
        to serialize the model manually, save it under artifact_dir and update the
        score.py manually.
    version: str
        The framework version of the model.

    Methods
    -------
    delete_deployment(...)
        Deletes the current model deployment.
    deploy(..., **kwargs)
        Deploys a model.
    from_model_artifact(uri, model_file_name, artifact_dir, ..., **kwargs)
        Loads model from the specified folder, or zip/tar archive.
    from_model_catalog(model_id, model_file_name, artifact_dir, ..., **kwargs)
        Loads model from model catalog.
    introspect(...)
        Runs model introspection.
    predict(data, ...)
        Returns prediction of input data run against the model deployment endpoint.
    prepare(..., **kwargs)
        Prepare and save the score.py, serialized model and runtime.yaml file.
    reload(...)
        Reloads the model artifact files: `score.py` and the `runtime.yaml`.
    save(..., **kwargs)
        Saves model artifacts to the model catalog.
    summary_status(...)
        Gets a summary table of the current status.
    verify(data, ...)
        Tests if deployment works in local environment.

    Examples
    --------
    >>> import lightgbm as lgb
    >>> import tempfile
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.datasets import load_iris
    >>> from ads.model.framework.lightgbm_model import LightGBMModel

    >>> iris = load_iris()
    >>> X, y = iris.data, iris.target

    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    >>> train = lgb.Dataset(X_train, label=y_train)
    >>> param = {
    ...     'objective': 'multiclass', 'num_class': 3,
    ... }
    >>> lightgbm_estimator = lgb.train(param, train)

    >>> lightgbm_model = LightGBMModel(estimator=lightgbm_estimator,
    ... artifact_dir=tempfile.mkdtemp())

    >>> lightgbm_model.prepare(inference_conda_env="generalml_p37_cpu_v1", force_overwrite=True)
    >>> lightgbm_model.reload()
    >>> lightgbm_model.verify(X_test)
    >>> lightgbm_model.save()
    >>> model_deployment = lightgbm_model.deploy(wait_for_completion=False)
    >>> lightgbm_model.predict(X_test)
    """

    _PREFIX = "lightgbm"
    model_save_serializer_type = LightGBMModelSerializerType

    def __init__(
        self,
        estimator: Callable,
        artifact_dir: Optional[str] = None,
        properties: Optional[ModelProperties] = None,
        auth: Dict = None,
        model_save_serializer: Optional[SERDE] = None,
        model_input_serializer: Optional[SERDE] = None,
        **kwargs,
    ):
        """
        Initiates a LightGBMModel instance. This class wraps the Lightgbm model as
        estimator. Its primary purpose is to hold the trained model and do
        serialization.

        Parameters
        ----------
        estimator:
            any model object generated by Lightgbm framework
        artifact_dir: str
            Directory for generate artifact.
        properties: (ModelProperties, optional). Defaults to None.
            ModelProperties object required to save and deploy model.
        auth: (Dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need
            to override the default, use the `ads.common.auth.api_keys` or
            `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate IdentityClient
            object.
        model_save_serializer: (SERDE or str, optional). Defaults to None.
            Instance of ads.model.SERDE. Used for serialize/deserialize model.
        model_input_serializer: (SERDE, optional). Defaults to None.
            Instance of ads.model.SERDE. Used for serialize/deserialize data.

        Returns
        -------
        LightGBMModel
            LightGBMModel instance.

        Raises
        ------
        TypeError
            If the input model is not a Lightgbm model or not supported for
            serialization.

        Examples
        --------
        >>> import lightgbm as lgb
        >>> import tempfile
        >>> from sklearn.model_selection import train_test_split
        >>> from sklearn.datasets import load_iris
        >>> from ads.model.framework.lightgbm_model import LightGBMModel
        >>> iris = load_iris()
        >>> X, y = iris.data, iris.target
        >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
        >>> train = lgb.Dataset(X_train, label=y_train)
        >>> param = {
        ... 'objective': 'multiclass', 'num_class': 3,
        ... }
        >>> lightgbm_estimator = lgb.train(param, train)
        >>> lightgbm_model = LightGBMModel(estimator=lightgbm_estimator, artifact_dir=tempfile.mkdtemp())
        >>> lightgbm_model.prepare(inference_conda_env="generalml_p37_cpu_v1")
        >>> lightgbm_model.verify(X_test)
        >>> lightgbm_model.save()
        >>> model_deployment = lightgbm_model.deploy()
        >>> lightgbm_model.predict(X_test)
        >>> lightgbm_model.delete_deployment()
        """
        estimator_cls = str(type(estimator))

        # Only native Booster objects (lightgbm.basic), the sklearn-API wrappers
        # (lightgbm.sklearn) and onnxruntime sessions are accepted.
        if not estimator_cls.startswith(
            (
                "<class 'lightgbm.basic.",
                "<class 'lightgbm.sklearn.",
                "<class 'onnxruntime.",
            )
        ):
            raise TypeError(f"{estimator_cls} is not supported in LightGBMModel.")

        # Native Booster models are saved via LightGBM's own format; every other
        # accepted estimator type falls back to joblib serialization.
        fallback_serializer = (
            "lightgbm"
            if estimator_cls.startswith("<class 'lightgbm.basic.")
            else "joblib"
        )

        super().__init__(
            estimator=estimator,
            artifact_dir=artifact_dir,
            properties=properties,
            auth=auth,
            model_save_serializer=model_save_serializer or fallback_serializer,
            model_input_serializer=model_input_serializer,
            **kwargs,
        )

        # Pull framework metadata off the trained estimator.
        self._extractor = LightgbmExtractor(estimator)
        self.framework = self._extractor.framework
        self.algorithm = self._extractor.algorithm
        self.version = self._extractor.version
        self.hyperparameter = self._extractor.hyperparameter

    def serialize_model(
        self,
        as_onnx: bool = False,
        initial_types: List[Tuple] = None,
        force_overwrite: bool = False,
        X_sample: Optional[
            Union[
                Dict,
                str,
                List,
                Tuple,
                np.ndarray,
                pd.core.series.Series,
                pd.core.frame.DataFrame,
            ]
        ] = None,
        **kwargs: Dict,
    ):
        """
        Serialize and save Lightgbm model.

        Parameters
        ----------
        as_onnx: (boolean, optional). Defaults to False.
            If set as True, provide `initial_types` or `X_sample` to convert into
            ONNX.
        initial_types: (List[Tuple], optional). Defaults to None.
            Each element is a tuple of a variable name and a type.
        force_overwrite: (boolean, optional). Defaults to False.
            If set as True, overwrite serialized model if exists.
        X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
            Contains model inputs such that model(`X_sample`) is a valid invocation
            of the model. Used to generate `initial_types`.

        Returns
        -------
        None
            Nothing.
        """
        if as_onnx:
            # ONNX export is deprecated; warn, then switch the save serializer so
            # the parent class writes an ONNX artifact.
            logger.warning(DEPRECATE_AS_ONNX_WARNING)
            self.set_model_save_serializer("lightgbm_onnx")

        super().serialize_model(
            as_onnx=as_onnx,
            initial_types=initial_types,
            force_overwrite=force_overwrite,
            X_sample=X_sample,
            **kwargs,
        )