#!/usr/bin/env python
# -*- coding: utf-8 -*--
# Copyright (c) 2022, 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
from ads.common import logger
from ads.model.extractor.lightgbm_extractor import LightgbmExtractor
from ads.model.generic_model import FrameworkSpecificModel
from ads.model.model_properties import ModelProperties
from ads.model.serde.model_serializer import LightGBMModelSerializerType
from ads.model.common.utils import DEPRECATE_AS_ONNX_WARNING
from ads.model.serde.common import SERDE
[docs]
class LightGBMModel(FrameworkSpecificModel):
"""LightGBMModel class for estimators from Lightgbm framework.
Attributes
----------
algorithm: str
The algorithm of the model.
artifact_dir: str
Artifact directory to store the files needed for deployment.
auth: Dict
Default authentication is set using the `ads.set_auth` API. To override the
default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create
an authentication signer to instantiate an IdentityClient object.
estimator: Callable
A trained lightgbm estimator/model using Lightgbm.
framework: str
"lightgbm", the framework name of the model.
hyperparameter: dict
The hyperparameters of the estimator.
metadata_custom: ModelCustomMetadata
The model custom metadata.
metadata_provenance: ModelProvenanceMetadata
The model provenance metadata.
metadata_taxonomy: ModelTaxonomyMetadata
The model taxonomy metadata.
model_artifact: ModelArtifact
This is built by calling prepare.
model_deployment: ModelDeployment
A ModelDeployment instance.
model_file_name: str
Name of the serialized model.
model_id: str
The model ID.
properties: ModelProperties
ModelProperties object required to save and deploy model.
For more details, check https://accelerated-data-science.readthedocs.io/en/latest/ads.model.html#module-ads.model.model_properties.
runtime_info: RuntimeInfo
A RuntimeInfo instance.
schema_input: Schema
Schema describes the structure of the input data.
schema_output: Schema
Schema describes the structure of the output data.
serialize: bool
Whether to serialize the model to pkl file by default. If False, you need to serialize the model manually,
save it under artifact_dir and update the score.py manually.
version: str
The framework version of the model.
Methods
-------
delete_deployment(...)
Deletes the current model deployment.
deploy(..., **kwargs)
Deploys a model.
from_model_artifact(uri, model_file_name, artifact_dir, ..., **kwargs)
Loads model from the specified folder, or zip/tar archive.
from_model_catalog(model_id, model_file_name, artifact_dir, ..., **kwargs)
Loads model from model catalog.
introspect(...)
Runs model introspection.
predict(data, ...)
Returns prediction of input data run against the model deployment endpoint.
prepare(..., **kwargs)
Prepare and save the score.py, serialized model and runtime.yaml file.
reload(...)
Reloads the model artifact files: `score.py` and the `runtime.yaml`.
save(..., **kwargs)
Saves model artifacts to the model catalog.
summary_status(...)
Gets a summary table of the current status.
verify(data, ...)
Tests if deployment works in local environment.
Examples
--------
>>> import lightgbm as lgb
>>> import tempfile
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.datasets import load_iris
>>> from ads.model.framework.lightgbm_model import LightGBMModel
>>> iris = load_iris()
>>> X, y = iris.data, iris.target
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
>>> train = lgb.Dataset(X_train, label=y_train)
>>> param = {
... 'objective': 'multiclass', 'num_class': 3,
... }
>>> lightgbm_estimator = lgb.train(param, train)
>>> lightgbm_model = LightGBMModel(estimator=lightgbm_estimator,
... artifact_dir=tempfile.mkdtemp())
>>> lightgbm_model.prepare(inference_conda_env="generalml_p37_cpu_v1", force_overwrite=True)
>>> lightgbm_model.reload()
>>> lightgbm_model.verify(X_test)
>>> lightgbm_model.save()
>>> model_deployment = lightgbm_model.deploy(wait_for_completion=False)
>>> lightgbm_model.predict(X_test)
"""
_PREFIX = "lightgbm"
model_save_serializer_type = LightGBMModelSerializerType
def __init__(
self,
estimator: Callable,
artifact_dir: Optional[str] = None,
properties: Optional[ModelProperties] = None,
auth: Dict = None,
model_save_serializer: Optional[SERDE] = None,
model_input_serializer: Optional[SERDE] = None,
**kwargs,
):
"""
Initiates a LightGBMModel instance. This class wraps the Lightgbm model as estimator.
It's primary purpose is to hold the trained model and do serialization.
Parameters
----------
estimator:
any model object generated by Lightgbm framework
artifact_dir: str
Directory for generate artifact.
properties: (ModelProperties, optional). Defaults to None.
ModelProperties object required to save and deploy model.
auth :(Dict, optional). Defaults to None.
The default authetication is set using `ads.set_auth` API. If you need to override the
default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
authentication signer and kwargs required to instantiate IdentityClient object.
model_save_serializer: (SERDE or str, optional). Defaults to None.
Instance of ads.model.SERDE. Used for serialize/deserialize model.
model_input_serializer: (SERDE, optional). Defaults to None.
Instance of ads.model.SERDE. Used for serialize/deserialize data.
Returns
-------
LightGBMModel
LightGBMModel instance.
Raises
------
TypeError: If the input model is not a Lightgbm model or not supported for serialization.
Examples
--------
>>> import lightgbm as lgb
>>> import tempfile
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.datasets import load_iris
>>> from ads.model.framework.lightgbm_model import LightGBMModel
>>> iris = load_iris()
>>> X, y = iris.data, iris.target
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
>>> train = lgb.Dataset(X_train, label=y_train)
>>> param = {
... 'objective': 'multiclass', 'num_class': 3,
... }
>>> lightgbm_estimator = lgb.train(param, train)
>>> lightgbm_model = LightGBMModel(estimator=lightgbm_estimator, artifact_dir=tempfile.mkdtemp())
>>> lightgbm_model.prepare(inference_conda_env="generalml_p37_cpu_v1")
>>> lightgbm_model.verify(X_test)
>>> lightgbm_model.save()
>>> model_deployment = lightgbm_model.deploy()
>>> lightgbm_model.predict(X_test)
>>> lightgbm_model.delete_deployment()
"""
model_type = str(type(estimator))
if not (
model_type.startswith("<class 'lightgbm.basic.")
or model_type.startswith("<class 'lightgbm.sklearn.")
or model_type.startswith("<class 'onnxruntime.")
):
raise TypeError(f"{model_type} is not supported in LightGBMModel.")
default_model_save_serializer = "joblib"
if model_type.startswith("<class 'lightgbm.basic."):
default_model_save_serializer = "lightgbm"
super().__init__(
estimator=estimator,
artifact_dir=artifact_dir,
properties=properties,
auth=auth,
model_save_serializer=model_save_serializer
or default_model_save_serializer,
model_input_serializer=model_input_serializer,
**kwargs,
)
self._extractor = LightgbmExtractor(estimator)
self.framework = self._extractor.framework
self.algorithm = self._extractor.algorithm
self.version = self._extractor.version
self.hyperparameter = self._extractor.hyperparameter
[docs]
def serialize_model(
self,
as_onnx: bool = False,
initial_types: List[Tuple] = None,
force_overwrite: bool = False,
X_sample: Optional[
Union[
Dict,
str,
List,
Tuple,
np.ndarray,
pd.core.series.Series,
pd.core.frame.DataFrame,
]
] = None,
**kwargs: Dict,
):
"""
Serialize and save Lightgbm model.
Parameters
----------
as_onnx: (boolean, optional). Defaults to False.
If set as True, provide `initial_types` or `X_sample` to convert into ONNX.
initial_types: (List[Tuple], optional). Defaults to None.
Each element is a tuple of a variable name and a type.
force_overwrite: (boolean, optional). Defaults to False.
If set as True, overwrite serialized model if exists.
X_sample: Union[Dict, str, List, np.ndarray, pd.core.series.Series, pd.core.frame.DataFrame,]. Defaults to None.
Contains model inputs such that model(`X_sample`) is a valid invocation of the model.
Used to generate `initial_types`.
Returns
-------
None
Nothing.
"""
if as_onnx:
logger.warning(DEPRECATE_AS_ONNX_WARNING)
self.set_model_save_serializer("lightgbm_onnx")
super().serialize_model(
as_onnx=as_onnx,
initial_types=initial_types,
force_overwrite=force_overwrite,
X_sample=X_sample,
**kwargs,
)