Source code for ads.model.model_introspect

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""
The module that helps to minimize the number of errors in the model post-deployment process.
It provides a simple testing harness to ensure that model artifacts are
thoroughly tested before being saved to the model catalog.

Classes
--------
ModelIntrospect
    Class to introspect model artifacts.

Examples
--------
>>> model_introspect = ModelIntrospect(artifact=model_artifact)
>>> model_introspect()
... Test key         Test name            Result              Message
... ----------------------------------------------------------------------------
... test_key_1       test_name_1          Passed              test passed
... test_key_2       test_name_2          Failed              some error occurred
>>> model_introspect.status
... Passed
"""
from enum import Enum
import errno
import importlib
import json
import os
from abc import ABC
from copy import copy
from dataclasses import dataclass
from typing import List

import pandas as pd
from ads.model.model_metadata import MetadataTaxonomyKeys
from ads.common.object_storage_details import ObjectStorageDetails


class IntrospectionNotPassed(ValueError):
    pass

class TEST_STATUS(str):
    PASSED = "Passed"
    NOT_PASSED = "Failed"
    NOT_TESTED = "Skipped"

_PATH_TO_MODEL_ARTIFACT_VALIDATOR = "ads.model.model_artifact_boilerplate.artifact_introspection_test.model_artifact_validate"
_INTROSPECT_METHOD_NAME = "validate_artifact"
_INTROSPECT_RESULT_FILE_NAME = "test_json_output.json"


class _PRINT_COLUMNS(Enum):
    KEY = "Test key"
    CASE = "Test name"
    RESULT = "Result"
    MESSAGE = "Message"


class _TEST_COLUMNS(str):
    CATEGORY = "category"
    DESCRIPTION = "description"
    ERROR_MSG = "error_msg"
    SUCCESS = "success"


_TEST_STATUS_MAP = {
    True: TEST_STATUS.PASSED,
    False: TEST_STATUS.NOT_PASSED,
    None: TEST_STATUS.NOT_TESTED,
}


class _ERROR_MESSAGES(str):
    MODEL_ARTIFACT_NOT_SET = "A model artifact is required."
    MODEL_ARTIFACT_INVALID_TYPE = (
        "The model artifact must be an instance of the class ModelArtifact."
    )

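# Illustrative sketch (not part of the original module): the validator reports each
# check's ``success`` flag as True, False, or None, and ``_TEST_STATUS_MAP`` turns
# that flag into the user-facing status string:
#
#     _TEST_STATUS_MAP[True]   # -> "Passed"
#     _TEST_STATUS_MAP[False]  # -> "Failed"
#     _TEST_STATUS_MAP[None]   # -> "Skipped"
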
@dataclass
class PrintItem:
    """Class represents the model introspection print item."""

    key: str = ""
    case: str = ""
    result: str = ""
    message: str = ""

    def to_list(self) -> List[str]:
        """Converts the instance to a list representation.

        Returns
        -------
        List[str]
            The instance in a list representation.
        """
        return [self.key, self.case, self.result, self.message]

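# Example (illustrative, not in the original source): a ``PrintItem`` flattens into
# a plain list so that each introspection check can become one DataFrame row.
#
#     PrintItem("score_py", "Check that score.py exists", "Passed", "").to_list()
#     # -> ["score_py", "Check that score.py exists", "Passed", ""]
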
class Introspectable(ABC):
    """Base class that represents an introspectable object."""

    pass

class ModelIntrospect:
    """Class to introspect model artifacts.

    Attributes
    ----------
    status: str
        Returns the current status of model introspection.
        The possible variants: `Passed`, `Failed`, `Skipped`.
    failures: int
        Returns the number of failed checks in the introspection result.

    Methods
    -------
    run(self) -> pd.DataFrame
        Invokes model artifacts introspection.
    to_dataframe(self) -> pd.DataFrame
        Serializes the model introspection result into a DataFrame.

    Examples
    --------
    >>> model_introspect = ModelIntrospect(artifact=model_artifact)
    >>> result = model_introspect()
    ... Test key         Test name            Result              Message
    ... ----------------------------------------------------------------------------
    ... test_key_1       test_name_1          Passed              test passed
    ... test_key_2       test_name_2          Failed              some error occurred
    """

    def __init__(self, artifact: Introspectable):
        """Initializes the Model Introspect.

        Parameters
        ----------
        artifact: Introspectable
            The instance of the ModelArtifact object.

        Raises
        ------
        ValueError
            If the model artifact object is not provided.
        TypeError
            If the provided input parameter is not a ModelArtifact instance.
        """
        if not artifact:
            raise ValueError(_ERROR_MESSAGES.MODEL_ARTIFACT_NOT_SET)

        if not isinstance(artifact, Introspectable):
            raise TypeError(_ERROR_MESSAGES.MODEL_ARTIFACT_INVALID_TYPE)

        self._artifact = artifact
        self._reset()

    def _reset(self) -> None:
        """Resets the test result to its initial state."""
        self._status = TEST_STATUS.NOT_TESTED
        self._result = None
        self._prepared_result = []

    def _save_result_to_artifacts(self) -> None:
        """Saves the introspection result into the model artifacts folder.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        FileNotFoundError
            If the path to the model artifacts does not exist.
        """
        artifact_dir = (
            self._artifact.artifact_dir
            if not ObjectStorageDetails.is_oci_path(self._artifact.artifact_dir)
            else self._artifact.local_copy_dir
        )
        if not os.path.isdir(artifact_dir):
            raise FileNotFoundError(
                errno.ENOENT, os.strerror(errno.ENOENT), artifact_dir
            )

        output_file = f"{artifact_dir}/{_INTROSPECT_RESULT_FILE_NAME}"
        with open(output_file, "w") as f:
            json.dump(self._result, f, indent=4)

    def _save_result_to_metadata(self) -> None:
        """Saves the result of introspection to the model metadata."""
        self._artifact.metadata_taxonomy[
            MetadataTaxonomyKeys.ARTIFACT_TEST_RESULT
        ].update(value=self._result)

    def _import_and_run_validator(self) -> None:
        """Imports and runs the model artifact validator.

        The validator is provided as one of the modules of the model artifact
        boilerplate. The importlib API is used to load the validator and to
        invoke the test method.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        FileNotFoundError
            If the path to the model artifacts does not exist.
        """
        artifact_dir = (
            self._artifact.artifact_dir
            if not ObjectStorageDetails.is_oci_path(self._artifact.artifact_dir)
            else self._artifact.local_copy_dir
        )
        if not os.path.isdir(artifact_dir):
            raise FileNotFoundError(
                errno.ENOENT, os.strerror(errno.ENOENT), artifact_dir
            )

        module = importlib.import_module(_PATH_TO_MODEL_ARTIFACT_VALIDATOR)
        importlib.reload(module)
        method = getattr(module, _INTROSPECT_METHOD_NAME)
        params = {"artifact": artifact_dir}

        test_result, _ = method(**params)
        self._status = _TEST_STATUS_MAP.get(test_result)
        self._result = copy(module.TESTS)
        self._prepared_result = self._prepare_result()

    @property
    def status(self) -> str:
        """Gets the current status of model introspection."""
        return self._status

    def run(self) -> pd.DataFrame:
        """Invokes introspection.

        Returns
        -------
        pd.DataFrame
            The introspection result in a DataFrame format.
        """
        self._reset()
        self._import_and_run_validator()
        self._save_result_to_metadata()
        self._save_result_to_artifacts()
        return self.to_dataframe()

    def _prepare_result(self) -> List[PrintItem]:
        """Prepares the introspection result information to display to the user.

        Returns
        -------
        List[PrintItem]
            The list of data items prepared for printing.
        """
        if not self._result:
            return []

        result = []
        for key, item in self._result.items():
            error_msg = (
                item.get(_TEST_COLUMNS.ERROR_MSG)
                if (
                    item.get(_TEST_COLUMNS.SUCCESS) is False
                    or (
                        item.get(_TEST_COLUMNS.SUCCESS) is None
                        and "WARNING" in item.get(_TEST_COLUMNS.ERROR_MSG, "")
                    )
                )
                else ""
            )
            result.append(
                PrintItem(
                    key,
                    item.get(_TEST_COLUMNS.DESCRIPTION, ""),
                    _TEST_STATUS_MAP.get(item.get(_TEST_COLUMNS.SUCCESS)),
                    error_msg,
                )
            )
        return result

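    # Illustrative note (assumption, not taken from the original source): each entry
    # of ``self._result`` mirrors the validator's TESTS dictionary and is expected to
    # carry the ``_TEST_COLUMNS`` keys, for example:
    #
    #     "runtime_env_path": {
    #         "category": "MANDATORY",
    #         "description": "Check that the inference environment path is set",
    #         "success": True,
    #         "error_msg": "",
    #     }
    #
    # ``_prepare_result`` keeps the error message only for failed checks, or for
    # skipped checks (``success is None``) whose message contains "WARNING".
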
    def to_dataframe(self) -> pd.DataFrame:
        """Serializes the model introspection result into a DataFrame.

        Returns
        -------
        `pandas.DataFrame`
            The model introspection result in a DataFrame representation.
        """
        return (
            pd.DataFrame(
                (item.to_list() for item in self._prepared_result),
                columns=[item.value for item in _PRINT_COLUMNS],
            )
            .sort_values(by=[_PRINT_COLUMNS.KEY.value, _PRINT_COLUMNS.CASE.value])
            .reset_index(drop=True)
        )

    @property
    def failures(self) -> int:
        """Calculates the number of failures.

        Returns
        -------
        int
            The number of failures.
        """
        return len(
            [
                item
                for item in self._prepared_result
                if item.result == TEST_STATUS.NOT_PASSED
            ]
        )

    def __call__(self) -> pd.DataFrame:
        """Invokes introspection.

        Returns
        -------
        pd.DataFrame
            The introspection result in a DataFrame format.
        """
        return self.run()

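# Usage sketch (illustrative; ``model_artifact`` is assumed to be an existing
# ``ModelArtifact`` instance, i.e. an ``Introspectable`` object):
#
#     introspect = ModelIntrospect(artifact=model_artifact)
#     df = introspect()             # same as introspect.run(); returns a DataFrame
#     print(introspect.status)      # "Passed", "Failed", or "Skipped"
#     if introspect.failures:
#         raise IntrospectionNotPassed("Model artifact introspection failed.")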