Source code for ads.model.model_metadata

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
import logging
import os
import sys
from abc import ABC, abstractclassmethod, abstractmethod
from dataclasses import dataclass, field, fields
from pathlib import Path
from typing import Dict, List, Tuple

import ads.dataset.factory as factory
import fsspec
import git
import oci.data_science.models
import pandas as pd
import yaml
from ads.common import logger
from ads.common.error import ChangesNotCommitted
from ads.common.extended_enum import ExtendedEnumMeta
from ads.common.serializer import DataClassSerializable
from ads.common.object_storage_details import ObjectStorageDetails
from oci.util import to_dict

try:
    # Prefer the C-accelerated dumper when PyYAML exposes it.
    from yaml import CDumper as dumper
except ImportError:
    # `CDumper` is not importable here; fall back to the pure-Python dumper.
    # Only ImportError is caught so that unrelated failures (for example
    # KeyboardInterrupt) are not silently swallowed.
    from yaml import Dumper as dumper


logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE: this rebinds the `logger` name imported from `ads.common` above.
logger = logging.getLogger("ADS")

# Maximum total size of the model metadata, in bytes (see `MetadataSizeTooLarge`).
METADATA_SIZE_LIMIT = 32000
# Maximum number of characters in a custom metadata value.
METADATA_VALUE_LENGTH_LIMIT = 255
# Maximum number of characters in a custom metadata description.
METADATA_DESCRIPTION_LENGTH_LIMIT = 255
# Placeholder sent to OCI when a custom metadata item has no value.
_METADATA_EMPTY_VALUE = "NA"
CURRENT_WORKING_DIR = "."


class MetadataSizeTooLarge(ValueError):
    """Raised when the model metadata is larger than the allowed size.

    See https://docs.oracle.com/en-us/iaas/data-science/using/models_saving_catalog.htm
    for more details.

    Parameters
    ----------
    size: int
        The actual size of the metadata, in bytes. It is reported in the
        error message together with `METADATA_SIZE_LIMIT`.
    """

    def __init__(self, size: int):
        message = (
            f"The metadata is `{size}` bytes and exceeds "
            f"the size limit of `{METADATA_SIZE_LIMIT}` bytes. "
            "Reduce the metadata size."
        )
        super().__init__(message)
class MetadataValueTooLong(ValueError):
    """Raised when a custom metadata value is longer than the allowed length.

    See https://docs.oracle.com/en-us/iaas/data-science/using/models_saving_catalog.htm
    for more details.

    Parameters
    ----------
    key: str
        The key of the offending metadata item.
    length: int
        The actual length of the value, in characters. It is reported in the
        error message together with `METADATA_VALUE_LENGTH_LIMIT`.
    """

    def __init__(self, key: str, length: int):
        message = (
            f"The custom metadata value of `{key}` is `{length}` characters and exceeds "
            f"the length limit of `{METADATA_VALUE_LENGTH_LIMIT}` characters."
        )
        super().__init__(message)
class MetadataDescriptionTooLong(ValueError):
    """Raised when a custom metadata description is longer than the allowed length.

    See https://docs.oracle.com/en-us/iaas/data-science/using/models_saving_catalog.htm
    for more details.

    Parameters
    ----------
    key: str
        The key of the offending metadata item.
    length: int
        The actual length of the description, in characters. It is reported
        in the error message together with `METADATA_DESCRIPTION_LENGTH_LIMIT`.
    """

    def __init__(self, key: str, length: int):
        message = (
            f"The custom metadata description of `{key}` is `{length}` characters and exceeds "
            f"the length limit of `{METADATA_DESCRIPTION_LENGTH_LIMIT}` characters."
        )
        super().__init__(message)
class MetadataCustomPrintColumns(str, metaclass=ExtendedEnumMeta):
    # Column labels of the data frame produced by
    # `ModelCustomMetadata.to_dataframe()`; the declaration order matches the
    # (key, value, description, category) tuples built there.
    KEY = "Key"
    VALUE = "Value"
    DESCRIPTION = "Description"
    CATEGORY = "Category"
class MetadataTaxonomyPrintColumns(str, metaclass=ExtendedEnumMeta):
    # Key/value column labels.
    # NOTE(review): presumably consumed by `ModelTaxonomyMetadata.to_dataframe()`,
    # which is not visible in this part of the file -- confirm.
    KEY = "Key"
    VALUE = "Value"
class MetadataTaxonomyKeys(str, metaclass=ExtendedEnumMeta):
    # Keys of the taxonomy metadata items. `ModelTaxonomyMetadata.__init__`
    # registers one `ModelTaxonomyMetadataItem` per value listed here.
    USE_CASE_TYPE = "UseCaseType"
    FRAMEWORK = "Framework"
    FRAMEWORK_VERSION = "FrameworkVersion"
    ALGORITHM = "Algorithm"
    HYPERPARAMETERS = "Hyperparameters"
    ARTIFACT_TEST_RESULT = "ArtifactTestResults"
class MetadataCustomKeys(str, metaclass=ExtendedEnumMeta):
    # Well-known keys of custom metadata items.
    SLUG_NAME = "SlugName"
    CONDA_ENVIRONMENT = "CondaEnvironment"
    CONDA_ENVIRONMENT_PATH = "CondaEnvironmentPath"
    ENVIRONMENT_TYPE = "EnvironmentType"
    MODEL_ARTIFACTS = "ModelArtifacts"
    TRAINING_DATASET = "TrainingDataset"
    VALIDATION_DATASET = "ValidationDataset"
    MODEL_SERIALIZATION_FORMAT = "ModelSerializationFormat"
    TRAINING_DATASET_SIZE = "TrainingDatasetSize"
    VALIDATION_DATASET_SIZE = "ValidationDatasetSize"
    TRAINING_DATASET_NUMBER_OF_ROWS = "TrainingDatasetNumberOfRows"
    TRAINING_DATASET_NUMBER_OF_COLS = "TrainingDatasetNumberOfCols"
    VALIDATION_DATASET_NUMBER_OF_ROWS = "ValidationDatasetNumberOfRows"
    # NOTE: the value below is spelled "DataSet" (capital "S"), unlike its
    # siblings. It is a runtime key, so the spelling is kept as is.
    VALIDATION_DATASET_NUMBER_OF_COLS = "ValidationDataSetNumberOfCols"
    CLIENT_LIBRARY = "ClientLibrary"
    MODEL_FILE_NAME = "ModelFileName"
class MetadataCustomCategory(str, metaclass=ExtendedEnumMeta):
    # Categories accepted for custom metadata items. `OTHER` is the default
    # used by `ModelCustomMetadata.add()` and when converting an item without
    # a category to the OCI representation.
    PERFORMANCE = "Performance"
    TRAINING_PROFILE = "Training Profile"
    TRAINING_AND_VALIDATION_DATASETS = "Training and Validation Datasets"
    TRAINING_ENV = "Training Environment"
    OTHER = "Other"
class UseCaseType(str, metaclass=ExtendedEnumMeta):
    # Values accepted for the `UseCaseType` taxonomy metadata item
    # (checked in `ModelTaxonomyMetadataItem.validate()`).
    BINARY_CLASSIFICATION = "binary_classification"
    REGRESSION = "regression"
    MULTINOMIAL_CLASSIFICATION = "multinomial_classification"
    CLUSTERING = "clustering"
    RECOMMENDER = "recommender"
    DIMENSIONALITY_REDUCTION = "dimensionality_reduction/representation"
    TIME_SERIES_FORECASTING = "time_series_forecasting"
    ANOMALY_DETECTION = "anomaly_detection"
    TOPIC_MODELING = "topic_modeling"
    NER = "ner"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    IMAGE_CLASSIFICATION = "image_classification"
    OBJECT_LOCALIZATION = "object_localization"
    OTHER = "other"
class Framework(str, metaclass=ExtendedEnumMeta):
    # Values accepted for the `Framework` taxonomy metadata item
    # (checked in `ModelTaxonomyMetadataItem.validate()`).
    SCIKIT_LEARN = "scikit-learn"
    XGBOOST = "xgboost"
    TENSORFLOW = "tensorflow"
    PYTORCH = "pytorch"
    MXNET = "mxnet"
    KERAS = "keras"
    LIGHT_GBM = "lightgbm"
    PYMC3 = "pymc3"
    PYOD = "pyod"
    SPACY = "spacy"
    PROPHET = "prophet"
    SKTIME = "sktime"
    STATSMODELS = "statsmodels"
    CUML = "cuml"
    ORACLE_AUTOML = "oracle_automl"
    # NOTE: the member name ends with the digits "20" while the value is
    # "h2o". The name is part of the public interface and is kept as is.
    H20 = "h2o"
    TRANSFORMERS = "transformers"
    NLTK = "nltk"
    EMCEE = "emcee"
    PYSTAN = "pystan"
    BERT = "bert"
    GENSIM = "gensim"
    FLAIR = "flair"
    WORD2VEC = "word2vec"
    ENSEMBLE = "ensemble"
    SPARK = "pyspark"
    OTHER = "other"
class ModelMetadataItem(ABC):
    """The base abstract class representing model metadata item.

    Subclasses list the names of their serializable attributes in `_FIELDS`.
    The methods of this class read `self.key` and `self.value`, so subclasses
    are expected to provide both. Items are hashed and compared by their key,
    ignoring case.

    Methods
    -------
    to_dict(self) -> Dict
        Serializes model metadata item to dictionary.
    from_dict(cls, data: Dict) -> ModelMetadataItem
        Constructs an instance of ModelMetadataItem from a dictionary.
    to_yaml(self)
        Serializes model metadata item to YAML.
    size(self) -> int
        Returns the size of the metadata in bytes.
    to_json(self) -> JSON
        Serializes metadata item to JSON.
    to_json_file(self, file_path: str, storage_options: dict = None) -> None
        Saves the metadata item value to a local file or object storage.
    validate(self) -> bool
        Validates metadata item.
    """

    _FIELDS = []

    @classmethod
    def from_dict(cls, data: Dict) -> "ModelMetadataItem":
        """Constructs an instance of `ModelMetadataItem` from a dictionary.

        Parameters
        ----------
        data : Dict
            Metadata item in a dictionary format. The entries are passed to
            the class constructor as keyword arguments. An empty or `None`
            input results in a constructor call without arguments.

        Returns
        -------
        ModelMetadataItem
            An instance of model metadata item.
        """
        return cls(**(data or {}))

    def to_dict(self) -> dict:
        """Serializes model metadata item to dictionary.

        Returns
        -------
        dict
            The dictionary with one entry per name listed in `_FIELDS`.
        """
        return {name: getattr(self, name) for name in self._FIELDS}

    def to_yaml(self):
        """Serializes model metadata item to YAML.

        Returns
        -------
        Yaml
            The model metadata item in a YAML representation.
        """
        return yaml.dump(self.to_dict(), Dumper=dumper)

    def size(self) -> int:
        """Returns the size of the model metadata in bytes.

        Returns
        -------
        int
            The number of bytes of the UTF-16 encoded JSON representation
            of the item.
        """
        return len(json.dumps(self.to_dict()).encode("utf-16"))

    def to_json(self):
        """Serializes metadata item into a JSON.

        Returns
        -------
        JSON
            The metadata item in a JSON representation.
        """
        return json.dumps(self.to_dict())

    def to_json_file(
        self,
        file_path: str,
        storage_options: dict = None,
    ) -> None:
        """Saves the metadata item value to a local file or object storage.

        Only the item value is written, serialized as JSON.

        Parameters
        ----------
        file_path : str
            The file path to store the data. When the last path component has
            no file extension, the path is treated as a folder and the file
            is named `<key>.json`.
            "oci://bucket_name@namespace/folder_name/"
            "oci://bucket_name@namespace/folder_name/result.json"
            "path/to/local/folder"
            "path/to/local/folder/result.json"
        storage_options : dict. Default None
            Parameters passed on to the backend filesystem class.
            Defaults to `options` set using `DatasetFactory.set_default_storage()`.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError: When file path is empty.
        TypeError: When file path not a string.

        Examples
        --------
        >>> metadata_item = ModelCustomMetadataItem(key="key1", value="value1")
        >>> storage_options = {"config": oci.config.from_file(os.path.join("~/.oci", "config"))}
        >>> storage_options
        {'log_requests': False,
            'additional_user_agent': '',
            'pass_phrase': None,
            'user': '<user-id>',
            'fingerprint': '05:15:2b:b1:46:8a:32:ec:e2:69:5b:32:01:**:**:**)',
            'tenancy': '<tenancy-id>',
            'region': 'us-ashburn-1',
            'key_file': '/home/datascience/.oci/oci_api_key.pem'}
        >>> metadata_item.to_json_file(file_path = 'oci://bucket_name@namespace/folder_name/file.json', storage_options=storage_options)
        >>> metadata_item.to_json_file("path/to/local/folder/file.json")
        """
        if not file_path:
            raise ValueError("File path must be specified.")

        if not isinstance(file_path, str):
            raise TypeError("File path must be a string.")

        # No extension on the last component: treat the path as a folder.
        if not Path(os.path.basename(file_path)).suffix:
            file_path = os.path.join(file_path, f"{self.key}.json")

        if not storage_options:
            storage_options = factory.default_storage_options or {"config": {}}

        with fsspec.open(
            file_path,
            mode="w",
            **(storage_options),
        ) as f:
            f.write(json.dumps(self.value))

    def _to_oci_metadata(self):
        """Converts metadata item to OCI metadata item.

        A falsy value is passed through unchanged. Any other value is turned
        into a string: `str()` for strings and numbers, JSON for everything
        else. Occurrences of "NaN" in the result are replaced with "null".
        """
        payload = self.to_dict()
        value = payload["value"]
        if value:
            if isinstance(value, (str, int, float)):
                serialized = str(value)
            else:
                serialized = json.dumps(value)
            payload["value"] = serialized.replace("NaN", "null")
        return oci.data_science.models.Metadata(**payload)

    @classmethod
    def _from_oci_metadata(cls, oci_metadata_item) -> "ModelMetadataItem":
        """Creates a new metadata item from the OCI metadata item.

        A string value is decoded from JSON when possible; otherwise it is
        kept as the original string.
        """
        raw_item = to_dict(oci_metadata_item)
        attributes = {name: raw_item.get(name) for name in cls._FIELDS}
        if isinstance(attributes["value"], str):
            try:
                attributes["value"] = json.loads(attributes["value"])
            except ValueError:
                # Not a JSON document (json.JSONDecodeError is a ValueError):
                # keep the plain string.
                pass
        return cls(**attributes)

    def __hash__(self):
        return hash(self.key.lower())

    def __eq__(self, other):
        # Compare keys directly instead of hashes: comparing hashes raises
        # TypeError for unhashable operands and reports equality on a mere
        # hash collision.
        if not isinstance(other, ModelMetadataItem):
            return NotImplemented
        return self.key.lower() == other.key.lower()

    def __repr__(self):
        return self.to_yaml()

    @abstractmethod
    def validate(self) -> bool:
        """Validates metadata item.

        Returns
        -------
        bool
            True if validation passed.
        """
        pass
class ModelTaxonomyMetadataItem(ModelMetadataItem):
    """Class that represents model taxonomy metadata item.

    Attributes
    ----------
    key: str
        The model metadata item key. It can be assigned only once.
    value: str
        The model metadata item value. Any JSON serializable value is accepted.

    Methods
    -------
    reset(self) -> None
        Resets model metadata item.
    to_dict(self) -> Dict
        Serializes model metadata item to dictionary.
    from_dict(cls) -> ModelTaxonomyMetadataItem
        Constructs model metadata item from dictionary.
    to_yaml(self)
        Serializes model metadata item to YAML.
    size(self) -> int
        Returns the size of the metadata in bytes.
    update(self, value: str) -> None
        Updates metadata item information.
    to_json(self) -> JSON
        Serializes metadata item into a JSON.
    to_json_file(self, file_path: str, storage_options: dict = None) -> None
        Saves the metadata item value to a local file or object storage.
    validate(self) -> bool
        Validates metadata item.
    """

    _FIELDS = ["key", "value"]

    def __init__(
        self,
        key: str,
        value: str = None,
    ):
        self.key = key
        self.value = value

    @property
    def key(self) -> str:
        return self._key

    @key.setter
    def key(self, key: str):
        """Sets the model metadata key.

        Raises
        ------
        TypeError
            If provided key is not a string.
        ValueError
            If the key has already been set.
            If provided key is empty.
        """
        # The key may be assigned exactly once.
        if hasattr(self, "_key"):
            raise ValueError("The key field is immutable and cannot be changed.")
        if not isinstance(key, str):
            raise TypeError("The key must be a string.")
        # `key` is a string at this point, so only emptiness is left to check.
        if key == "":
            raise ValueError("The key cannot be empty.")
        self._key = key

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str):
        """Sets the model metadata value.

        `None` and the empty string are stored without any check; every
        other value must be JSON serializable.

        Raises
        ------
        ValueError
            If provided value cannot be serialized to JSON.
        """
        is_empty = value is None or value == ""
        if not is_empty:
            try:
                json.dumps(value)
            except TypeError:
                raise ValueError(
                    f"An error occurred in attempt to serialize the value of {self.key} to JSON. "
                    "The value must be JSON serializable."
                )
        self._value = value

    def reset(self) -> None:
        """Resets model metadata item by setting its value to None.

        Returns
        -------
        None
            Nothing.
        """
        self.update(value=None)

    def update(self, value: str) -> None:
        """Updates metadata item value.

        Parameters
        ----------
        value: str
            The new value of model metadata item.

        Returns
        -------
        None
            Nothing.
        """
        self.value = value

    def validate(self) -> bool:
        """Validates metadata item.

        Only the `UseCaseType` and `Framework` items with a non-empty value
        are checked; the key comparison ignores case.

        Returns
        -------
        bool
            True if validation passed.

        Raises
        ------
        ValueError
            If invalid UseCaseType provided.
            If invalid Framework provided.
        """
        current_value = self.value
        if not current_value:
            # Nothing to check for an empty value.
            return True

        current_key = self.key.lower()
        not_a_string = not isinstance(current_value, str)

        if current_key == MetadataTaxonomyKeys.USE_CASE_TYPE.lower():
            if not_a_string or current_value not in UseCaseType:
                raise ValueError(
                    f"Invalid value of `UseCaseType`. Choose from {UseCaseType.values()}."
                )

        if current_key == MetadataTaxonomyKeys.FRAMEWORK.lower():
            if not_a_string or current_value not in Framework:
                raise ValueError(
                    f"Invalid value of `Framework`. Choose from {Framework.values()}."
                )

        return True
class ModelCustomMetadataItem(ModelTaxonomyMetadataItem):
    """Class that represents model custom metadata item.

    Attributes
    ----------
    key: str
        The model metadata item key.
    value: str
        The model metadata item value.
    description: str
        The model metadata item description.
    category: str
        The model metadata item category.

    Methods
    -------
    reset(self) -> None
        Resets model metadata item.
    to_dict(self)->dict
        Serializes model metadata item to dictionary.
    from_dict(cls) -> ModelCustomMetadataItem
        Constructs model metadata item from dictionary.
    to_yaml(self)
        Serializes model metadata item to YAML.
    size(self) -> int
        Returns the size of the metadata in bytes.
    update(self, value: str, description: str, category: str) -> None
        Updates metadata item information.
    to_json(self) -> JSON
        Serializes metadata item into a JSON.
    to_json_file(self, file_path: str, storage_options: dict = None) -> None
        Saves the metadata item value to a local file or object storage.
    validate(self) -> bool
        Validates metadata item.
    """

    _FIELDS = ["key", "value", "description", "category"]

    def __init__(
        self,
        key: str,
        value: str = None,
        description: str = None,
        category: str = None,
    ):
        super().__init__(key=key, value=value)
        self.description = description
        self.category = category

    @property
    def description(self) -> str:
        return self._description

    @description.setter
    def description(self, description: str):
        """The model metadata description setter.

        Raises
        ------
        TypeError
            If provided description is not a string.
        """
        if description is not None and not isinstance(description, str):
            raise TypeError("The description must be a string.")
        self._description = description

    @property
    def category(self) -> str:
        return self._category

    @category.setter
    def category(self, category: str):
        """The model metadata category setter.

        Any falsy input (for example `None` or an empty string) is stored
        as `None`.

        Raises
        ------
        TypeError
            If provided category is not a string.
        ValueError
            If provided category not supported.
        """
        if not category:
            self._category = None
            return

        if not isinstance(category, str):
            raise TypeError(
                f"Invalid category type for the {self.key}. "
                "The category must be a string."
            )

        if category not in MetadataCustomCategory:
            raise ValueError(
                f"Invalid category value for the {self.key}. "
                f"Choose from {MetadataCustomCategory.values()}."
            )

        self._category = category

    def reset(self) -> None:
        """Resets model metadata item.

        Resets value, description and category to None.

        Returns
        -------
        None
            Nothing.
        """
        self.update(value=None, description=None, category=None)

    def update(self, value: str, description: str, category: str) -> None:
        """Updates metadata item.

        All three attributes are overwritten; there are no defaults.

        Parameters
        ----------
        value: str
            The value of model metadata item.
        description: str
            The description of model metadata item.
        category: str
            The category of model metadata item.

        Returns
        -------
        None
            Nothing.
        """
        self.value = value
        self.description = description
        self.category = category

    def _to_oci_metadata(self):
        """Converts metadata item to OCI metadata item.

        An empty value is replaced with `_METADATA_EMPTY_VALUE` and an empty
        category with `MetadataCustomCategory.OTHER`.
        """
        oci_metadata_item = super()._to_oci_metadata()
        if not oci_metadata_item.value:
            oci_metadata_item.value = _METADATA_EMPTY_VALUE
        if not oci_metadata_item.category:
            oci_metadata_item.category = MetadataCustomCategory.OTHER
        return oci_metadata_item

    def validate(self) -> bool:
        """Validates metadata item.

        Returns
        -------
        bool
            True if validation passed.

        Raises
        ------
        ValueError
            If invalid category provided.
        MetadataValueTooLong
            If value exceeds the length limit.
        MetadataDescriptionTooLong
            If description exceeds the length limit.
        """
        if self.category and self.category not in MetadataCustomCategory:
            raise ValueError(
                f"Invalid category value for the {self.key}. "
                f"Choose from {MetadataCustomCategory.values()}."
            )

        if self.value:
            # Strings are measured as they are; anything else is measured
            # by the length of its JSON representation.
            value = (
                self.value if isinstance(self.value, str) else json.dumps(self.value)
            )
            if len(value) > METADATA_VALUE_LENGTH_LIMIT:
                raise MetadataValueTooLong(self.key, len(value))

        if (
            self.description
            and len(self.description) > METADATA_DESCRIPTION_LENGTH_LIMIT
        ):
            raise MetadataDescriptionTooLong(self.key, len(self.description))

        return True
class ModelMetadata(ABC):
    """The base abstract class representing model metadata.

    The metadata items are kept in the `_items` set. Lookups by key ignore
    the case of the key.

    Methods
    -------
    get(self, key: str) -> ModelMetadataItem
        Returns the model metadata item by provided key.
    reset(self) -> None
        Resets all model metadata items to empty values.
    to_dataframe(self) -> pd.DataFrame
        Returns the model metadata list in a data frame format.
    size(self) -> int
        Returns the size of the model metadata in bytes.
    validate(self) -> bool
        Validates metadata.
    validate_size(self) -> bool
        Validates the size of the metadata.
    to_dict(self)
        Serializes model metadata into a dictionary.
    from_dict(cls) -> ModelMetadata
        Constructs model metadata from dictionary.
    to_yaml(self)
        Serializes model metadata into a YAML.
    to_json(self)
        Serializes model metadata into a JSON.
    to_json_file(self, file_path: str, storage_options: dict = None) -> None
        Saves the metadata to a local file or object storage.
    """

    @abstractmethod
    def __init__(self):
        """Initializes Model Metadata."""
        self._items = set()

    def get(self, key: str) -> "ModelMetadataItem":
        """Returns the model metadata item by provided key.

        Parameters
        ----------
        key: str
            The key of model metadata item. The lookup is case-insensitive.

        Returns
        -------
        ModelMetadataItem
            The model metadata item.

        Raises
        ------
        ValueError
            If provided key is empty or metadata item not found.
        """
        if key is None or not isinstance(key, str) or key == "":
            raise ValueError("The key must not be an empty string.")
        lookup_key = key.lower()
        for item in self._items:
            if item.key.lower() == lookup_key:
                return item
        raise ValueError(f"The metadata with {key} not found.")

    def reset(self) -> None:
        """Resets all model metadata items to empty values.

        Calls `reset()` on every metadata item.
        """
        for item in self._items:
            item.reset()

    def size(self) -> int:
        """Returns the size of the model metadata in bytes.

        Returns
        -------
        int
            The sum of the sizes of all metadata items, in bytes.
        """
        return sum(item.size() for item in self._items)

    def validate_size(self) -> bool:
        """Validates model metadata size.

        Validates the size of metadata. Throws an error if the size of the metadata
        exceeds expected value.

        Returns
        -------
        bool
            True if metadata size is valid.

        Raises
        ------
        MetadataSizeTooLarge
            If the size of the metadata exceeds expected value.
        """
        # Computed once: every call to size() serializes all items.
        total_size = self.size()
        if total_size > METADATA_SIZE_LIMIT:
            raise MetadataSizeTooLarge(total_size)
        return True

    def validate(self) -> bool:
        """Validates model metadata.

        Calls `validate()` on every metadata item; errors raised by the
        items are propagated.

        Returns
        -------
        bool
            True if metadata is valid.
        """
        for item in self._items:
            item.validate()
        return True

    def to_dict(self):
        """Serializes model metadata into a dictionary.

        Returns
        -------
        Dict
            The dictionary with the single `data` key holding the list of
            serialized metadata items.
        """
        return {"data": [item.to_dict() for item in self._items]}

    def to_yaml(self):
        """Serializes model metadata into a YAML.

        Returns
        -------
        Yaml
            The model metadata in a YAML representation.
        """
        return yaml.dump(self.to_dict(), Dumper=dumper)

    def to_json(self):
        """Serializes model metadata into a JSON.

        Returns
        -------
        JSON
            The model metadata in a JSON representation.
        """
        return json.dumps(self.to_dict())

    @property
    def keys(self) -> Tuple[str, ...]:
        """Returns all registered metadata keys.

        Returns
        -------
        Tuple[str, ...]
            The tuple of metadata keys.
        """
        return tuple(item.key for item in self._items)

    def _to_oci_metadata(self):
        """Convert to a list of `oci.data_science.models.Metadata` objects.

        Returns
        -------
        list[oci.data_science.models.Metadata]
            A list of oci data science model metadata.

        Examples
        --------
        >>> metadata_taxonomy = ModelTaxonomyMetadata()
        >>> metadata_taxonomy.get(key="FrameworkVersion").update(value="2.3.1")
        >>> metadata_taxonomy._to_oci_metadata()
        [{
            "key": "FrameworkVersion",
            "value": "2.3.1"
        },
        {
            "key": "UseCaseType",
            "value": null
        },
        {
            "key": "Algorithm",
            "value": null
        },
        {
            "key": "Framework",
            "value": null
        },
        {
            "key": "Hyperparameters",
            "value": null
        }]
        """
        return [item._to_oci_metadata() for item in self._items]

    def to_json_file(
        self,
        file_path: str,
        storage_options: dict = None,
    ) -> None:
        """Saves the metadata to a local file or object storage.

        Parameters
        ----------
        file_path : str
            The file path to store the data. When the last path component has
            no file extension, the path is treated as a folder and the file
            is named `<class name>.json`.
            "oci://bucket_name@namespace/folder_name/"
            "oci://bucket_name@namespace/folder_name/metadata.json"
            "path/to/local/folder"
            "path/to/local/folder/metadata.json"
        storage_options : dict. Default None
            Parameters passed on to the backend filesystem class.
            Defaults to `options` set using `DatasetFactory.set_default_storage()`.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError: When file path is empty.
        TypeError: When file path not a string.

        Examples
        --------
        >>> metadata = ModelTaxonomyMetadata()
        >>> storage_options = {"config": oci.config.from_file(os.path.join("~/.oci", "config"))}
        >>> storage_options
        {'log_requests': False,
            'additional_user_agent': '',
            'pass_phrase': None,
            'user': '<user-id>',
            'fingerprint': '05:15:2b:b1:46:8a:32:ec:e2:69:5b:32:01:**:**:**)',
            'tenancy': '<tenancy-id>',
            'region': 'us-ashburn-1',
            'key_file': '/home/datascience/.oci/oci_api_key.pem'}
        >>> metadata.to_json_file(file_path = 'oci://bucket_name@namespace/folder_name/metadata_taxonomy.json', storage_options=storage_options)
        >>> metadata.to_json_file("path/to/local/folder/metadata_taxonomy.json")
        """
        if not file_path:
            raise ValueError("File path must be specified.")

        if not isinstance(file_path, str):
            raise TypeError("File path must be a string.")

        # No extension on the last component: treat the path as a folder.
        if not Path(os.path.basename(file_path)).suffix:
            file_path = os.path.join(file_path, f"{self.__class__.__name__}.json")

        if not storage_options:
            storage_options = factory.default_storage_options or {"config": {}}

        with fsspec.open(
            file_path,
            mode="w",
            **(storage_options),
        ) as f:
            f.write(self.to_json())

    def __getitem__(self, key: str) -> "ModelMetadataItem":
        return self.get(key)

    def __repr__(self):
        return self.to_yaml()

    def __len__(self):
        return len(self._items)

    # `abc.abstractclassmethod` is deprecated; stacking `classmethod` on top
    # of `abstractmethod` is the supported spelling.
    @classmethod
    @abstractmethod
    def _from_oci_metadata(cls, metadata_list):
        pass

    @abstractmethod
    def to_dataframe(self) -> pd.DataFrame:
        """Returns the model metadata list in a data frame format.

        Returns
        -------
        `pandas.DataFrame`
            The model metadata in a dataframe format.
        """
        pass

    @classmethod
    @abstractmethod
    def from_dict(cls, data: Dict) -> "ModelMetadata":
        """Constructs an instance of `ModelMetadata` from a dictionary.

        Parameters
        ----------
        data : Dict
            Model metadata in a dictionary format.

        Returns
        -------
        ModelMetadata
            An instance of model metadata.
        """
        pass
[docs] class ModelCustomMetadata(ModelMetadata): """Class that represents Model Custom Metadata. Methods ------- get(self, key: str) -> ModelCustomMetadataItem Returns the model metadata item by provided key. reset(self) -> None Resets all model metadata items to empty values. to_dataframe(self) -> pd.DataFrame Returns the model metadata list in a data frame format. size(self) -> int Returns the size of the model metadata in bytes. validate(self) -> bool Validates metadata. to_dict(self) Serializes model metadata into a dictionary. from_dict(cls) -> ModelCustomMetadata Constructs model metadata from dictionary. to_yaml(self) Serializes model metadata into a YAML. add(self, key: str, value: str, description: str = "", category: str = MetadataCustomCategory.OTHER, replace: bool = False) -> None: Adds a new model metadata item. Replaces existing one if replace flag is True. remove(self, key: str) -> None Removes a model metadata item by key. clear(self) -> None Removes all metadata items. isempty(self) -> bool Checks if metadata is empty. to_json(self) Serializes model metadata into a JSON. to_json_file(self, file_path: str, storage_options: dict = None) -> None Saves the metadata to a local file or object storage. 
Examples -------- >>> metadata_custom = ModelCustomMetadata() >>> metadata_custom.add(key="format", value="pickle") >>> metadata_custom.add(key="note", value="important note", description="some description") >>> metadata_custom["format"].description = "some description" >>> metadata_custom.to_dataframe() Key Value Description Category ---------------------------------------------------------------------------- 0 format pickle some description user defined 1 note important note some description user defined >>> metadata_custom metadata: - category: user defined description: some description key: format value: pickle - category: user defined description: some description key: note value: important note >>> metadata_custom.remove("format") >>> metadata_custom metadata: - category: user defined description: some description key: note value: important note >>> metadata_custom.to_dict() {'metadata': [{ 'key': 'note', 'value': 'important note', 'category': 'user defined', 'description': 'some description' }]} >>> metadata_custom.reset() >>> metadata_custom metadata: - category: None description: None key: note value: None >>> metadata_custom.clear() >>> metadata_custom.to_dataframe() Key Value Description Category ---------------------------------------------------------------------------- """ def __init__(self): """Initializes custom model metadata.""" self._items = set()
[docs] def add( self, key: str, value: str, description: str = "", category: str = MetadataCustomCategory.OTHER, replace: bool = False, ) -> None: """Adds a new model metadata item. Overrides the existing one if replace flag is True. Parameters ---------- key: str The metadata item key. value: str The metadata item value. description: str The metadata item description. category: str The metadata item category. replace: bool Overrides the existing metadata item if replace flag is True. Returns ------- None Nothing. Raises ------ TypeError If provided key is not a string. If provided description not a string. ValueError If provided key is empty. If provided value is empty. If provided value cannot be serialized to JSON. If item with provided key is already registered and replace flag is False. If provided category is not supported. MetadataValueTooLong If the length of provided value exceeds 255 charracters. MetadataDescriptionTooLong If the length of provided description exceeds 255 charracters. """ if not category: category = MetadataCustomCategory.OTHER if key is None or key == "": raise ValueError("The key cannot be empty.") if value is None or value == "": raise ValueError("The value cannot be empty.") if not isinstance(key, str): raise TypeError("The key must be a string.") if not isinstance(category, str): raise TypeError("The category must be a string.") if category not in MetadataCustomCategory: raise ValueError( f"Invalid category value. " f"Choose from {MetadataCustomCategory.values()}." ) if description and not isinstance(description, str): raise TypeError("The description must be a string.") try: tmp_value = json.dumps(value) except TypeError: raise ValueError( f"An error occurred in attempt to serialize the value of `{key}` to JSON. " "The value must be JSON serializable." 
) if len(tmp_value) > METADATA_VALUE_LENGTH_LIMIT: raise MetadataValueTooLong(key, len(tmp_value)) if description and len(description) > METADATA_DESCRIPTION_LENGTH_LIMIT: raise MetadataDescriptionTooLong(key, len(description)) if not replace and key in self.keys: raise ValueError( f"The metadata item with key {key} is already registered. " "Use replace=True to overwrite." ) self._add( ModelCustomMetadataItem( key=key, value=value, description=description, category=category, ), replace=replace, )
def _add(self, item: ModelCustomMetadataItem, replace=False) -> None: """Adds a new model metadata item. Overrides the existing one if replace flag is True. Parameters ---------- item: ModelCustomMetadataItem The model metadata item. replace: bool Overrides the existing metadata item if replace flag is True. Returns ------- None Nothing. Raises ------ ValueError If item is already registered and replace flag is False. TypeError If input data has a wrong format. """ if not isinstance(item, ModelCustomMetadataItem): raise TypeError( "Argument must be an instance of the class ModelCustomMetadataItem." ) if not replace and item in self._items: raise ValueError( f"The metadata item with key {item.key} is already registered. " "Use replace=True to overwrite." ) self._items.discard(item) self._items.add(item) def _add_many(self, items: List[ModelCustomMetadataItem], replace=False) -> None: """Adds model metadata items into model metadata. Overrides the existing ones if replace flag is True. Parameters ---------- items: List[ModelCustomMetadataItem] The list of model metadata items. replace: bool Overrides the existing metadata items if replace flag is True. Returns ------- None Nothing. Raises ------ TypeError If input data has wrong format. """ if not isinstance(items, list): raise TypeError("Argument must be a list of model metadata items.") for item in items: self._add(item, replace)
[docs] def set_training_data(self, path: str, data_size: str = None): """Adds training_data path and data size information into model custom metadata. Parameters ---------- path: str The path where the training_data is stored. data_size: str The size of the training_data. Returns ------- None Nothing. """ self.add( key=MetadataCustomKeys.TRAINING_DATASET, value=path, category=MetadataCustomCategory.TRAINING_AND_VALIDATION_DATASETS, description="The path where training dataset path are stored.", replace=True, ) if data_size is not None: self.add( key=MetadataCustomKeys.TRAINING_DATASET_SIZE, value=data_size, category=MetadataCustomCategory.TRAINING_AND_VALIDATION_DATASETS, description="The size of the training data.", replace=True, )
[docs] def set_validation_data(self, path: str, data_size: str = None): """Adds validation_data path and data size information into model custom metadata. Parameters ---------- path: str The path where the validation_data is stored. data_size: str The size of the validation_data. Returns ------- None Nothing. """ self.add( key=MetadataCustomKeys.VALIDATION_DATASET, value=path, category=MetadataCustomCategory.TRAINING_AND_VALIDATION_DATASETS, description="The path where validation dataset path are stored.", replace=True, ) if data_size is not None: self.add( key=MetadataCustomKeys.VALIDATION_DATASET_SIZE, value=data_size, category=MetadataCustomCategory.TRAINING_AND_VALIDATION_DATASETS, description="The size of the validation data.", replace=True, )
[docs] def remove(self, key: str) -> None: """Removes a model metadata item. Parameters ---------- key: str The key of the metadata item that should be removed. Returns ------- None Nothing. """ self._items.discard(self.get(key))
[docs] def clear(self) -> None: """Removes all metadata items. Returns ------- None Nothing. """ self._items.clear()
[docs] def isempty(self) -> bool: """Checks if metadata is empty. Returns ------- bool True if metadata is empty, False otherwise. """ return len(self._items) == 0
@classmethod
def _from_oci_metadata(cls, metadata_list):
    """Builds a `ModelCustomMetadata` object from a list of OCI metadata entries.

    Parameters
    ----------
    metadata_list
        Iterable of OCI metadata entries; each one is converted with
        `ModelCustomMetadataItem._from_oci_metadata`.

    Returns
    -------
    ModelCustomMetadata
        A `ModelCustomMetadata` instance.
    """
    result = cls()
    for oci_entry in metadata_list:
        converted = ModelCustomMetadataItem._from_oci_metadata(oci_entry)
        # Items are added with `replace=True`.
        result._add(converted, replace=True)
    return result
def to_dataframe(self) -> pd.DataFrame:
    """Renders the metadata items as a data frame sorted by key.

    Returns
    -------
    `pandas.DataFrame`
        The model metadata in a dataframe format, one row per item, with the
        index renumbered after sorting.
    """
    rows = (
        (entry.key, entry.value, entry.description, entry.category)
        for entry in self._items
    )
    column_names = list(MetadataCustomPrintColumns.values())

    frame = pd.DataFrame(rows, columns=column_names)
    ordered = frame.sort_values(by=[MetadataCustomPrintColumns.KEY])
    return ordered.reset_index(drop=True)
@classmethod
def from_dict(cls, data: Dict) -> "ModelCustomMetadata":
    """Creates a `ModelCustomMetadata` object from its dictionary representation.

    Parameters
    ----------
    data : Dict
        Model metadata in a dictionary format. It must be a non-empty
        dictionary whose `data` key holds a list of item dictionaries.

    Returns
    -------
    ModelCustomMetadata
        An instance of model custom metadata.

    Raises
    ------
    ValueError
        In case of the wrong input data format.
    """
    # Accept only a non-empty dict whose "data" entry is a list.
    if not (
        data
        and isinstance(data, Dict)
        and "data" in data
        and isinstance(data["data"], List)
    ):
        raise ValueError(
            "An error occurred when attempting to deserialize the model custom metadata from a dictionary. "
            "The input data must be a dictionary with `data` key. Example: `{'data': []}`"
        )

    result = cls()
    for raw_item in data["data"]:
        parsed = ModelCustomMetadataItem.from_dict(raw_item)
        result._add(parsed, replace=True)
    return result
class ModelTaxonomyMetadata(ModelMetadata):
    """Class that represents Model Taxonomy Metadata.

    On construction one `ModelTaxonomyMetadataItem` is created for every key
    listed in `MetadataTaxonomyKeys`.

    Methods
    -------
    get(self, key: str) -> ModelTaxonomyMetadataItem
        Returns the model metadata item by provided key.
    reset(self) -> None
        Resets all model metadata items to empty values.
    to_dataframe(self) -> pd.DataFrame
        Returns the model metadata list in a data frame format.
    size(self) -> int
        Returns the size of the model metadata in bytes.
    validate(self) -> bool
        Validates metadata.
    to_dict(self)
        Serializes model metadata into a dictionary.
    from_dict(cls, data) -> ModelTaxonomyMetadata
        Constructs model metadata from dictionary.
    to_yaml(self)
        Serializes model metadata into a YAML.
    to_json(self)
        Serializes model metadata into a JSON.
    to_json_file(self, file_path: str, storage_options: dict = None) -> None
        Saves the metadata to a local file or object storage.

    Examples
    --------
    >>> metadata_taxonomy = ModelTaxonomyMetadata()
    >>> metadata_taxonomy.to_dataframe()
                    Key                   Value
    --------------------------------------------
    0       UseCaseType   binary_classification
    1         Framework                 sklearn
    2  FrameworkVersion                   0.2.2
    3         Algorithm               algorithm
    4   Hyperparameters                      {}

    >>> metadata_taxonomy.reset()
    >>> metadata_taxonomy.to_dataframe()
                    Key                    Value
    --------------------------------------------
    0       UseCaseType                     None
    1         Framework                     None
    2  FrameworkVersion                     None
    3         Algorithm                     None
    4   Hyperparameters                     None

    >>> metadata_taxonomy
        metadata:
        - key: UseCaseType
          category: None
          description: None
          value: None
    """

    def __init__(self):
        super().__init__()
        # Pre-create one item per known taxonomy key.
        for taxonomy_key in MetadataTaxonomyKeys.values():
            placeholder = ModelTaxonomyMetadataItem(key=taxonomy_key)
            self._items.add(placeholder)

    def _populate_from_map(self, map: Dict[str, str]) -> None:
        """Copies values for the known taxonomy keys out of a key/value map.

        Parameters
        ----------
        map: Dict[str, str]
            The key/value map with model metadata information. Entries whose
            key is not a taxonomy key are ignored.

        Returns
        -------
        None
            Nothing.
        """
        for taxonomy_key in MetadataTaxonomyKeys.values():
            if taxonomy_key not in map:
                continue
            self[taxonomy_key].update(value=map[taxonomy_key])

    @classmethod
    def _from_oci_metadata(cls, metadata_list):
        """Builds a `ModelTaxonomyMetadata` object from a list of OCI metadata.

        Parameters
        ----------
        metadata_list: List
            List of oci metadata.

        Returns
        -------
        ModelTaxonomyMetadata
            A `ModelTaxonomyMetadata` instance.
        """
        result = cls()
        for oci_entry in metadata_list:
            parsed = ModelTaxonomyMetadataItem._from_oci_metadata(oci_entry)
            # Only the value is copied onto the pre-created item for that key.
            result[parsed.key].update(value=parsed.value)
        return result

    def to_dataframe(self) -> pd.DataFrame:
        """Renders the metadata items as a key/value data frame sorted by key.

        Returns
        -------
        `pandas.DataFrame`
            The model metadata in a dataframe format, with the index
            renumbered after sorting.
        """
        rows = ((entry.key, entry.value) for entry in self._items)
        column_names = list(MetadataTaxonomyPrintColumns.values())

        frame = pd.DataFrame(rows, columns=column_names)
        ordered = frame.sort_values(by=[MetadataTaxonomyPrintColumns.KEY])
        return ordered.reset_index(drop=True)

    @classmethod
    def from_dict(cls, data: Dict) -> "ModelTaxonomyMetadata":
        """Creates a `ModelTaxonomyMetadata` object from its dictionary representation.

        Parameters
        ----------
        data : Dict
            Model metadata in a dictionary format. It must be a non-empty
            dictionary whose `data` key holds a list of item dictionaries.

        Returns
        -------
        ModelTaxonomyMetadata
            An instance of model taxonomy metadata.

        Raises
        ------
        ValueError
            In case of the wrong input data format.
        """
        # Accept only a non-empty dict whose "data" entry is a list.
        if not (
            data
            and isinstance(data, Dict)
            and "data" in data
            and isinstance(data["data"], List)
        ):
            raise ValueError(
                "An error occurred when attempting to deserialize the model taxonomy metadata from a dictionary. "
                "The input data must be a dictionary with `data` key. Example: `{'data': []}`"
            )

        result = cls()
        for raw_item in data["data"]:
            parsed = ModelTaxonomyMetadataItem.from_dict(raw_item)
            result[parsed.key].update(value=parsed.value)
        return result
@dataclass(repr=True)
class ModelProvenanceMetadata(DataClassSerializable):
    """ModelProvenanceMetadata class.

    Holds the provenance details of a model: the git repository, branch and
    commit of the training code, the training script path, the training id
    and the artifact directory.

    Examples
    --------
    >>> provenance_metadata = ModelProvenanceMetadata.fetch_training_code_details()
    ModelProvenanceMetadata(repo=<git.repo.base.Repo '/home/datascience/.git'>,
                            git_branch='master',
                            git_commit='99ad04c31803f1d4ffcc3bf4afbd6bcf69a06af2',
                            repository_url='file:///home/datascience',
                            "",
                            "")
    >>> provenance_metadata.assert_path_not_dirty("your_path", ignore=False)
    """

    # Excluded from `to_dict()` through the `serializable: False` field metadata.
    # `fetch_training_code_details` stores a `git.Repo` here, while
    # `_from_oci_metadata` stores the repository URL string.
    repo: str = field(default=None, metadata={"serializable": False})
    git_branch: str = None
    git_commit: str = None
    repository_url: str = None
    training_script_path: str = None
    training_id: str = None
    artifact_dir: str = None

    @classmethod
    def fetch_training_code_details(
        cls,
        training_script_path: str = None,
        training_id: str = None,
        artifact_dir: str = None,
    ):
        """Fetches the training code details: repo, git_branch, git_commit,
        repository_url, training_script_path and training_id.

        Parameters
        ----------
        training_script_path: (str, optional). Defaults to None.
            Training script path. When it exists, the git repository is
            searched starting from the script's directory; otherwise the
            current working directory is used.
        training_id: (str, optional). Defaults to None.
            The training OCID for model.
        artifact_dir: str
            artifact directory to store the files needed for deployment.

        Returns
        -------
        ModelProvenanceMetadata
            A ModelProvenanceMetadata instance.
        """
        git_dir = CURRENT_WORKING_DIR
        if training_script_path:
            if not os.path.exists(training_script_path):
                logger.warning(
                    f"Training script {os.path.abspath(training_script_path)} does not exist."
                )
            else:
                training_script_path = os.path.abspath(training_script_path)
                git_dir = os.path.dirname(training_script_path)

        repo = git.Repo(git_dir, search_parent_directories=True)

        # Repository URL: prefer the remote named "origin", otherwise use the
        # first configured remote. Remotes are matched by name because
        # attribute access (`repo.remotes.origin`) raises AttributeError when
        # no such remote exists, and the remotes list has no `.values()`.
        remotes = list(repo.remotes)
        if remotes:
            chosen_remote = next(
                (remote for remote in remotes if remote.name == "origin"),
                remotes[0],
            )
            repository_url = chosen_remote.url
        else:
            repository_url = "file://" + repo.working_dir  # no remote repo

        # Branch and commit are read independently so that a missing active
        # branch (e.g. a detached HEAD) does not also discard the commit.
        git_branch = None
        git_commit = None
        try:
            git_branch = str(repo.active_branch)
        except Exception:
            # Best effort, as in the commit lookup below.
            logger.warning("No active branch found.")
        try:
            git_commit = str(repo.head.commit.hexsha) or None
        except Exception:
            logger.warning("No commit found.")

        return cls(
            repo=repo,
            git_branch=git_branch,
            git_commit=git_commit,
            repository_url=repository_url,
            training_script_path=training_script_path,
            training_id=training_id,
            artifact_dir=artifact_dir,
        )

    def assert_path_not_dirty(self, path: str, ignore: bool):
        """Checks if all the changes in this path have been committed.

        Nothing is checked when the path is an OCI object storage path, when
        `ignore` is True, when no repository is attached, when the attached
        repository has no working directory, or when the path lies outside
        the repository working directory.

        Parameters
        ----------
        path: (str)
            path.
        ignore (bool)
            whether to ignore the changes or not.

        Raises
        ------
        ChangesNotCommitted: if there are changes not being committed.

        Returns
        -------
        None
            Nothing.
        """
        if ObjectStorageDetails.is_oci_path(path):
            return
        if self.repo is None or ignore:
            return

        # `repo` is only a URL string when the object was built by
        # `_from_oci_metadata`, and a bare repository has no working
        # directory. There is nothing to inspect in either case.
        working_dir = getattr(self.repo, "working_dir", None)
        if working_dir is None:
            return

        path_abs = os.path.abspath(path)
        if os.path.commonpath([path_abs, working_dir]) != working_dir:
            # The path is outside of the repository.
            return

        path_relpath = os.path.relpath(path_abs, working_dir)
        # NOTE(review): when `path` is the repository root, `path_relpath` is
        # "." and `os.path.commonpath` returns "" for it, so untracked files
        # never match in that case. Confirm whether this is intended.
        if self.repo.is_dirty(path=path_relpath) or any(
            os.path.commonpath([path_relpath, untracked]) == path_relpath
            for untracked in self.repo.untracked_files
        ):
            raise ChangesNotCommitted(path_abs)

    def _to_oci_metadata(self) -> oci.data_science.models.ModelProvenance:
        """Convert to `oci.data_science.models.ModelProvenance` object.

        `artifact_dir` is sent as `script_dir` and `training_script_path` as
        `training_script`.

        Returns
        -------
        oci.data_science.models.ModelProvenance
            OCI model provenance object.
        """
        return oci.data_science.models.ModelProvenance(
            repository_url=self.repository_url,
            git_branch=self.git_branch,
            git_commit=self.git_commit,
            script_dir=self.artifact_dir,
            training_script=self.training_script_path,
            training_id=self.training_id,
        )

    @classmethod
    def _from_oci_metadata(
        cls, model_provenance: oci.data_science.models.ModelProvenance
    ) -> "ModelProvenanceMetadata":
        """Creates a new model provenance metadata item from the
        `oci.data_science.models.ModelProvenance` object.

        Parameters
        ----------
        model_provenance: oci.data_science.models.ModelProvenance
            The OCI model provenance object to convert.

        Returns
        -------
        ModelProvenanceMetadata
            Model provenance metadata object. Its `repo` attribute holds the
            repository URL string.
        """
        return cls(
            repo=model_provenance.repository_url,
            git_branch=model_provenance.git_branch,
            git_commit=model_provenance.git_commit,
            repository_url=model_provenance.repository_url,
            training_script_path=model_provenance.training_script,
            training_id=model_provenance.training_id,
            artifact_dir=model_provenance.script_dir,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, str]) -> "ModelProvenanceMetadata":
        """Constructs an instance of ModelProvenanceMetadata from a dictionary.

        Parameters
        ----------
        data : Dict[str,str]
            Model provenance metadata in dictionary format. A falsy value
            (None or an empty dictionary) yields an instance with defaults.

        Returns
        -------
        ModelProvenanceMetadata
            An instance of ModelProvenanceMetadata.
        """
        return cls(**(data or {}))

    def to_dict(self) -> dict:
        """Serializes model provenance metadata into a dictionary.

        Fields whose metadata sets `serializable` to a falsy value are left out.

        Returns
        -------
        Dict
            The dictionary representation of the model provenance metadata.
        """
        return {
            f.name: getattr(self, f.name)
            for f in fields(self)
            if f.metadata.get("serializable", True)
        }

    def __repr__(self):
        """Returns printable version of object.

        Returns
        -------
        string
            Serialized version of object as a YAML string
        """
        return self.to_yaml()