Source code for ads.data_labeling.metadata

#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List

import pandas as pd
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.serializer import DataClassSerializable
from oci.data_labeling_service_dataplane.models.dataset import Dataset as OCIDLSDataset


@dataclass
class Metadata(DataClassSerializable):
    """The class representing the labeled dataset metadata.

    Attributes
    ----------
    source_path: str
        Contains information on where all the source data
        (image/text/document) is stored.
    records_path: str
        Contains information on where the records jsonl file is stored.
    labels: List
        List of classes/labels for the dataset.
    dataset_name: str
        Dataset display name on the Data Labeling Service console.
    compartment_id: str
        Compartment id of the labeled dataset.
    dataset_id: str
        Dataset id.
    annotation_type: str
        Type of the labeling/annotation task. Currently supports SINGLE_LABEL,
        MULTI_LABEL, ENTITY_EXTRACTION, BOUNDING_BOX.
    dataset_type: str
        Type of the dataset. Currently supports Text, Image, DOCUMENT.
    """

    source_path: str = ""
    records_path: str = ""
    # A default_factory gives every instance its own list instead of a shared one.
    labels: List[str] = field(default_factory=list)
    dataset_name: str = ""
    compartment_id: str = ""
    dataset_id: str = ""
    annotation_type: str = ""
    dataset_type: str = ""

    def to_dict(self) -> Dict:
        """Converts to dictionary representation.

        Returns
        -------
        Dict
            The metadata in dictionary type.
        """
        return asdict(self)

    def __repr__(self) -> str:
        """Shows the metadata in yaml format."""
        # `to_yaml` is not defined in this class; presumably it is inherited
        # from DataClassSerializable.
        return self.to_yaml()

    def to_dataframe(self) -> pd.DataFrame:
        """Converts the metadata to dataframe format.

        Returns
        -------
        pandas.DataFrame
            The metadata in Pandas dataframe format: a single column labelled
            with the empty string, built from the `to_dict()` mapping.
        """
        return pd.DataFrame({"": self.to_dict()})

    def _repr_html_(self) -> str:
        """Shows metadata in dataframe format."""
        styled = self.to_dataframe().style.set_properties(**{"margin-left": "0px"})
        return styled.to_html()

    @classmethod
    def from_dls_dataset(cls, dataset: OCIDLSDataset) -> "Metadata":
        """Constructs a Metadata instance from an OCI DLS dataset.

        Parameters
        ----------
        dataset: OCIDLSDataset
            OCIDLSDataset object.

        Returns
        -------
        Metadata
            The ads labeled dataset metadata instance. `records_path` is not
            populated here and keeps its default value.
        """
        source_details = dataset.dataset_source_details
        oci_source_path = ObjectStorageDetails(
            source_details.bucket,
            source_details.namespace,
            source_details.prefix,
        ).path
        oci_labels = [label.name for label in dataset.label_set.items]

        # Build through `cls` rather than a hard-coded `Metadata` so that the
        # alternate constructor returns the subclass when called on one.
        return cls(
            annotation_type=dataset.annotation_format,
            dataset_name=dataset.display_name,
            source_path=oci_source_path,
            labels=oci_labels,
            compartment_id=dataset.compartment_id,
            dataset_id=dataset.id,
            dataset_type=dataset.dataset_format_details.format_type,
        )