Source code for ads.feature_engineering.accessor.mixin.feature_types_mixin

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""
The module that represents the ADS Feature Types Mixin class that extends
Pandas Series and DataFrame accessors.

Classes
-------
    ADSFeatureTypesMixin
        ADS Feature Types Mixin class that extends Pandas Series and DataFrame accessors.
"""

import inspect
from typing import Union

import pandas as pd
import tabulate
from ads.feature_engineering.feature_type.base import FeatureType
from ads.feature_engineering.feature_type_manager import (
    FeatureTypeManager as feature_type_manager,
)
from ads.feature_engineering.feature_type_manager import _feature_type_by_dtype

class ADSFeatureTypesMixin:
    """ADS Feature Types Mixin class that extends Pandas Series and DataFrame accessors.

    The methods below read ``self._obj`` (the wrapped pandas object) and, when
    ``self._obj`` is not a DataFrame, iterate over ``self._feature_type``.
    Both attributes must be supplied by the class that uses this mixin.

    Methods
    -------
    warning_registered(self) -> pd.DataFrame
        Lists registered warnings for registered feature types.
    validator_registered(self) -> pd.DataFrame
        Lists registered validators for registered feature types.
    help(self, prop: str = None) -> None
        Help method that prints either a table of available properties or,
        given a property, prints its docstring.
    """

    @staticmethod
    def _feature_type_by_dtype(dtype) -> FeatureType:
        """Determines feature type by DataFrame dtype.

        Parameters
        ----------
        dtype: pd.DataFrame.dtypes
            The Pandas series data type.

        Returns
        -------
        FeatureType
            The subclass of FeatureType.
        """
        # Resolves to the module-level function of the same name, not to
        # this static method (class scope is not searched from a method body).
        return _feature_type_by_dtype(dtype)

    @staticmethod
    def _is_type_registered(feature_type: Union[FeatureType, str]) -> bool:
        """Checks if provided feature type is registered in the system.

        Parameters
        ----------
        feature_type: Union[FeatureType, str]
            The FeatureType subclass or a str indicating feature type.

        Returns
        -------
        bool
            True if provided feature type registered, False otherwise.
        """
        return feature_type_manager.is_type_registered(feature_type)

    @staticmethod
    def _get_type(feature_type: Union[FeatureType, str]) -> FeatureType:
        """Gets a feature type by class object or name.

        Parameters
        ----------
        feature_type: Union[FeatureType, str]
            The FeatureType subclass or a str indicating feature type.

        Returns
        -------
        FeatureType
            Found feature type.
        """
        return feature_type_manager.feature_type_object(feature_type)

    @staticmethod
    def _combine_registered(frames, columns) -> pd.DataFrame:
        """Stacks per-feature-type tables into one frame with ``columns`` first.

        Parameters
        ----------
        frames: list of pd.DataFrame
            The tables to stack.
        columns: list of str
            The columns that must be present, in this order, in the result.

        Returns
        -------
        :class:`pandas.DataFrame`
            The stacked table with a fresh 0..n-1 index. An empty frame with
            ``columns`` is returned when there is nothing to stack.
        """
        # Empty frames contribute no rows; dropping them up front avoids
        # concatenating empty entries.
        frames = [frame for frame in frames if not frame.empty]
        if not frames:
            return pd.DataFrame((), columns=columns)
        combined = pd.concat(frames, ignore_index=True)
        # Keep the expected columns first (created as NaN when missing) and
        # append any additional columns that the stacked frames carry.
        extra = [col for col in combined.columns if col not in columns]
        return combined.reindex(columns=list(columns) + extra)

    def warning_registered(self) -> pd.DataFrame:
        """Lists registered warnings for all registered feature types.

        Returns
        -------
        :class:`pandas.DataFrame`
            The list of registered warnings for registered feature types.

        Examples
        --------
        >>> df.ads.warning_registered()
            Column    Feature Type             Warning                    Handler
        -------------------------------------------------------------------------
        0      Age      continuous               zeros              zeros_handler
        1      Age      continuous    high_cardinality   high_cardinality_handler

        >>> df["Age"].ads.warning_registered()
            Feature Type             Warning                    Handler
        ---------------------------------------------------------------
        0     continuous               zeros              zeros_handler
        1     continuous    high_cardinality   high_cardinality_handler
        """
        common_columns = ["Feature Type", "Warning", "Handler"]
        frames = []
        if isinstance(self._obj, pd.DataFrame):
            # DataFrame: delegate to each column's accessor and tag the rows
            # with the column they came from.
            for col in self._obj.columns:
                feature_type_df = self._obj[col].ads.warning_registered()
                feature_type_df.insert(0, "Column", col)
                frames.append(feature_type_df)
            return self._combine_registered(frames, ["Column"] + common_columns)

        for feature_type in self._feature_type:
            feature_type_df = feature_type.warning.registered()
            feature_type_df.insert(0, "Feature Type", feature_type.name)
            frames.append(feature_type_df.rename(columns={"Name": "Warning"}))
        return self._combine_registered(frames, common_columns)

    def validator_registered(self) -> pd.DataFrame:
        """Lists registered validators for registered feature types.

        Returns
        -------
        :class:`pandas.DataFrame`
            The list of registered validators for registered feature types.

        Examples
        --------
        >>> df.ads.validator_registered()
                 Column     Feature Type        Validator                 Condition                    Handler
        ------------------------------------------------------------------------------------------------------
        0   PhoneNumber     phone_number  is_phone_number                        ()            default_handler
        1   PhoneNumber     phone_number  is_phone_number    {'country_code': '+7'}   specific_country_handler
        2    CreditCard      credit_card   is_credit_card                        ()            default_handler

        >>> df['PhoneNumber'].ads.validator_registered()
            Feature Type        Validator                 Condition                    Handler
        -------------------------------------------------------------------------------------------
        0   phone_number  is_phone_number                        ()            default_handler
        1   phone_number  is_phone_number    {'country_code': '+7'}   specific_country_handler
        """
        common_columns = ["Feature Type", "Validator", "Condition", "Handler"]
        frames = []
        if isinstance(self._obj, pd.DataFrame):
            # DataFrame: delegate to each column's accessor and tag the rows
            # with the column they came from.
            for col in self._obj.columns:
                feature_type_df = self._obj[col].ads.validator_registered()
                feature_type_df.insert(0, "Column", col)
                frames.append(feature_type_df)
            return self._combine_registered(frames, ["Column"] + common_columns)

        for feature_type in self._feature_type:
            feature_type_df = feature_type.validator.registered()
            feature_type_df.insert(0, "Feature Type", feature_type.name)
            frames.append(feature_type_df.rename(columns={"Name": "Validator"}))
        return self._combine_registered(frames, common_columns)

    def help(self, prop: str = None) -> None:
        """Help method that prints either a table of available properties or,
        given an individual property, prints its docstring.

        Parameters
        ----------
        prop : str
            The Name of property.

        Returns
        -------
        None
            Nothing.
        """
        if prop:
            if hasattr(self, prop):
                print(inspect.getdoc(getattr(self, prop)))
            else:
                print(f"Property {prop} not found.")
            return

        methods = set()
        attrs = set()
        for attr in dir(self):
            # Skip private and dunder names alike.
            if attr.startswith("_"):
                continue
            attr_obj = getattr(self, attr)
            if callable(attr_obj):
                doc = inspect.getdoc(attr_obj)
                # The description is the text up to the first period.
                methods.add((attr, doc.split(".")[0] if doc else "method"))
            elif hasattr(self.__class__, attr):
                # Read the docstring from the class-level object (e.g. the
                # property descriptor) rather than from the instance value.
                doc = inspect.getdoc(getattr(self.__class__, attr))
                # getdoc returns None for undocumented attributes.
                attrs.add((attr, doc.split(".")[0] if doc else "property"))

        props = sorted(methods | attrs)
        print(tabulate.tabulate(props, headers=("Property", "Description")))