ads.feature_engineering package#

Subpackages#

Submodules#

ads.feature_engineering.exceptions module#

exception ads.feature_engineering.exceptions.InvalidFeatureType(tname: str)[source]#

Bases: TypeError

exception ads.feature_engineering.exceptions.NameAlreadyRegistered(name: str)[source]#

Bases: NameError

exception ads.feature_engineering.exceptions.TypeAlreadyAdded(tname: str)[source]#

Bases: TypeError

exception ads.feature_engineering.exceptions.TypeAlreadyRegistered(tname: str)[source]#

Bases: TypeError

exception ads.feature_engineering.exceptions.TypeNotFound(tname: str)[source]#

Bases: TypeError

exception ads.feature_engineering.exceptions.WarningAlreadyExists(name: str)[source]#

Bases: ValueError

exception ads.feature_engineering.exceptions.WarningNotFound(name: str)[source]#

Bases: ValueError

ads.feature_engineering.feature_type_manager module#

The module that helps to manage feature types. Provides functionality to register, unregister, and list feature types.

Classes#

FeatureTypeManager

Feature Types Manager class that manages feature types.

Examples

>>> from ads.feature_engineering.feature_type.base import FeatureType
>>> class NewType(FeatureType):
...    description="My personal type."
...    pass
>>> FeatureTypeManager.feature_type_register(NewType)
>>> FeatureTypeManager.feature_type_registered()
            Name        Feature Type                                  Description
---------------------------------------------------------------------------------
0     Continuous          continuous          Type representing continuous values.
1       DateTime           date_time           Type representing date and/or time.
2       Category            category  Type representing discrete unordered values.
3        Ordinal             ordinal             Type representing ordered values.
4        NewType            new_type                             My personal type.
>>> FeatureTypeManager.warning_registered()
    Feature Type             Warning                    Handler
----------------------------------------------------------------------
0     continuous               zeros              zeros_handler
1     continuous    high_cardinality   high_cardinality_handler
>>> FeatureTypeManager.validator_registered()
    Feature Type            Validator                 Condition                     Handler
-------------------------------------------------------------------------------------------
0   phone_number      is_phone_number                        ()             default_handler
1   phone_number      is_phone_number    {'country_code': '+7'}    specific_country_handler
2    credit_card       is_credit_card                        ()             default_handler
>>> FeatureTypeManager.feature_type_unregister(NewType)
>>> FeatureTypeManager.feature_type_reset()
>>> FeatureTypeManager.feature_type_object('continuous')
Continuous
class ads.feature_engineering.feature_type_manager.FeatureTypeManager[source]#

Bases: object

Feature Types Manager class that manages feature types.

Provides functionality to register, unregister, and list feature types.

feature_type_object(cls, feature_type: FeatureType | str) FeatureType[source]#

Gets a feature type by class object or name.

feature_type_register(cls, feature_type_cls: FeatureType) None[source]#

Registers a feature type.

feature_type_unregister(cls, feature_type: FeatureType | str) None[source]#

Unregisters a feature type.

feature_type_reset(cls) None[source]#

Resets feature types to be default.

feature_type_registered(cls) pd.DataFrame[source]#

Lists all registered feature types as a DataFrame.

warning_registered(cls) pd.DataFrame[source]#

Lists registered warnings for all registered feature types.

validator_registered(cls) pd.DataFrame[source]#

Lists registered validators for all registered feature types.

Examples

>>> from ads.feature_engineering.feature_type.base import FeatureType
>>> class NewType(FeatureType):
...    pass
>>> FeatureTypeManager.feature_type_register(NewType)
>>> FeatureTypeManager.feature_type_registered()
            Name      Feature Type                                  Description
-------------------------------------------------------------------------------
0     Continuous        continuous          Type representing continuous values.
1       DateTime         date_time           Type representing date and/or time.
2       Category          category  Type representing discrete unordered values.
3        Ordinal           ordinal             Type representing ordered values.
>>> FeatureTypeManager.warning_registered()
    Feature Type             Warning                    Handler
----------------------------------------------------------------------
0     continuous               zeros              zeros_handler
1     continuous    high_cardinality   high_cardinality_handler
>>> FeatureTypeManager.validator_registered()
    Feature Type            Validator                 Condition                     Handler
-------------------------------------------------------------------------------------------
0   phone_number      is_phone_number                        ()             default_handler
1   phone_number      is_phone_number    {'country_code': '+7'}    specific_country_handler
2    credit_card       is_credit_card                        ()             default_handler
>>> FeatureTypeManager.feature_type_unregister(NewType)
>>> FeatureTypeManager.feature_type_reset()
>>> FeatureTypeManager.feature_type_object('continuous')
Continuous
classmethod feature_type_object(feature_type: FeatureType | str) FeatureType[source]#

Gets a feature type by class object or name.

Parameters:

feature_type (Union[FeatureType, str]) – The FeatureType subclass or a str indicating feature type.

Returns:

Found feature type.

Return type:

FeatureType

Raises:
  • TypeNotFound – If the provided feature type is not registered.

  • TypeError – If the provided feature type is not a subclass of FeatureType.

classmethod feature_type_register(feature_type_cls: FeatureType) None[source]#

Registers new feature type.

Parameters:

feature_type_cls (FeatureType) – Subclass of FeatureType to be registered.

Returns:

Nothing.

Return type:

None

Raises:
  • TypeError – Type is not a subclass of FeatureType.

  • TypeError – Type has already been registered.

  • NameError – Name has already been used.

classmethod feature_type_registered() DataFrame[source]#

Lists all registered feature types as a DataFrame.

Returns:

The list of feature types in a DataFrame format.

Return type:

pd.DataFrame

classmethod feature_type_reset() None[source]#

Resets feature types to be default.

Returns:

Nothing.

Return type:

None

classmethod feature_type_unregister(feature_type: FeatureType | str) None[source]#

Unregisters a feature type.

Parameters:

feature_type (Union[FeatureType, str]) – The FeatureType subclass or a str indicating feature type.

Returns:

Nothing.

Return type:

None

Raises:

TypeError – On an attempt to unregister a default feature type.

classmethod is_type_registered(feature_type: FeatureType | str) bool[source]#

Checks if the provided feature type is registered in the system.

Parameters:

feature_type (Union[FeatureType, str]) – The FeatureType subclass or a str indicating feature type.

Returns:

True if the provided feature type is registered, False otherwise.

Return type:

bool

classmethod validator_registered() DataFrame[source]#

Lists registered validators for registered feature types.

Returns:

The list of registered validators for registered feature types in a DataFrame format.

Return type:

pd.DataFrame

Examples

>>> FeatureTypeManager.validator_registered()
    Feature Type            Validator                 Condition                     Handler
-------------------------------------------------------------------------------------------
0   phone_number      is_phone_number                        ()             default_handler
1   phone_number      is_phone_number    {'country_code': '+7'}    specific_country_handler
2    credit_card       is_credit_card                        ()             default_handler
classmethod warning_registered() DataFrame[source]#

Lists registered warnings for all registered feature types.

Returns:

The list of registered warnings for registered feature types in a DataFrame format.

Return type:

pd.DataFrame

Examples

>>> FeatureTypeManager.warning_registered()
    Feature Type             Warning                    Handler
----------------------------------------------------------------------
0     continuous               zeros              zeros_handler
1     continuous    high_cardinality   high_cardinality_handler

ads.feature_engineering.schema module#

class ads.feature_engineering.schema.Attribute(dtype: str, feature_type: str, name: str, domain: Domain, required: bool, description: str, order: int | None = None)[source]#

Bases: DataClassSerializable

Attribute describes the column/feature/element. It holds the following information:

  • dtype - Type of data - float, int64, etc. Matches with Pandas dtypes.

  • feature_type - Feature type of data - Integer, String, etc. Matches with ads feature types.

  • name - Name of the feature.

  • domain - Represented by the Domain class.

  • required - Boolean - True or False.

  • description - Description about the column/feature.

  • order - Order of the column/feature in the data.

Examples

>>> attr_fruits = Attribute(
...     dtype = "category",
...     feature_type = "category",
...     name = "fruits",
...     domain = Domain(values="Apple, Orange, Grapes", stats={"mode": "Orange"}, constraints=[Expression("in ['Apple', 'Orange', 'Grapes']")]),
...     required = True,
...     description = "Names of fruits",
...     order = 0
... )
>>> attr_fruits
description: Names of fruits
domain:
    constraints:
    - expression: in ['Apple', 'Orange', 'Grapes']
        language: python
    stats:
        mode: Orange
    values: Apple, Orange, Grapes
dtype: category
feature_type: category
name: fruits
order: 0
required: true
>>> attr_fruits.key
'fruits'
description: str#
domain: Domain#
dtype: str#
feature_type: str#
property key#
name: str#
order: int | None = None#
required: bool#
sort_index: int#
to_dict(**kwargs) dict[source]#

Serializes instance of class into a dictionary

kwargs#

side_effect: Optional[SideEffect]

The side effect to apply to the dictionary. It can either convert the dictionary keys to “lower” (SideEffect.CONVERT_KEYS_TO_LOWER.value) or to “upper” (SideEffect.CONVERT_KEYS_TO_UPPER.value) case.

Returns:

A dictionary.

Return type:

Dict

class ads.feature_engineering.schema.BaseSchemaLoader[source]#

Bases: ABC

Base Schema Loader which loads and validates schema.

load_schema(self)[source]#

Load and validate schema from a file and return the normalized schema.

load_schema(schema_path)[source]#

Load and validate schema from a file and return the normalized schema.

exception ads.feature_engineering.schema.DataSizeTooWide(data_col_num: int, max_col_num: int)[source]#

Bases: ValueError

class ads.feature_engineering.schema.Domain(values: str = '', stats: ~typing.Dict = <factory>, constraints: ~typing.List[~ads.feature_engineering.schema.Expression] = <factory>)[source]#

Bases: DataClassSerializable

Domain describes the data. It holds the following information:

  • stats - Statistics of the data.

  • constraints - List of Expression which defines the constraints for the data.

  • values - Domain values.

Examples

>>> Domain(values='Rational Numbers', stats={"mean":50, "median":51, "min": 5, "max":100}, constraints=[Expression('$x > 5')])
constraints:
- expression: $x > 5
    language: python
stats:
    max: 100
    mean: 50
    median: 51
    min: 5
values: Rational Numbers
constraints: List[Expression]#
stats: Dict#
values: str = ''#
class ads.feature_engineering.schema.Expression(expression: str, language: str = 'python')[source]#

Bases: DataClassSerializable

Expression allows specifying a string representation of an expression which can be evaluated by the language corresponding to the value provided in the language attribute.

Default value for language is python

Parameters:
  • expression (str) – Must use string.Template format for specifying the expression.

  • language (str) – Default value is python. It could be any language. The evaluate method expects the expression to be of type python.

Examples

>>> exp = Expression("($x > 10 and $x <100) or ($x < -1 and $x > -500)")
>>> exp.evaluate(x=500)
False
>>> exp.evaluate(x=20)
True
>>> exp.evaluate(x=9)
False
>>> exp.evaluate(x=-9)
True
evaluate(**kwargs)[source]#
expression: str#
language: str = 'python'#
class ads.feature_engineering.schema.JsonSchemaLoader[source]#

Bases: BaseSchemaLoader

Json Schema which loads and validates schema from a json file.

load_schema(self)#

Load and validate schema from json file and return the normalized schema.

Examples

>>> schema_loader = JsonSchemaLoader()
>>> schema_dict = schema_loader.load_schema('schema.json')
>>> schema_dict
{'Schema': [{'dtype': 'object',
    'feature_type': 'String',
    'name': 'Attrition',
    'domain': {'values': 'String',
        'stats': {'count': 1470, 'unique': 2},
        'constraints': []},
    'required': True,
    'description': 'Attrition'},
    {'dtype': 'int64',
    'feature_type': 'Integer',
    'name': 'Age',
    'domain': {'values': 'Integer',
        'stats': {'count': 1470.0,
        'mean': 37.923809523809524,
        'std': 9.135373489136732,
        'min': 19.0,
        '25%': 31.0,
        '50%': 37.0,
        '75%': 44.0,
        'max': 61.0},
        'constraints': []},
    'required': True,
    'description': 'Age'}]}
class ads.feature_engineering.schema.Schema(_version: str = '1.1')[source]#

Bases: object

Schema describes the structure of the data.

add(self, item: Attribute, replace: bool = False)[source]#

Adds a new attribute item. Replaces existing one if replace flag is True.

from_dict(self)[source]#

Constructs an instance of Schema from a dictionary.

from_file(cls, file_path):

Loads the data schema from a file.

to_dict(self)[source]#

Serializes the data schema into a dictionary.

to_yaml(self)[source]#

Serializes the data schema into a YAML.

to_json(self)[source]#

Serializes the data schema into a json string.

to_json_file(self)[source]#

Saves the data schema into a json file.

to_yaml_file(self)[source]#

Save to a yaml file.

add(self, item: Attribute, replace=False) None[source]#

Adds a new attribute item. Replaces existing one if replace flag is True.

Examples

>>> attr_fruits = Attribute(
...     dtype = "category",
...     feature_type = "category",
...     name = "fruits",
...     domain = Domain(values="Apple, Orange, Grapes", stats={"mode": "Orange"}, constraints=[Expression("in ['Apple', 'Orange', 'Grapes']")]),
...     required = True,
...     description = "Names of fruits",
...     order = 0,
... )
>>> attr_animals = Attribute(
...     dtype = "category",
...     feature_type = "category",
...     name = "animals",
...     domain = Domain(values="Dog, Cat, Python", stats={"mode": "Dog"}, constraints=[Expression("in ['Dog', 'Cat', 'Python']")]),
...     required = True,
...     description = "Names of animals",
...     order = 1,
... )
>>> schema = Schema()
>>> schema.add(attr_fruits)
>>> schema.add(attr_animals)
>>> schema
schema:
- description: Names of fruits
domain:
    constraints:
    - expression: in ['Apple', 'Orange', 'Grapes']
    language: python
    stats:
    mode: Orange
    values: Apple, Orange, Grapes
dtype: category
feature_type: category
name: fruits
order: 0
required: true
- description: Names of animals
domain:
    constraints:
    - expression: in ['Dog', 'Cat', 'Python']
    language: python
    stats:
    mode: Dog
    values: Dog, Cat, Python
dtype: category
feature_type: category
name: animals
order: 1
required: true
>>> schema.to_dict()
    {'schema': [{'dtype': 'category',
    'feature_type': 'category',
    'name': 'fruits',
    'domain': {'values': 'Apple, Orange, Grapes',
        'stats': {'mode': 'Orange'},
        'constraints': [{'expression': "in ['Apple', 'Orange', 'Grapes']",
        'language': 'python'}]},
    'required': True,
    'description': 'Names of fruits',
    'order': 0},
    {'dtype': 'category',
    'feature_type': 'category',
    'name': 'animals',
    'domain': {'values': 'Dog, Cat, Python',
        'stats': {'mode': 'Dog'},
        'constraints': [{'expression': "in ['Dog', 'Cat', 'Python']",
        'language': 'python'}]},
    'required': True,
    'description': 'Names of animals',
    'order': 1}]}
add(item: Attribute, replace: bool = False)[source]#

Adds a new attribute item. Replaces existing one if replace flag is True.

Overrides the existing one if replace flag is True.

Parameters:
  • item (Attribute) – The attribute instance of a column/feature/element.

  • replace (bool) – Overrides the existing attribute item if replace flag is True.

Returns:

Nothing.

Return type:

None

Raises:
  • ValueError – If item is already registered and replace flag is False.

  • TypeError – If input data has a wrong format.

classmethod from_dict(schema: dict)[source]#

Constructs an instance of Schema from a dictionary.

Parameters:

schema (dict) – Data schema in dictionary format.

Returns:

An instance of Schema.

Return type:

Schema

classmethod from_file(file_path: str)[source]#

Loads the data schema from a file.

Parameters:

file_path (str) – File Path to load the data schema.

Returns:

An instance of Schema.

Return type:

Schema

classmethod from_json(schema: str)[source]#

Constructs an instance of Schema from a Json.

Parameters:

schema (str) – Data schema in Json format.

Returns:

An instance of Schema.

Return type:

Schema

property keys: list#

Returns all registered Attribute keys.

Returns:

The list of Attribute keys.

Return type:

Tuple[str]

to_dict()[source]#

Serializes data schema into a dictionary.

Returns:

The dictionary representation of data schema.

Return type:

dict

to_json()[source]#

Serializes the data schema into a json string.

Returns:

The json representation of data schema.

Return type:

str

to_json_file(file_path, storage_options: dict | None = None)[source]#

Saves the data schema into a json file.

Parameters:
  • file_path (str) – File Path to store the schema in json format.

  • storage_options (dict. Default None) – Parameters passed on to the backend filesystem class. Defaults to storage_options set using DatasetFactory.set_default_storage().

Returns:

Nothing.

Return type:

None

to_yaml()[source]#

Serializes the data schema into a YAML.

Returns:

The yaml representation of data schema.

Return type:

str

to_yaml_file(file_path)[source]#

Saves the data schema into a yaml file.

Parameters:

file_path (str) – File Path to store the schema in yaml format.

Returns:

Nothing.

Return type:

None

validate_schema()[source]#

Validate the schema.

validate_size() bool[source]#

Validates schema size.

Validates the size of schema. Throws an error if the size of the schema exceeds expected value.

Returns:

True if schema does not exceed the size limit.

Return type:

bool

Raises:

SchemaSizeTooLarge – If the size of the schema exceeds expected value.

class ads.feature_engineering.schema.SchemaFactory[source]#

Bases: object

Schema Factory.

register_format(self)[source]#

Register a new type of schema class.

get_schema(self)[source]#

Get the YamlSchema or JsonSchema based on the format.

default_schema(cls)[source]#

Construct a SchemaFactory instance and register yaml and json loader.

Examples

>>> factory = SchemaFactory.default_schema()
>>> schema_loader = factory.get_schema('.json')
>>> schema_dict = schema_loader.load_schema('schema.json')
>>> schema = Schema.from_dict(schema_dict)
>>> schema
Schema:
- description: Attrition
domain:
    constraints: []
    stats:
    count: 1470
    unique: 2
    values: String
dtype: object
feature_type: String
name: Attrition
required: true
- description: Age
domain:
    constraints: []
    stats:
    25%: 31.0
    50%: 37.0
    75%: 44.0
    count: 1470.0
    max: 61.0
    mean: 37.923809523809524
    min: 19.0
    std: 9.135373489136732
    values: Integer
dtype: int64
feature_type: Integer
name: Age
required: true
classmethod default_schema()[source]#
get_schema(file_format)[source]#

Get the YamlSchema or JsonSchema based on the format.

register_format(file_format, creator)[source]#

Register a new type of schema class.

exception ads.feature_engineering.schema.SchemaSizeTooLarge(size: int)[source]#

Bases: ValueError

class ads.feature_engineering.schema.YamlSchemaLoader[source]#

Bases: BaseSchemaLoader

Yaml Schema which loads and validates schema from a yaml file.

load_schema(self)#

Loads and validates schema from a yaml file and returns the normalized schema.

Examples

>>> schema_loader = YamlSchemaLoader()
>>> schema_dict = schema_loader.load_schema('schema.yaml')
>>> schema_dict
{'Schema': [{'description': 'Attrition',
    'domain': {'constraints': [],
        'stats': {'count': 1470, 'unique': 2},
        'values': 'String'},
    'dtype': 'object',
    'feature_type': 'String',
    'name': 'Attrition',
    'required': True},
    {'description': 'Age',
    'domain': {'constraints': [],
        'stats': {'25%': 31.0,
        '50%': 37.0,
        '75%': 44.0,
        'count': 1470.0,
        'max': 61.0,
        'mean': 37.923809523809524,
        'min': 19.0,
        'std': 9.135373489136732},
        'values': 'Integer'},
    'dtype': 'int64',
    'feature_type': 'Integer',
    'name': 'Age',
    'required': True}]}

ads.feature_engineering.utils module#

The module that represents utility functions.

Functions:
is_boolean(value: Any) -> bool

Checks if value type is boolean.

class ads.feature_engineering.utils.SchemeNeutral[source]#

Bases: str

AREA_DARK = '#9E9892'#
AREA_LIGHT = '#BCB6B1'#
BACKGROUND_DARK = '#E4E1DD'#
BACKGROUND_LIGHT = '#F5F4F2'#
LINE_DARK = '#47423E'#
LINE_LIGHT = '#665F5B'#
class ads.feature_engineering.utils.SchemeTeal[source]#

Bases: str

AREA_DARK = '#76A2A0'#
AREA_LIGHT = '#9ABFBF'#
BACKGROUND_DARK = '#D6E5E5'#
BACKGROUND_LIGHT = '#F0f6f5'#
LINE_DARK = '#2B484B'#
LINE_LIGHT = '#3E686C'#
ads.feature_engineering.utils.assign_issuer(cardnumber)[source]#
ads.feature_engineering.utils.is_boolean(value: Any) bool[source]#

Checks if value type is boolean.

Parameters:

value (Any) – The value to check.

Returns:

True if value is boolean, False otherwise.

Return type:

bool

ads.feature_engineering.utils.random_color_func(z, word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None)[source]#

Returns random color function used for color_func in creating WordCloud.

Module contents#