#!/usr/bin/env python
# -*- coding: utf-8 -*--
# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""
The module that helps to register custom validators for the feature types and
to extend registered validators with dispatching based on the specific arguments.
Classes
-------
FeatureValidator
The Feature Validator class to manage custom validators.
FeatureValidatorMethod
The Feature Validator Method class. Extends methods which require
dispatching based on the specific arguments.
"""
import inspect
from typing import Any, Callable, Dict, Tuple, Union
import pandas as pd
class WrongHandlerMethodSignature(ValueError):
    """Error raised when a handler's parameters do not fit a registered condition.

    Parameters
    ----------
    handler_name: str
        The name of the handler, reported as the "provided ... method".
    condition: str
        The text reported as the registered condition.
    handler_signature: str
        The text reported after "Expected parameters:".
    """

    def __init__(self, handler_name: str, condition: str, handler_signature: str):
        # Assemble the three sentences first so the final message is easy to read.
        message = "".join(
            (
                f"The registered condition {condition} is not compatible ",
                f"with the provided {handler_name} method. ",
                f"Expected parameters: {handler_signature}",
            )
        )
        super().__init__(message)
class ValidatorNotFound(ValueError):
    """Error raised when no validator is registered under the given name.

    Parameters
    ----------
    name: str
        The validator name included in the error message.
    """

    def __init__(self, name: str):
        message = f"Validator {name} not found."
        super().__init__(message)
class ValidatorWithConditionNotFound(ValueError):
    """Error raised when a validator has no handler for the given condition.

    Parameters
    ----------
    name: str
        The validator name included in the error message.
    """

    def __init__(self, name: str):
        message = f"Validator {name} with provided condition not found."
        super().__init__(message)
class ValidatorAlreadyExists(ValueError):
    """Error raised when a validator with the given name is already registered.

    Parameters
    ----------
    name: str
        The validator name included in the error message.
    """

    def __init__(self, name: str):
        message = f"Validator {name} already exists."
        super().__init__(message)
class ValidatorWithConditionAlreadyExists(ValueError):
    """Error raised when a validator already has a handler for the given condition.

    Parameters
    ----------
    name: str
        The validator name included in the error message.
    """

    def __init__(self, name: str):
        message = f"Validator {name} with provided condition already exists."
        super().__init__(message)
def _prepare_condition(params: Union[Tuple, Dict[str, Any]]) -> Tuple:
    """Converts provided parameters to Tuple.

    A tuple is returned unchanged. A dict is converted to a tuple of
    ``(key, value)`` pairs in the dict's iteration order.

    Parameters
    -----------
    params: (Union[Tuple, Dict[str, Any]])
        The condition which will be used to register a new validator.

    Returns
    -------
    Tuple
        Prepared condition.

    Raises
    ------
    ValueError
        If condition not provided or provided in the wrong format.
    """
    # Any falsy value (None, empty tuple, empty dict, ...) counts as "not provided".
    if not params:
        raise ValueError("Condition not provided.")
    if isinstance(params, tuple):
        return params
    if isinstance(params, dict):
        return tuple(params.items())
    # Only dict and tuple are accepted; the message names exactly those types.
    raise ValueError(
        "Wrong format for the condition. Condition should be dict or tuple."
    )
class FeatureValidator:
    """The Feature Validator class to manage custom validators.

    Registered validators are stored by name and can be invoked as regular
    attributes of the instance (see ``__getattr__``).

    Methods
    -------
    register(self, name: str, handler: Callable, condition: Union[Tuple, Dict[str, Any]] = None, replace: bool = False) -> None
        Registers new validator.
    unregister(self, name: str, condition: Union[Tuple, Dict[str, Any]] = None) -> None
        Unregisters validator.
    registered(self) -> pd.DataFrame
        Gets the list of registered validators.

    Examples
    --------
    >>> series = pd.Series(['+1-202-555-0141', '+1-202-555-0142'], name='Phone Number')

    >>> def phone_number_validator(data: pd.Series) -> pd.Series:
    ...     print("phone_number_validator")
    ...     return data

    >>> def universal_phone_number_validator(data: pd.Series, country_code) -> pd.Series:
    ...     print("universal_phone_number_validator")
    ...     return data

    >>> def us_phone_number_validator(data: pd.Series, country_code) -> pd.Series:
    ...     print("us_phone_number_validator")
    ...     return data

    >>> PhoneNumber.validator.register(name="is_phone_number", handler=phone_number_validator, replace=True)
    >>> PhoneNumber.validator.register(name="is_phone_number", handler=universal_phone_number_validator, condition = ('country_code',))
    >>> PhoneNumber.validator.register(name="is_phone_number", handler=us_phone_number_validator, condition = {'country_code':'+1'})

    >>> PhoneNumber.validator.is_phone_number(series)
        phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142

    >>> PhoneNumber.validator.is_phone_number(series, country_code = '+7')
        universal_phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142

    >>> PhoneNumber.validator.is_phone_number(series, country_code = '+1')
        us_phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142

    >>> PhoneNumber.validator.registered()
                 Validator                 Condition                            Handler
        ---------------------------------------------------------------------------------
        0  is_phone_number                        ()             phone_number_validator
        1  is_phone_number         ('country_code',)   universal_phone_number_validator
        2  is_phone_number    {'country_code': '+1'}          us_phone_number_validator

    >>> series.ads.validator.is_phone_number()
        phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142

    >>> series.ads.validator.is_phone_number(country_code = '+7')
        universal_phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142

    >>> series.ads.validator.is_phone_number(country_code = '+1')
        us_phone_number_validator
        0     +1-202-555-0141
        1     +1-202-555-0142
    """

    def __init__(self):
        """Initializes the FeatureValidator."""
        # Maps a validator name to the object that dispatches its handlers.
        self._validators = {}

    def register(
        self,
        name: str,
        handler: Callable,
        condition: Union[Tuple, Dict[str, Any]] = None,
        replace: bool = False,
    ) -> None:
        """Registers new validator.

        Without a condition the handler becomes the default handler of the
        validator ``name``; a fresh dispatcher is created, so handlers that
        were registered with conditions under that name are discarded.
        With a condition the handler is added to the already registered
        validator ``name``.

        Parameters
        ----------
        name : str
            The validator name.
        handler: callable
            The handler.
        condition: Union[Tuple, Dict[str, Any]]
            The condition for the validator.
        replace: bool
            The flag indicating if the registered validator should be replaced with the new one.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            The name is empty or handler is not provided.
        TypeError
            The handler is not callable.
            The name of the validator is not a string.
        ValidatorAlreadyExists
            The validator is already registered.
        ValidatorWithConditionAlreadyExists
            The validator with provided condition is already registered.
        ValidatorNotFound
            A condition is provided but no validator with the given name
            has been registered yet.
        """
        if not name:
            raise ValueError("Validator name is not provided.")
        if not isinstance(name, str):
            raise TypeError("Validator name should be a string.")

        name_registered = name in self._validators
        if name_registered and not replace:
            if not condition:
                raise ValidatorAlreadyExists(name)
            if self._validators[name]._has_condition(condition):
                raise ValidatorWithConditionAlreadyExists(name)

        if not handler:
            raise ValueError("Handler is not provided.")
        if not callable(handler):
            raise TypeError("Handler should be a function.")

        if not condition:
            self._validators[name] = FeatureValidatorMethod(handler)
            return

        # A conditional handler needs an existing validator to attach to.
        # Report the missing validator explicitly instead of a bare KeyError.
        if not name_registered:
            raise ValidatorNotFound(name)
        self._validators[name].register(condition=condition, handler=handler)

    def unregister(
        self, name: str, condition: Union[Tuple, Dict[str, Any]] = None
    ) -> None:
        """Unregisters validator.

        Without a condition the whole validator ``name`` is removed; with a
        condition only the handler registered for that condition is removed.

        Parameters
        ----------
        name: str
            The name of the validator to be unregistered.
        condition: Union[Tuple, Dict[str, Any]]
            The condition for the validator to be unregistered.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            The name of the validator is not provided.
        TypeError
            The name of the validator is not a string.
        ValidatorNotFound
            The validator not found.
        ValidatorWithConditionNotFound
            The validator with provided condition not found.
        """
        if not name:
            raise ValueError("Validator name is not provided.")
        if not isinstance(name, str):
            raise TypeError("Validator name should be a string.")
        if name not in self._validators:
            raise ValidatorNotFound(name)

        if not condition:
            del self._validators[name]
            return

        validator = self._validators[name]
        if not validator._has_condition(condition):
            raise ValidatorWithConditionNotFound(name)
        validator.unregister(condition)

    def registered(self) -> pd.DataFrame:
        """Gets the list of registered validators.

        Returns
        -------
        pd.DataFrame
            The list of registered validators with the columns
            ``Validator``, ``Condition`` and ``Handler``.
        """
        frames = []
        for name, validator in self._validators.items():
            frame = validator.registered()
            frame.insert(0, "Validator", name)
            frames.append(frame)

        if not frames:
            empty = pd.DataFrame((), columns=["Validator", "Condition", "Handler"])
            return empty.reset_index(drop=True)
        # Concatenate once instead of growing the result inside the loop.
        return pd.concat(frames, ignore_index=True)

    def _bind_data(self, data: pd.Series) -> None:
        """Binds the data to the all registered validators.

        Parameters
        ----------
        data: pd.Series
            The data to be processed.
        """
        for validator in self._validators.values():
            validator._bind_data(data)

    def __getattr__(self, attr):
        """Makes it possible to invoke registered validators as a regular method.

        Raises
        ------
        AttributeError
            If no validator is registered under the requested name.
        """
        # Read the registry straight from the instance dict: this method runs
        # whenever normal lookup fails, including on instances whose __init__
        # has not run yet (copying, unpickling). Going through
        # `self._validators` there would re-enter __getattr__ endlessly.
        validators = self.__dict__.get("_validators")
        if validators is not None and attr in validators:
            return validators[attr]
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{attr}'"
        )
class FeatureValidatorMethod:
    """The Feature Validator Method class.

    Extends methods which require dispatching based on the specific arguments.
    A default handler is always present; further handlers are stored under a
    condition and selected by the keyword arguments of the call.

    Methods
    -------
    register(self, condition: Union[Tuple, Dict[str, Any]], handler: Callable) -> None
        Registers new handler.
    unregister(self, condition: Union[Tuple, Dict[str, Any]]) -> None
        Unregisters existing handler.
    registered(self) -> pd.DataFrame
        Gets the list of registered handlers.
    """

    def __init__(self, handler: Callable):
        """Initializes the Feature Validator Method.

        Parameters
        ----------
        handler: Callable
            The handler that will be called by default if suitable one not found.

        Raises
        ------
        ValueError
            If the default handler is not specified.
        """
        if not handler:
            raise ValueError("Default handler is not specified.")
        self._default_handler = handler
        # Maps a prepared condition (tuple) to the handler registered for it.
        self._handlers = {}
        # The data bound to this validator; None until bound or passed on call.
        self._data = None

    def register(
        self, condition: Union[Tuple, Dict[str, Any]], handler: Callable
    ) -> None:
        """Registers new handler.

        An existing handler for the same condition is overwritten. Any data
        previously bound to the validator is cleared.

        Parameters
        -----------
        condition: Union[Tuple, Dict[str, Any]]
            The condition which will be used to register a new handler.
        handler: Callable
            The handler to be registered.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            If condition not provided or provided in the wrong format.
            If handler not provided or has wrong format.
        """
        # _prepare_condition performs the presence and format checks.
        prepared_condition = _prepare_condition(condition)
        if not handler or not callable(handler):
            raise ValueError("Handler not provided. Handler should be a function.")
        # NOTE: the handler signature is not checked against the condition
        # here; _validate_handler_signature is not called on registration.
        self._handlers[prepared_condition] = handler
        self._data = None

    def unregister(self, condition: Union[Tuple, Dict[str, Any]]) -> None:
        """Unregisters existing handler.

        Parameters
        -----------
        condition: Union[Tuple, Dict[str, Any]]
            The condition which will be used to unregister a handler.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            If condition not provided or provided in the wrong format.
            If condition not registered.
        """
        prepared_condition = _prepare_condition(condition)
        if prepared_condition not in self._handlers:
            raise ValueError("Condition not registered.")
        del self._handlers[prepared_condition]

    def registered(self) -> pd.DataFrame:
        """Gets the list of registered handlers.

        The default handler is listed first with the condition ``()``.

        Returns
        -------
        pd.DataFrame
            The list of registered handlers with the columns
            ``Condition`` and ``Handler``.
        """
        rows = [("()", self._handler_name(self._default_handler))]
        rows.extend(
            (self._format_condition(condition), self._handler_name(handler))
            for condition, handler in self._handlers.items()
        )
        return pd.DataFrame(rows, columns=["Condition", "Handler"])

    @staticmethod
    def _format_condition(condition: Tuple) -> str:
        """Returns the text shown for a prepared condition.

        A condition made only of two-item tuples is shown as a dict; any other
        condition is shown as the tuple itself. Checking the item type avoids
        misreading a two-character name such as ``'ab'`` as a key/value pair.
        """
        made_of_pairs = all(
            isinstance(item, tuple) and len(item) == 2 for item in condition
        )
        return str(dict(condition)) if made_of_pairs else str(condition)

    @staticmethod
    def _handler_name(handler: Callable) -> str:
        """Returns the handler's ``__name__`` or its ``repr`` if it has none."""
        # Callables such as functools.partial objects carry no __name__.
        return getattr(handler, "__name__", repr(handler))

    def _process(self, *args, **kwargs) -> pd.Series:
        """Finds and invokes a suitable handler for the provided condition.

        The keyword arguments are matched first as ``(name, value)`` pairs and
        then by names only, both in the order they were passed. The default
        handler is used when nothing matches. The bound data is passed to the
        handler as the first positional argument.

        Parameters
        ----------
        *args
            Variable length argument list.
        **kwargs
            Arbitrary keyword arguments. Parameters to search suitable handler.

        Returns
        -------
        pd.Series
            The result of invoked handler.
        """
        if kwargs:
            for key in (tuple(kwargs.items()), tuple(kwargs)):
                try:
                    handler = self._handlers.get(key)
                except TypeError:
                    # An unhashable argument value cannot equal a registered
                    # condition; move on to the next candidate key.
                    continue
                if handler is not None:
                    return handler(self._data, *args, **kwargs)
        return self._default_handler(self._data, *args, **kwargs)

    def _bind_data(self, data: pd.Series) -> None:
        """Binds the data to the validator.

        Parameters
        ----------
        data: pd.Series
            The data to be processed.
        """
        self._data = data

    def _validate_handler_signature(
        self, condition: Union[Tuple, Dict[str, Any]], handler: Callable
    ) -> bool:
        """Validates handler signature.

        The handler parameters must be exactly ``data`` followed by the
        argument names of the condition, in the same order.

        Parameters
        ----------
        condition: Union[Tuple, Dict[str, Any]]
            The condition to validate.
        handler: Callable
            The handler to validate.

        Returns
        -------
        bool
            True if provided condition and handler arguments compatible.

        Raises
        -------
        WrongHandlerMethodSignature
            If provided condition and handler arguments not compatible.
        """
        prepared_condition = _prepare_condition(condition)
        handler_args = list(inspect.signature(handler).parameters)
        # A condition whose first item is a tuple holds (name, value) pairs;
        # otherwise the items are the argument names themselves.
        if isinstance(prepared_condition[0], tuple):
            condition_args = [pair[0] for pair in prepared_condition]
        else:
            condition_args = list(prepared_condition)
        expected_args = ["data"] + condition_args
        if handler_args != expected_args:
            raise WrongHandlerMethodSignature(
                handler.__name__, str(expected_args), str(handler_args)
            )
        return True

    def _has_condition(self, condition: Union[Tuple, Dict[str, Any]]) -> bool:
        """Checks whether provided condition registered or not.

        Parameters
        ----------
        condition: Union[Tuple, Dict[str, Any]]
            The condition to check.

        Returns
        -------
        bool
            True if condition registered, False otherwise.

        Raises
        ------
        ValueError
            If condition not provided or has wrong format.
        """
        return _prepare_condition(condition) in self._handlers

    def __call__(self, *args, **kwargs) -> pd.Series:
        """Makes class instance callable.

        The first positional argument, when given, replaces the bound data and
        stays bound afterwards. Further positional arguments are not passed on
        to the handler; only the keyword arguments are.

        Parameters
        ----------
        *args
            Variable length argument list.
        **kwargs
            Arbitrary keyword arguments.

        Returns
        -------
        pd.Series
            The result of processing data.

        Raises
        ------
        ValueError
            If data is not provided,
        TypeError
            If data has wrong format.
        """
        if args:
            self._data = args[0]
        if self._data is None:
            raise ValueError("Data is not provided.")
        if not isinstance(self._data, pd.Series):
            raise TypeError("Wrong data format. Data should be Series.")
        return self._process(**kwargs)