# Source code for ads.feature_engineering.feature_type.ip_address

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2021 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""
The module that represents an IpAddress feature type.

Classes:
    IpAddress
        The IpAddress feature type.
"""
import pandas as pd
import re
from ads.feature_engineering.feature_type.base import FeatureType
from ads.feature_engineering.utils import _count_unique_missing
from ads.feature_engineering import schema

# One IPv4 address in dotted-decimal notation: four octets, each in 0-255.
# The pattern carries no anchors; callers must use ``fullmatch`` so that a
# valid prefix followed by extra characters is not accepted.
PATTERNV4 = re.compile(
    r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",
    re.IGNORECASE,
)

# One IPv6 address, optionally surrounded by whitespace. The leading negative
# lookahead rejects more than one "::"; the last alternative accepts an
# embedded dotted-quad IPv4 tail (e.g. "::ffff:192.168.0.1"). The octets of
# that tail go up to 255 (``25[0-5]``). Like PATTERNV4 this pattern is
# unanchored and must be applied with ``fullmatch``.
PATTERNV6 = re.compile(
    r"\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})\s*",
    re.VERBOSE | re.IGNORECASE | re.DOTALL,
)


def default_handler(data: pd.Series, *args, **kwargs) -> pd.Series:
    """Processes given data and indicates if the data matches requirements.

    A value is considered an IP address when, after conversion to ``str`` and
    removal of surrounding whitespace, the *entire* text is a valid IPv4 or
    IPv6 address. Missing values (``None``/``NaN``) are never valid.

    Parameters
    ----------
    data: :class:`pandas.Series`
        The data to process.
    *args, **kwargs
        Accepted for compatibility with the validator call signature; unused.

    Returns
    -------
    :class:`pandas.Series`
        The logical list indicating if the data matches requirements.
        Always of boolean dtype, including for an empty input.
    """

    def _is_ip_address(x) -> bool:
        if pd.isnull(x):
            return False
        # Strip so that surrounding whitespace is tolerated uniformly for
        # IPv4 and IPv6 (PATTERNV6 already allows it on its own).
        candidate = str(x).strip()
        # fullmatch, not match: the patterns have no end anchor, so match()
        # would accept inputs such as "192.168.0.256" or "10.0.0.1abc".
        return (
            PATTERNV4.fullmatch(candidate) is not None
            or PATTERNV6.fullmatch(candidate) is not None
        )

    # astype(bool) keeps the dtype boolean even when ``data`` is empty.
    return data.apply(_is_ip_address).astype(bool)




class IpAddress(FeatureType):
    """
    Type representing IP Address.

    Attributes
    ----------
    description: str
        The feature type description.
    name: str
        The feature type name.
    warning: FeatureWarning
        Provides functionality to register warnings and invoke them.
    validator
        Provides functionality to register validators and invoke them.

    Methods
    --------
    feature_stat(x: pd.Series) -> pd.DataFrame
        Generates feature statistics.
    feature_domain(x: pd.Series) -> schema.Domain
        Generates the domain of the data of this feature type.

    Example
    -------
    >>> from ads.feature_engineering.feature_type.ip_address import IpAddress
    >>> import pandas as pd
    >>> import numpy as np
    >>> s = pd.Series(['192.168.0.1', '2001:db8::', '', np.nan, None], name='ip_address')
    >>> s.ads.feature_type = ['ip_address']
    >>> IpAddress.validator.is_ip_address(s)
    0     True
    1     True
    2    False
    3    False
    4    False
    Name: ip_address, dtype: bool
    """

    description = "Type representing IP Address."

    @staticmethod
    def feature_stat(x: pd.Series) -> pd.DataFrame:
        """Generates feature statistics.

        Feature statistics include (total)count, unique(count) and missing(count).

        Parameters
        ----------
        x: :class:`pandas.Series`
            The data to summarize.

        Examples
        --------
        >>> s = pd.Series(['2002:db8::', '192.168.0.1', '2001:db8::', '2002:db8::', np.nan, None], name='ip_address')
        >>> s.ads.feature_type = ['ip_address']
        >>> s.ads.feature_stat()
            Metric  Value
        0   count   6
        1   unique  3
        2   missing 2

        Returns
        -------
        :class:`pandas.DataFrame`
            Summary statistics of the Series provided.
        """
        return _count_unique_missing(x)

    @classmethod
    def feature_domain(cls, x: pd.Series) -> schema.Domain:
        """
        Generate the domain of the data of this feature type.

        Parameters
        ----------
        x: :class:`pandas.Series`
            The data to describe. Its ``name`` is used to look up the
            statistics column, so the frame returned by ``feature_stat`` is
            assumed to have a column keyed by that name.

        Examples
        --------
        >>> s = pd.Series(['2002:db8::', '192.168.0.1', '2001:db8::', '2002:db8::', np.nan, None], name='ip_address')
        >>> s.ads.feature_type = ['ip_address']
        >>> s.ads.feature_domain()
        constraints: []
        stats:
            count: 6
            missing: 2
            unique: 3
        values: IpAddress

        Returns
        -------
        ads.feature_engineering.schema.Domain
            Domain based on the IpAddress feature type.
        """

        return schema.Domain(
            cls.__name__,
            # Statistics for this series, selected by the series name.
            cls.feature_stat(x).to_dict()[x.name],
            # No constraints are defined for this feature type.
            [],
        )




# Register the default handler under the name "is_ip_address" on the
# IpAddress validator (invoked as ``IpAddress.validator.is_ip_address(series)``
# in the class docstring example).
IpAddress.validator.register("is_ip_address", default_handler)