# Source code for ads.feature_engineering.feature_type.phone_number
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2021 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""
The module that represents a Phone Number feature type.

Classes:
    PhoneNumber
        The Phone Number feature type.

Functions:
    default_handler(data: pd.Series) -> pd.Series
        Processes given data and indicates if the data matches requirements.
"""

import re

import pandas as pd

from ads.feature_engineering import schema
from ads.feature_engineering.feature_type.string import String
from ads.feature_engineering.utils import _count_unique_missing

# Phone number pattern, anchored at both ends of the string:
#   (\+?\d{1,2}[\s-])?   optional 1-2 digit prefix (with optional leading "+")
#                        followed by one whitespace character or hyphen
#   \(?(\d{3})\)?        three digits, optionally wrapped in parentheses
#   [\s.-]?\d{3}         optional separator (whitespace, "." or "-"), three digits
#   [\s.-]?\d{4}         optional separator, four digits
# The pattern contains no unescaped whitespace or "#" outside character
# classes, so re.VERBOSE does not change what it matches; the flag is kept
# so that PATTERN.flags stays the same for existing callers.
PATTERN = re.compile(
    r"^(\+?\d{1,2}[\s-])?\(?(\d{3})\)?[\s.-]?\d{3}[\s.-]?\d{4}$", re.VERBOSE
)
def default_handler(data: pd.Series, *args, **kwargs) -> pd.Series:
    """Processes given data and indicates if the data matches requirements.

    An element satisfies the requirements when it is a ``str`` that matches
    the module-level ``PATTERN``. Every other element (missing values,
    numbers, containers, ...) is reported as ``False``.

    Parameters
    ----------
    data: :class:`pandas.Series`
        The data to process.
    *args, **kwargs
        Accepted for signature compatibility; not used by this handler.

    Returns
    -------
    :class:`pandas.Series`
        The logical list indicating if the data matches requirements.
    """

    def _is_phone_number(x: object) -> bool:
        # The isinstance check already rejects None/NaN/NA, so no separate
        # null test is needed. Calling pd.isnull first would also raise on
        # list-like elements, whose null mask has no single truth value.
        return isinstance(x, str) and PATTERN.match(x) is not None

    # The predicate always returns a plain bool, so it can be applied directly.
    return data.apply(_is_phone_number)
class PhoneNumber(String):
    """Type representing phone numbers.

    Attributes
    -----------
    description: str
        The feature type description.
    name: str
        The feature type name.
    warning: FeatureWarning
        Provides functionality to register warnings and invoke them.
    validator
        Provides functionality to register validators and invoke them.

    Methods
    --------
    feature_stat(x: pd.Series) -> pd.DataFrame
        Generates feature statistics.
    feature_domain(x: pd.Series) -> schema.Domain
        Generates the domain of the data of this feature type.

    Examples
    --------
    >>> from ads.feature_engineering.feature_type.phone_number import PhoneNumber
    >>> import pandas as pd
    >>> s = pd.Series([None, "1-640-124-5367", "1-573-916-4412"])
    >>> PhoneNumber.validator.is_phone_number(s)
    0    False
    1     True
    2     True
    dtype: bool
    """

    description = "Type representing phone numbers."

    @staticmethod
    def feature_stat(x: pd.Series) -> pd.DataFrame:
        """Generates feature statistics.

        Feature statistics include (total)count, unique(count) and
        missing(count) if there is any. The computation is delegated to
        ``_count_unique_missing``.

        Parameters
        ----------
        x: :class:`pandas.Series`
            The data to summarize.

        Examples
        --------
        >>> s = pd.Series(['2068866666', '6508866666', '2068866666', '', np.nan, np.nan, None], name='phone')
        >>> s.ads.feature_type = ['phone_number']
        >>> s.ads.feature_stat()
            Metric  Value
        1    count      7
        2   unique      2
        3  missing      4

        Returns
        -------
        :class:`pandas.DataFrame`
            Summary statistics of the Series or Dataframe provided.
        """
        return _count_unique_missing(x)

    @classmethod
    def feature_domain(cls, x: pd.Series) -> schema.Domain:
        """Generate the domain of the data of this feature type.

        Parameters
        ----------
        x: :class:`pandas.Series`
            The data to describe. Its ``name`` is used to select the
            statistics column produced by ``feature_stat``.

        Examples
        --------
        >>> s = pd.Series(['2068866666', '6508866666', '2068866666', '', np.nan, np.nan, None], name='phone')
        >>> s.ads.feature_type = ['phone_number']
        >>> s.ads.feature_domain()
        constraints: []
        stats:
            count: 7
            missing: 4
            unique: 2
        values: PhoneNumber

        Returns
        -------
        ads.feature_engineering.schema.Domain
            Domain based on the PhoneNumber feature type.
        """
        # Domain receives: the feature type name, the statistics recorded
        # under the series' name, and an empty list of constraints.
        return schema.Domain(
            cls.__name__,
            cls.feature_stat(x).to_dict()[x.name],
            [],
        )