Source code for ads.feature_engineering.feature_type.zip_code
#!/usr/bin/env python# -*- coding: utf-8 -*--# Copyright (c) 2021 Oracle and/or its affiliates.# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/"""The module that represents a ZipCode feature type.Classes: ZipCode The ZipCode feature type.Functions: default_handler(data: pd.Series) -> pd.Series Processes given data and indicates if the data matches requirements."""importmatplotlib.pyplotaspltimportpandasaspdimportrefromads.feature_engineering.feature_type.stringimportStringfromads.feature_engineering.utilsimport(_count_unique_missing,_to_lat_long,_plot_gis_scatter,_zip_code,)fromads.feature_engineeringimportschemaPATTERN=re.compile(r"^[0-9]{5}(?:-[0-9]{4})?$",re.VERBOSE)
def default_handler(data: pd.Series, *args, **kwargs) -> pd.Series:
    """Processes given data and indicates if the data matches requirements.

    A value matches when it is a ``str`` that matches the module-level
    ``PATTERN`` in its entirety. Anything that is not a string (``None``,
    ``NaN``, numbers, containers, ...) does not match.

    Parameters
    ----------
    data: pd.Series
        The data to process.
    *args, **kwargs
        Accepted but not used by this handler.

    Returns
    -------
    pd.Series
        The logical list indicating if the data matches requirements.
    """

    def _is_zip_code(x: object) -> bool:
        # The isinstance check alone rejects missing values, so no separate
        # null test is needed; a null test would also raise on list/array
        # cells, whose truth value is ambiguous.
        # fullmatch is used instead of match because a trailing ``$`` also
        # matches just before a final newline, which would let values such
        # as "94065\n" through.
        return isinstance(x, str) and PATTERN.fullmatch(x) is not None

    return data.apply(_is_zip_code)
class ZipCode(String):
    """Type representing postal code.

    Attributes
    ----------
    description: str
        The feature type description.
    name: str
        The feature type name.
    warning: FeatureWarning
        Provides functionality to register warnings and invoke them.
    validator
        Provides functionality to register validators and invoke them.

    Methods
    --------
    feature_stat(x: pd.Series) -> pd.DataFrame
        Generates feature statistics.
    feature_plot(x: pd.Series) -> plt.Axes
        Shows the geometry distribution based on location of zipcode.
    feature_domain(x: pd.Series) -> schema.Domain
        Generates the domain of the data of this feature type.

    Example
    -------
    >>> from ads.feature_engineering.feature_type.zip_code import ZipCode
    >>> import pandas as pd
    >>> import numpy as np
    >>> s = pd.Series(["94065", "90210", np.nan, None], name='zipcode')
    >>> ZipCode.validator.is_zip_code(s)
    0     True
    1     True
    2    False
    3    False
    Name: zipcode, dtype: bool
    """

    description = "Type representing postal code."

    @staticmethod
    def feature_stat(x: pd.Series) -> pd.DataFrame:
        """Generates feature statistics.

        Feature statistics include (total)count, unique(count) and
        missing(count). The computation is delegated to
        ``_count_unique_missing``.

        Examples
        --------
        >>> zipcode = pd.Series([94065, 90210, np.nan, None], name='zipcode')
        >>> zipcode.ads.feature_type = ['zip_code']
        >>> zipcode.ads.feature_stat()
            Metric  Value
        0    count      4
        1   unique      2
        2  missing      2

        Returns
        -------
        Pandas Dataframe
            Summary statistics of the Series provided.
        """
        summary = _count_unique_missing(x)
        return summary

    @staticmethod
    def feature_plot(x: pd.Series) -> plt.Axes:
        """Shows the geometry distribution based on location of zipcode.

        Only the entries accepted by ``default_handler`` are converted to
        coordinates and plotted.

        Examples
        --------
        >>> zipcode = pd.Series([94065, 90210, np.nan, None], name='zipcode')
        >>> zipcode.ads.feature_type = ['zip_code']
        >>> zipcode.ads.feature_plot()

        Returns
        -------
        matplotlib.axes._subplots.AxesSubplot
            Plot object for the series based on the ZipCode feature type.
            ``None`` when the coordinate lookup yields no rows.
        """
        valid_codes = x.loc[default_handler(x)]
        gis = _to_lat_long(valid_codes, _zip_code())
        # Nothing to draw when the lookup produced an empty frame.
        if len(gis.index) == 0:
            return None
        return _plot_gis_scatter(gis, "longitude", "latitude")

    @classmethod
    def feature_domain(cls, x: pd.Series) -> schema.Domain:
        """Generate the domain of the data of this feature type.

        The domain is built from the class name, the statistics returned by
        ``feature_stat`` for the column named after the series, and an empty
        list of constraints.

        Examples
        --------
        >>> zipcode = pd.Series([94065, 90210, np.nan, None], name='zipcode')
        >>> zipcode.ads.feature_type = ['zip_code']
        >>> zipcode.ads.feature_domain()
        constraints: []
        stats:
            count: 4
            missing: 2
            unique: 2
        values: ZipCode

        Returns
        -------
        ads.feature_engineering.schema.Domain
            Domain based on the ZipCode feature type.
        """
        stats_by_column = cls.feature_stat(x).to_dict()
        series_stats = stats_by_column[x.name]
        return schema.Domain(cls.__name__, series_stats, [])