Source code for ads.type_discovery.zipcode_detector

#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


from __future__ import print_function, absolute_import, division

import re

import pandas as pd

from ads.type_discovery import logger
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
from ads.type_discovery.typed_feature import ZipcodeTypedFeature


class ZipCodeDetector(AbstractTypeDiscoveryDetector):
    """Detect columns whose values look like US ZIP codes.

    A value qualifies when its string form is exactly five digits,
    optionally followed by a hyphen and four more digits (ZIP+4), for
    example ``94065`` or ``94065-1107``.
    """

    # Compiled once at class creation rather than looked up per value.
    _ZIP_CODE_PATTERN = re.compile("^[0-9]{5}(?:-[0-9]{4})?$")

    # Number of leading values inspected by ``_is_zip_code``.
    _SAMPLE_SIZE = 10

    def _is_zip_code(self, values):
        """Return True when the leading values all look like ZIP codes.

        Only the first ``_SAMPLE_SIZE`` entries of ``values`` are checked;
        each one is converted with ``str()`` before matching.

        Parameters
        ----------
        values : pandas.Series
            Values to inspect. Null handling is the caller's job.

        Returns
        -------
        bool
            True if the sample is non-empty and every sampled value
            matches the ZIP code pattern, otherwise False.
        """
        sample = values.head(self._SAMPLE_SIZE)
        # ``all()`` over an empty iterable is True, which would classify an
        # empty or all-null column as ZIP codes. Require actual evidence.
        if sample.empty:
            return False
        pattern = self._ZIP_CODE_PATTERN
        # Generator form short-circuits on the first non-matching value.
        return all(pattern.match(str(v)) is not None for v in sample)

    def discover(self, name, series):
        """Decide whether ``series`` holds ZIP codes.

        Null entries are dropped before inspection, so a column with
        missing values can still be detected. A column with no non-null
        values is never reported as a ZIP code column.

        Parameters
        ----------
        name : str
            Column name, used for logging and passed to the feature builder.
        series : pandas.Series
            Column data to examine.

        Returns
        -------
        object or bool
            The result of ``ZipcodeTypedFeature.build(name, series)`` when
            the column is detected as ZIP codes, otherwise ``False``.
        """
        candidates = series.loc[~series.isnull()]

        # Trim first, then convert: only a handful of leading values are
        # ever inspected, so casting the whole column would be wasted work.
        sample = candidates.head(1000)
        if sample.dtype != "object":
            sample = sample.astype("object")

        if self._is_zip_code(sample):
            logger.debug(
                "type discovery on column [{}]/[{}] found to be a zipcode".format(
                    name, series.dtype
                )
            )
            return ZipcodeTypedFeature.build(name, series)

        return False
if __name__ == "__main__":
    # Ad-hoc smoke run: feed a small column mixing nulls, plain 5-digit
    # codes and one ZIP+4 value through the detector and print the result.
    sample_zips = pd.Series([None, "00501", "94065", "94065-1107", "90210", None])
    detector = ZipCodeDetector()
    outcome = detector.discover("zip", sample_zips)
    print(outcome)