ads.type_discovery package

Submodules

ads.type_discovery.abstract_detector module

class ads.type_discovery.abstract_detector.AbstractTypeDiscoveryDetector[source]

Bases: object

abstract discover(name, series)[source]
class ads.type_discovery.abstract_detector.DiscreteDiscoveryDetector[source]

Bases: AbstractTypeDiscoveryDetector

abstract discover(name, series)[source]

ads.type_discovery.constant_detector module

class ads.type_discovery.constant_detector.ConstantDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]
is_constant(name, values)[source]

ads.type_discovery.continuous_detector module

class ads.type_discovery.continuous_detector.ContinuousDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]

ads.type_discovery.credit_card_detector module

Note

There’s an opportunity here to generate a new feature: credit card numbers are not predictive because they don’t generalize; however, if the feature is replaced by the type of card, that might be predictive.

  • Visa: ^4[0-9]{12}(?:[0-9]{3})?$ All Visa card numbers start with a 4. New cards have 16 digits. Old cards have 13.

  • MasterCard: ^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$ MasterCard numbers either start with the numbers 51 through 55 or with the numbers 2221 through 2720. All have 16 digits.

  • American Express: ^3[47][0-9]{13}$ American Express card numbers start with 34 or 37 and have 15 digits.

  • Diners Club: ^3(?:0[0-5]|[68][0-9])[0-9]{11}$ Diners Club card numbers begin with 300 through 305, 36 or 38.

    All have 14 digits. There are Diners Club cards that begin with 5 and have 16 digits. These are a joint venture between Diners Club and MasterCard, and should be processed like a MasterCard.

  • Discover: ^6(?:011|5[0-9]{2})[0-9]{12}$ Discover card numbers begin with 6011 or 65. All have 16 digits.

  • JCB: ^(?:2131|1800|35\d{3})\d{11}$ JCB cards beginning with 2131 or 1800 have 15 digits.

    JCB cards beginning with 35 have 16 digits.

class ads.type_discovery.credit_card_detector.CreditCardDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]
is_credit_card(name, values)[source]
is_luhn_valid(card_number)[source]
luhn_checksum(card_number)[source]

ads.type_discovery.datetime_detector module

class ads.type_discovery.datetime_detector.DateTimeDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]

ads.type_discovery.discrete_detector module

class ads.type_discovery.discrete_detector.DiscreteDetector[source]

Bases: DiscreteDiscoveryDetector

discover(name, series)[source]

ads.type_discovery.document_detector module

class ads.type_discovery.document_detector.DocumentDetector[source]

Bases: AbstractTypeDiscoveryDetector

cjk_string(document)[source]
discover(name, series)[source]
html_document(document)[source]

ads.type_discovery.ip_detector module

class ads.type_discovery.ip_detector.IPDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]

ads.type_discovery.latlon_detector module

class ads.type_discovery.latlon_detector.LatLonDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]
static extract_x_y(gis_series)[source]

Takes a GIS series and parses it into a new dataframe with X (longitude) and Y (latitude) columns.

is_lat_lon(name, values)[source]

ads.type_discovery.phone_number_detector module

class ads.type_discovery.phone_number_detector.PhoneNumberDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]
is_phone_number(name, values)[source]

ads.type_discovery.type_discovery_driver module

class ads.type_discovery.type_discovery_driver.TypeDiscoveryDriver[source]

Bases: object

discover(name: str, s: Series, is_target: bool = False)[source]

Return the discovered type of the series.

Parameters:
  • name (str) – name of the variable to discover.

  • s (Series) – series of values to ‘type’.

  • is_target (bool) – when true the rules differ: any continuous series is treated as continuous regardless of other rules.

Returns:

one of:

ConstantDetector, DocumentDetector, ZipCodeDetector, LatLonDetector, CreditCardDetector, PhoneNumberDetector, DateTimeDetector, DiscreteDetector, ContinuousDetector.

Return type:

type

ads.type_discovery.typed_feature module

class ads.type_discovery.typed_feature.AddressTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.CategoricalTypedFeature(name, meta_data)[source]

Bases: DiscreteTypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.ConstantTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.ContinuousTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.CreditCardTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.DateTimeTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.DiscreteTypedFeature(name, meta_data)[source]

Bases: TypedFeature

class ads.type_discovery.typed_feature.DocumentTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series, is_cjk, is_html)[source]
static corpus_processor(series)[source]
static sub_vectorization(feature_name, series, min_df=0.0, max_df=1.0, min_tf=2)[source]
static vectorization(feature_name, series, mean_document_length)[source]
class ads.type_discovery.typed_feature.GISTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series, samples)[source]
class ads.type_discovery.typed_feature.IPAddressTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.OrdinalTypedFeature(name, meta_data)[source]

Bases: DiscreteTypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.PhoneNumberTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.TypedFeature(name, meta_data)[source]

Bases: Bunch

static build(name, series)[source]
class ads.type_discovery.typed_feature.UnknownTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]
class ads.type_discovery.typed_feature.ZipcodeTypedFeature(name, meta_data)[source]

Bases: TypedFeature

static build(name, series)[source]

ads.type_discovery.unknown_detector module

class ads.type_discovery.unknown_detector.UnknownDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]

ads.type_discovery.zipcode_detector module

class ads.type_discovery.zipcode_detector.ZipCodeDetector[source]

Bases: AbstractTypeDiscoveryDetector

discover(name, series)[source]

Module contents