Source code for ads.feature_engineering.feature_type.integer
#!/usr/bin/env python# -*- coding: utf-8 -*--# Copyright (c) 2021, 2022 Oracle and/or its affiliates.# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/"""The module that represents an Integer feature type.Classes: Integer The Integer feature type."""importmatplotlib.pyplotaspltimportpandasaspdfromads.feature_engineering.feature_type.baseimportFeatureTypefromads.feature_engineering.utilsimport(_add_missing,_set_seaborn_theme,SchemeTeal,_format_stat,)fromads.feature_engineeringimportschemafromads.common.decorator.runtime_dependencyimport(runtime_dependency,OptionalDependency,)
class Integer(FeatureType):
    """
    Type representing integer values.

    Attributes
    ----------
    description: str
        The feature type description.
    name: str
        The feature type name.
    warning: FeatureWarning
        Provides functionality to register warnings and invoke them.
    validator
        Provides functionality to register validators and invoke them.

    Methods
    --------
    feature_stat(x: pd.Series) -> pd.DataFrame
        Generates feature statistics.
    feature_plot(x: pd.Series) -> plt.Axes
        Shows distributions of datasets using box plot.
    feature_domain(x: pd.Series) -> schema.Domain
        Generates the domain of the data of this feature type.
    """

    description = "Type representing integer values."

    @staticmethod
    def feature_stat(x: pd.Series) -> pd.DataFrame:
        """Generates feature statistics.

        Feature statistics include (total)count, mean, standard deviation,
        sample minimum, lower quartile, median, upper quartile, sample
        maximum and missing(count) if there is any.

        Parameters
        ----------
        x: pd.Series
            The series to summarize.

        Examples
        --------
        >>> x = pd.Series([1, 0, 1, 2, 3, 4, np.nan], name='integer')
        >>> x.ads.feature_type = ['integer']
        >>> x.ads.feature_stat()
            Metric                  Value
        0	count                   7
        1	mean                    1
        2	standard deviation      1
        3	sample minimum          0
        4	lower quartile          1
        5	median                  1
        6	upper quartile          2
        7	sample maximum          4
        8	missing                 1

        Returns
        -------
        :class:`pandas.DataFrame`
            Summary statistics of the Series or Dataframe provided.
        """
        df_stat = x.describe()
        # The return value is not used, so any relabelling has to happen in
        # place. NOTE(review): presumably renames the `describe` labels to the
        # metric names listed above -- confirm in `_format_stat`.
        _format_stat(df_stat)
        # `describe` counts non-null entries only; report the total length so
        # that missing values are part of the count.
        df_stat["count"] = len(x)
        # The single column of the resulting frame is labelled with the
        # series name, or with 0 when the series is unnamed.
        return _add_missing(x, df_stat).to_frame()

    @staticmethod
    @runtime_dependency(module="seaborn", install_from=OptionalDependency.VIZ)
    def feature_plot(x: pd.Series) -> plt.Axes:
        """
        Shows distributions of datasets using box plot.

        Values that cannot be converted to a number are dropped before
        plotting.

        Parameters
        ----------
        x: pd.Series
            The series to plot. A series without a (truthy) name is plotted
            under the label "integer".

        Examples
        --------
        >>> x = pd.Series([1, 0, 1, 2, 3, 4, np.nan], name='integer')
        >>> x.ads.feature_type = ['integer']
        >>> x.ads.feature_plot()

        Returns
        -------
        matplotlib.axes._subplots.AxesSubplot
            Plot object for the series based on the Integer feature type.
            ``None`` is returned when the series holds no numeric values.
        """
        col_name = x.name if x.name else "integer"
        df = x.to_frame(name=col_name)
        # Keep only the rows that can be interpreted as numbers.
        df = df[pd.to_numeric(df[col_name], errors="coerce").notnull()]
        if not len(df.index):
            # Nothing left to draw.
            return None
        _set_seaborn_theme()
        # NOTE(review): `seaborn` is not imported by this module; it is
        # presumably made available by the `runtime_dependency` decorator --
        # confirm before refactoring.
        return seaborn.boxplot(
            x=df[col_name], width=0.2, color=SchemeTeal.AREA_DARK
        )

    @classmethod
    def feature_domain(cls, x: pd.Series) -> schema.Domain:
        """
        Generate the domain of the data of this feature type.

        The domain is built from the class name, the statistics produced by
        ``feature_stat`` and an empty list of constraints.

        Parameters
        ----------
        x: pd.Series
            The series to describe. It may be unnamed.

        Examples
        --------
        >>> s = pd.Series([True, False, True, False, np.nan, None], name='integer')
        >>> s.ads.feature_type = ['integer']
        >>> s.ads.feature_domain()
        constraints: []
        stats:
            count: 6
            freq: 2
            missing: 2
            top: true
            unique: 2
        values: Integer

        Returns
        -------
        ads.feature_engineering.schema.Domain
            Domain based on the Integer feature type.
        """
        stat_frame = cls.feature_stat(x)
        # An unnamed series yields a statistics column labelled 0 rather than
        # None, so looking it up by `x.name` would raise KeyError. Fall back
        # to the first column in that case.
        column = x.name if x.name in stat_frame.columns else stat_frame.columns[0]
        return schema.Domain(cls.__name__, stat_frame.to_dict()[column], [])