#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import numpy as np
from sklearn import preprocessing

from ads.common.decorator.runtime_dependency import (
    runtime_dependency,
    OptionalDependency,
)
from ads.common import utils
from ads.type_discovery.typed_feature import CategoricalTypedFeature
from ads.type_discovery.typed_feature import ContinuousTypedFeature
from ads.type_discovery.typed_feature import OrdinalTypedFeature

# Default upper bound on the absolute skewness of a continuous target; used as
# the default `skewness_threshold` of `TargetVariable.is_balanced`.
ABSOLUTE_SKEWNESS_THRESHOLD = 0.5
# between 0 and 1, ratio of least / most represented class
CLASS_IMBALANCE_THRESHOLD = 0.5
class TargetVariable:
    """
    This class provides target specific APIs.

    Attributes
    ----------
    sampled_ds:
        The object passed to the constructor; it must expose a ``sampled_df``
        data frame and a ``plot`` method.
    name:
        Name of the target column inside ``sampled_ds.sampled_df``.
    type:
        Typed-feature instance describing the target.
    target_vals: list or None
        Unique target values for a categorical target, otherwise None.
    class_imbalance_ratio: float or None
        Count of the least represented class divided by the count of the most
        represented class for a categorical target, otherwise None.
    skewness: float or None
        Absolute skewness of the target for a continuous target, otherwise
        None.
    numeric_columns:
        Names of the numeric columns of ``sampled_ds.sampled_df``.
    """

    @runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
    def __init__(self, sampled_ds, target, target_type):
        """
        Compute the balance statistics that match the target's feature type.

        Parameters
        ----------
        sampled_ds:
            Dataset wrapper exposing ``sampled_df`` and ``plot``.
        target: str
            Name of the target column in ``sampled_ds.sampled_df``.
        target_type:
            Typed-feature instance describing the target column.
        """
        self.sampled_ds = sampled_ds
        self.name = target
        self.type = target_type
        self.target_vals = None
        # Define both statistics for every target type so that reading the one
        # that does not apply yields None instead of raising AttributeError.
        self.skewness = None
        self.class_imbalance_ratio = None

        if isinstance(self.type, CategoricalTypedFeature):
            target_column = self.sampled_ds.sampled_df[target]
            self.target_vals = target_column.unique().tolist()
            val_counts = target_column.value_counts()
            self.class_imbalance_ratio = val_counts.min() / val_counts.max()
        elif isinstance(self.type, ContinuousTypedFeature):
            target_column = self.sampled_ds.sampled_df[target]
            try:
                self.skewness = np.abs(target_column.skew())
            except TypeError:
                # The column could not be reduced as stored; retry after an
                # explicit cast to float.
                self.skewness = np.abs(target_column.astype("float").skew())
        # Any other type (DateTimeTypedFeature, IPAddressTypedFeature,
        # PhoneNumberTypedFeature, GISTypedFeature, AddressTypedFeature,
        # DocumentTypedFeature, ZipcodeTypedFeature, UnknownTypedFeature,
        # ConstantTypedFeature, CreditCardTypedFeature, OrdinalTypedFeature)
        # keeps both statistics at None.

        self.numeric_columns = (
            self.sampled_ds.sampled_df._get_numeric_data().columns.values
        )

    def show_in_notebook(self, feature_names=None):  # pragma: no cover
        """
        Plot target distribution or target versus feature relation.

        Parameters
        ----------
        feature_names: list or str, Optional
            Plot target against a list of features (a single feature name is
            also accepted). Display target distribution if feature_names is
            not provided.

        Returns
        -------
        None
        """
        if not utils.is_notebook():
            print("show_in_notebook called but not in notebook environment")
            return

        if feature_names is None:
            self.sampled_ds.plot(self.name, verbose=False).show_in_notebook()
            return

        # A bare string would otherwise be iterated character by character.
        if isinstance(feature_names, str):
            feature_names = [feature_names]

        # Only the first plot is requested with verbose=True.
        for position, feature_name in enumerate(feature_names):
            self.sampled_ds.plot(
                feature_name, self.name, verbose=(position == 0)
            ).show_in_notebook()

    def is_balanced(
        self,
        skewness_threshold=ABSOLUTE_SKEWNESS_THRESHOLD,
        class_imbalance_threshold=CLASS_IMBALANCE_THRESHOLD,
    ):
        """
        Returns True if the target is balanced, False otherwise.

        Parameters
        ----------
        skewness_threshold: float, Optional
            A continuous target is balanced when its absolute skewness is
            strictly below this value.
        class_imbalance_threshold: float, Optional
            A categorical target is balanced when the ratio of its least to
            most represented class is strictly above this value.

        Returns
        -------
        is_balanced: bool
            Always True for targets that are neither continuous nor
            categorical.
        """
        # bool() turns the numpy comparison result into the documented
        # built-in bool.
        if isinstance(self.type, ContinuousTypedFeature):
            return bool(self.skewness < skewness_threshold)
        if isinstance(self.type, CategoricalTypedFeature):
            return bool(self.class_imbalance_ratio > class_imbalance_threshold)
        return True