#!/usr/bin/env python
# -*- coding: utf-8 -*--
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from abc import ABC
from ads.hpo.distributions import *
from ads.hpo.utils import _update_space_name
from sklearn.ensemble import (
ExtraTreesClassifier,
ExtraTreesRegressor,
GradientBoostingClassifier,
GradientBoostingRegressor,
RandomForestClassifier,
RandomForestRegressor,
)
from sklearn.linear_model import (
ElasticNet,
Lasso,
LogisticRegression,
Ridge,
RidgeClassifier,
SGDClassifier,
SGDRegressor,
)
from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
[docs]
class ModelSearchSpace(ABC):
    """Base class for per-model hyperparameter search spaces.

    Subclasses override :meth:`suggest_space` to describe which
    hyperparameters are searched for a given ``strategy``.

    Attributes set by the constructor:

    * ``strategy`` -- the strategy value handed to the constructor.
    * ``space`` -- starts out as an empty dict.
    * ``step_name`` -- starts out as an empty string.
    """

    def __init__(self, strategy):
        """Store ``strategy`` and initialise the empty bookkeeping fields."""
        super().__init__()
        self.step_name = ""
        self.space = {}
        self.strategy = strategy

    def suggest_space(self, **kwargs):
        """Placeholder hook; the base implementation returns ``None``."""
        return None
[docs]
class RidgeSearchSpace(ModelSearchSpace):
    """Search space over ``alpha`` (plus intercept/normalize options).

    The ``"perfunctory"`` strategy searches ``alpha`` only; any other
    strategy widens the ``alpha`` range and adds ``fit_intercept`` and
    ``normalize``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {"alpha": LogUniformDistribution(10**-4, 10**-1)}
        if self.strategy != "perfunctory":
            # NOTE(review): scikit-learn removed the ``normalize`` parameter
            # from its linear models in release 1.2 -- confirm which
            # scikit-learn versions this space is expected to support.
            search_space = {
                **search_space,
                "alpha": LogUniformDistribution(10**-5, 10**1),
                "fit_intercept": CategoricalDistribution([True, False]),
                "normalize": CategoricalDistribution([True, False]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class LassoSearchSpace(RidgeSearchSpace):
    """Search space identical to :class:`RidgeSearchSpace`; adds nothing."""
[docs]
class RidgeClassifierSearchSpace(RidgeSearchSpace):
    """Search space identical to :class:`RidgeSearchSpace`; adds nothing."""
[docs]
class ElasticNetSearchSpace(ModelSearchSpace):
    """Search space over ``alpha`` and ``l1_ratio`` for elastic-net models.

    Any strategy other than ``"perfunctory"`` widens ``alpha`` and adds
    ``fit_intercept`` and ``normalize``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "alpha": LogUniformDistribution(10**-4, 10**-1),
            "l1_ratio": UniformDistribution(0, 1),
        }
        if self.strategy != "perfunctory":
            # NOTE(review): scikit-learn removed the ``normalize`` parameter
            # from its linear models in release 1.2 -- confirm which
            # scikit-learn versions this space is expected to support.
            search_space = {
                **search_space,
                "alpha": LogUniformDistribution(10**-5, 10),
                "fit_intercept": CategoricalDistribution([True, False]),
                "l1_ratio": UniformDistribution(0, 1),
                "normalize": CategoricalDistribution([True, False]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class LogisticRegressionSearchSpace(ModelSearchSpace):
    """Search space for logistic regression, always using the saga solver.

    ``"perfunctory"`` chooses between ``l1`` and ``l2`` penalties; any other
    strategy switches to the ``elasticnet`` penalty, adds ``l1_ratio`` and
    widens the range of ``C``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "C": LogUniformDistribution(10**-4, 10**-1),
            "dual": CategoricalDistribution([False]),
            "penalty": CategoricalDistribution(["l1", "l2"]),
            "solver": CategoricalDistribution(["saga"]),
        }
        if self.strategy != "perfunctory":
            # ``dual`` and ``solver`` are carried over from the quick space.
            search_space = {
                **search_space,
                "C": LogUniformDistribution(10**-5, 10),
                "l1_ratio": UniformDistribution(0, 1),
                "penalty": CategoricalDistribution(["elasticnet"]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class SGDClassifierSearchSpace(ModelSearchSpace):
    """Search space over ``alpha`` and ``penalty`` for SGD-trained models.

    Any strategy other than ``"perfunctory"`` fixes the penalty to
    ``elasticnet``, adds ``l1_ratio`` and widens the range of ``alpha``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "alpha": LogUniformDistribution(10**-4, 10**-1),
            "penalty": CategoricalDistribution(["l1", "l2", None]),
        }
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "alpha": LogUniformDistribution(10**-5, 10**1),
                "l1_ratio": UniformDistribution(0, 1),
                "penalty": CategoricalDistribution(["elasticnet"]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class SGDRegressorSearchSpace(SGDClassifierSearchSpace):
    """Search space inherited unchanged from :class:`SGDClassifierSearchSpace`."""

    def __init__(self, strategy):
        """Forward ``strategy`` to the parent constructor."""
        super().__init__(strategy)
[docs]
class SVCSearchSpace(ModelSearchSpace):
    """Search space over ``C`` and ``max_iter`` for kernel SVMs.

    Any strategy other than ``"perfunctory"`` widens ``C``, raises the
    single ``max_iter`` choice from 1000 to 5000 and adds ``gamma`` and
    ``kernel``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "C": LogUniformDistribution(10**-4, 10**-1),
            "max_iter": CategoricalDistribution([1000]),
        }
        if self.strategy != "perfunctory":
            kernel_choices = ["linear", "poly", "rbf", "sigmoid"]
            search_space = {
                **search_space,
                "C": LogUniformDistribution(10**-5, 5),
                "gamma": CategoricalDistribution(["scale", "auto"]),
                "kernel": CategoricalDistribution(kernel_choices),
                "max_iter": CategoricalDistribution([5000]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class SVRSearchSpace(SVCSearchSpace):
    """Search space identical to :class:`SVCSearchSpace`; adds nothing."""
[docs]
class LinearSVCSearchSpace(ModelSearchSpace):
    """Search space over ``C`` with ``dual`` fixed to ``False``.

    Any strategy other than ``"perfunctory"`` widens ``C`` and adds
    ``class_weight``, ``fit_intercept``, ``loss`` and ``penalty``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "C": LogUniformDistribution(10**-4, 10**-1),
            "dual": CategoricalDistribution([False]),
        }
        if self.strategy != "perfunctory":
            # max_iter is not searched here; it defaults to 1000.
            search_space = {
                **search_space,
                "C": LogUniformDistribution(10**-5, 5),
                "class_weight": CategoricalDistribution(["balanced", None]),
                "fit_intercept": CategoricalDistribution([True, False]),
                "loss": CategoricalDistribution(["squared_hinge"]),
                "penalty": CategoricalDistribution(["l1"]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class LinearSVRSearchSpace(ModelSearchSpace):
    """Search space over ``C`` for linear support-vector regression.

    Any strategy other than ``"perfunctory"`` widens ``C`` and adds
    ``dual``, ``fit_intercept`` and ``loss`` (each of ``dual`` and ``loss``
    has a single allowed value).
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {"C": LogUniformDistribution(10**-4, 10**-1)}
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "C": LogUniformDistribution(10**-5, 10**1),
                "dual": CategoricalDistribution([False]),
                "fit_intercept": CategoricalDistribution([True, False]),
                "loss": CategoricalDistribution(["squared_epsilon_insensitive"]),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class DecisionTreeClassifierSearchSpace(ModelSearchSpace):
    """Search space over tree depth, impurity decrease and split size.

    Any strategy other than ``"perfunctory"`` deepens ``max_depth`` to
    1..10 and adds ``criterion`` and ``min_samples_leaf``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "max_depth": IntUniformDistribution(1, 5),
            "min_impurity_decrease": UniformDistribution(0, 0.05),
            "min_samples_split": IntUniformDistribution(2, 500),
        }
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "criterion": CategoricalDistribution(["gini", "entropy"]),
                "max_depth": IntUniformDistribution(1, 10),
                "min_samples_leaf": IntUniformDistribution(2, 500),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class DecisionTreeRegressorSearchSpace(ModelSearchSpace):
    """Search space over tree depth, impurity decrease and split size.

    Any strategy other than ``"perfunctory"`` adds ``criterion`` and
    ``min_samples_leaf``; ``max_depth`` keeps its 1..5 range.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "max_depth": IntUniformDistribution(1, 5),
            "min_impurity_decrease": UniformDistribution(0, 0.05),
            "min_samples_split": IntUniformDistribution(2, 500),
        }
        if self.strategy != "perfunctory":
            split_criteria = ["squared_error", "friedman_mse", "absolute_error"]
            search_space = {
                **search_space,
                "criterion": CategoricalDistribution(split_criteria),
                "min_samples_leaf": IntUniformDistribution(2, 500),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class XGBClassifierSearchSpace(ModelSearchSpace):
    """Search space for XGBoost estimators.

    ``"perfunctory"`` searches five parameters (column/row subsampling,
    learning rate, depth, number of estimators). Any other strategy
    replaces all five ranges and adds ``gamma``, ``min_child_weight``,
    ``reg_alpha`` and ``reg_lambda``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "colsample_bytree": UniformDistribution(0.6, 0.8),
            "learning_rate": LogUniformDistribution(0.3, 0.4),
            "max_depth": IntUniformDistribution(1, 5),
            "n_estimators": IntUniformDistribution(50, 250),
            "subsample": UniformDistribution(0.5, 1),
        }
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "colsample_bytree": UniformDistribution(0.3, 0.7),
                "gamma": UniformDistribution(0, 10),
                "learning_rate": LogUniformDistribution(0.001, 0.6),
                "max_depth": IntUniformDistribution(1, 10),
                "min_child_weight": IntUniformDistribution(0, 20),
                "n_estimators": IntUniformDistribution(50, 500),
                # 'scale_pos_weight': LogUniformDistribution(10 ** -5, 1),
                "subsample": UniformDistribution(0.25, 1),
                "reg_alpha": LogUniformDistribution(10**-5, 1),
                "reg_lambda": LogUniformDistribution(10**-5, 1),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class XGBRegressorSearchSpace(XGBClassifierSearchSpace):
    """Search space identical to :class:`XGBClassifierSearchSpace`; adds nothing."""
[docs]
class LGBMClassifierSearchSpace(ModelSearchSpace):
    """Search space for LightGBM estimators.

    ``"perfunctory"`` searches five parameters. Any other strategy widens
    ``learning_rate``, ``max_depth`` and ``n_estimators``, fixes
    ``boosting_type`` to ``dart`` and adds ``min_child_weight``,
    ``num_leaves``, ``reg_alpha`` and ``reg_lambda``; ``colsample_bytree``
    and ``subsample`` keep their quick-space ranges.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "colsample_bytree": UniformDistribution(0.3, 0.7),
            "learning_rate": LogUniformDistribution(0.3, 0.4),
            "max_depth": IntUniformDistribution(1, 5),
            "n_estimators": IntUniformDistribution(50, 250),
            "subsample": UniformDistribution(0.5, 1),
        }
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "boosting_type": CategoricalDistribution(["dart"]),
                "learning_rate": LogUniformDistribution(0.001, 0.6),
                "max_depth": IntUniformDistribution(1, 10),
                "min_child_weight": IntUniformDistribution(0, 20),
                "n_estimators": IntUniformDistribution(50, 500),
                "num_leaves": IntLogUniformDistribution(7, 40),
                "reg_alpha": LogUniformDistribution(10**-5, 1),
                "reg_lambda": LogUniformDistribution(10**-5, 1),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class LGBMRegressorSearchSpace(LGBMClassifierSearchSpace):
    """Search space identical to :class:`LGBMClassifierSearchSpace`; adds nothing."""
[docs]
class RandomForestClassifierSearchSpace(ExtraTreesClassifierSearchSpace):
    """Search space inherited unchanged from ``ExtraTreesClassifierSearchSpace``."""
[docs]
class RandomForestRegressorSearchSpace(ExtraTreesClassifierSearchSpace):
    """Search space inherited unchanged from ``ExtraTreesClassifierSearchSpace``."""
[docs]
class GradientBoostingRegressorSearchSpace(ModelSearchSpace):
    """Search space for gradient-boosted tree ensembles.

    ``"perfunctory"`` searches ``max_depth``, ``max_features`` and
    ``n_estimators``. Any other strategy widens ``max_depth`` and
    ``n_estimators`` and adds ``learning_rate``, ``min_samples_leaf``,
    ``min_samples_split`` and ``subsample``.
    """

    def __init__(self, strategy):
        """Forward ``strategy`` to :class:`ModelSearchSpace`."""
        super().__init__(strategy)

    def suggest_space(self, **kwargs):
        """Assemble the space for the current strategy.

        ``kwargs`` are passed through untouched to ``_update_space_name``,
        whose result is returned.
        """
        search_space = {
            "max_depth": IntUniformDistribution(1, 5),
            "max_features": CategoricalDistribution(["sqrt", "log2"]),
            "n_estimators": IntUniformDistribution(50, 250),
        }
        if self.strategy != "perfunctory":
            search_space = {
                **search_space,
                "learning_rate": LogUniformDistribution(0.001, 0.6),
                "max_depth": IntUniformDistribution(1, 10),
                "min_samples_leaf": IntUniformDistribution(5, 25),
                "min_samples_split": IntUniformDistribution(2, 500),
                "n_estimators": IntUniformDistribution(50, 500),
                "subsample": UniformDistribution(0.5, 1),
            }
        return _update_space_name(search_space, **kwargs)
[docs]
class GradientBoostingClassifierSearchSpace(GradientBoostingRegressorSearchSpace):
    """Search space identical to :class:`GradientBoostingRegressorSearchSpace`."""
[docs]
def get_model2searchspace():
    """Build the mapping from estimator class to its search-space class.

    The scikit-learn estimators imported at the top of this module are
    always present. The XGBoost and LightGBM estimators are added only when
    the corresponding package can be imported.

    Returns
    -------
    dict
        A freshly built dict keyed by estimator class, whose values are the
        matching ``ModelSearchSpace`` subclasses.
    """
    model2searchspace = {
        Ridge: RidgeSearchSpace,
        RidgeClassifier: RidgeClassifierSearchSpace,
        Lasso: LassoSearchSpace,
        ElasticNet: ElasticNetSearchSpace,
        LogisticRegression: LogisticRegressionSearchSpace,
        SVC: SVCSearchSpace,
        SVR: SVRSearchSpace,
        LinearSVC: LinearSVCSearchSpace,
        LinearSVR: LinearSVRSearchSpace,
        DecisionTreeClassifier: DecisionTreeClassifierSearchSpace,
        DecisionTreeRegressor: DecisionTreeRegressorSearchSpace,
        RandomForestClassifier: RandomForestClassifierSearchSpace,
        RandomForestRegressor: RandomForestRegressorSearchSpace,
        GradientBoostingClassifier: GradientBoostingClassifierSearchSpace,
        GradientBoostingRegressor: GradientBoostingRegressorSearchSpace,
        ExtraTreesClassifier: ExtraTreesClassifierSearchSpace,
        ExtraTreesRegressor: ExtraTreesRegressorSearchSpace,
        SGDClassifier: SGDClassifierSearchSpace,
        SGDRegressor: SGDRegressorSearchSpace,
    }

    # Optional backends are registered on a best-effort basis. The handler
    # stays broad (a broken install may fail with something other than
    # ImportError) but no longer swallows KeyboardInterrupt / SystemExit the
    # way a bare ``except:`` does.
    try:
        from xgboost import XGBClassifier, XGBRegressor
    except Exception:
        pass
    else:
        model2searchspace[XGBClassifier] = XGBClassifierSearchSpace
        model2searchspace[XGBRegressor] = XGBRegressorSearchSpace

    try:
        from lightgbm import LGBMClassifier, LGBMRegressor
    except Exception:
        pass
    else:
        model2searchspace[LGBMClassifier] = LGBMClassifierSearchSpace
        model2searchspace[LGBMRegressor] = LGBMRegressorSearchSpace

    return model2searchspace
# Estimator classes that have a registered search space, computed once at
# import time (iterating a dict yields its keys).
model_list = list(get_model2searchspace())