ads.hpo package#

Subpackages#

Submodules#

ads.hpo.ads_search_space module#

class ads.hpo.ads_search_space.DecisionTreeClassifierSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.DecisionTreeRegressorSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.ElasticNetSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.ExtraTreesClassifierSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.ExtraTreesRegressorSearchSpace(strategy)[source]#

Bases: ExtraTreesClassifierSearchSpace

class ads.hpo.ads_search_space.GradientBoostingClassifierSearchSpace(strategy)[source]#

Bases: GradientBoostingRegressorSearchSpace

class ads.hpo.ads_search_space.GradientBoostingRegressorSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.LGBMClassifierSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.LGBMRegressorSearchSpace(strategy)[source]#

Bases: LGBMClassifierSearchSpace

class ads.hpo.ads_search_space.LassoSearchSpace(strategy)[source]#

Bases: RidgeSearchSpace

class ads.hpo.ads_search_space.LinearSVCSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.LinearSVRSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.LogisticRegressionSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.ModelSearchSpace(strategy)[source]#

Bases: ABC

Defines an abstract base class for setting the search space and strategy used during hyperparameter optimization.

suggest_space(**kwargs)[source]#
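
Concrete subclasses implement suggest_space() to return a mapping from hyperparameter names to distributions for the given strategy. A minimal sketch of a hypothetical custom subclass (the class name and hyperparameters are illustrative, and it assumes the base class stores the strategy passed to __init__ as self.strategy):

from ads.hpo.ads_search_space import ModelSearchSpace
from ads.hpo.distributions import IntUniformDistribution, LogUniformDistribution

class MyEstimatorSearchSpace(ModelSearchSpace):  # hypothetical subclass
    def suggest_space(self, **kwargs):
        # A small space for the default strategy ...
        space = {'alpha': LogUniformDistribution(low=1e-4, high=1e-1)}
        if self.strategy == 'detailed':  # assumes strategy is stored on self
            # ... widened when the 'detailed' strategy is requested.
            space['max_iter'] = IntUniformDistribution(low=100, high=1000)
        return space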
class ads.hpo.ads_search_space.RandomForestClassifierSearchSpace(strategy)[source]#

Bases: ExtraTreesClassifierSearchSpace

class ads.hpo.ads_search_space.RandomForestRegressorSearchSpace(strategy)[source]#

Bases: ExtraTreesClassifierSearchSpace

class ads.hpo.ads_search_space.RidgeClassifierSearchSpace(strategy)[source]#

Bases: RidgeSearchSpace

class ads.hpo.ads_search_space.RidgeSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.SGDClassifierSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.SGDRegressorSearchSpace(strategy)[source]#

Bases: SGDClassifierSearchSpace

class ads.hpo.ads_search_space.SVCSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.SVRSearchSpace(strategy)[source]#

Bases: SVCSearchSpace

class ads.hpo.ads_search_space.XGBClassifierSearchSpace(strategy)[source]#

Bases: ModelSearchSpace

suggest_space(**kwargs)[source]#
class ads.hpo.ads_search_space.XGBRegressorSearchSpace(strategy)[source]#

Bases: XGBClassifierSearchSpace

ads.hpo.ads_search_space.get_model2searchspace()[source]#

ads.hpo.distributions module#

class ads.hpo.distributions.CategoricalDistribution(choices: Sequence[None | bool | int | float | str])[source]#

Bases: Distribution

A categorical distribution.

Parameters:

choices – Parameter value candidates. It is recommended to restrict the types of the choices to the following: None, bool, int, float and str.
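
For example, a categorical hyperparameter can be declared as follows (the hyperparameter name is illustrative):

from ads.hpo.distributions import CategoricalDistribution

# Search over a fixed set of kernel choices.
kernel = CategoricalDistribution(['linear', 'rbf', 'poly'])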

class ads.hpo.distributions.DiscreteUniformDistribution(low: float, high: float, step: float)[source]#

Bases: Distribution

A discretized uniform distribution in the linear domain.

Note

If the range \([\mathsf{low}, \mathsf{high}]\) is not divisible by \(\mathsf{step}\), \(\mathsf{high}\) will be replaced with the maximum of \(k \times \mathsf{step} + \mathsf{low} \lt \mathsf{high}\), where \(k\) is an integer.

Parameters:
  • low (float) – Lower endpoint of the range of the distribution. low is included in the range.

  • high (float) – Upper endpoint of the range of the distribution. high is included in the range.

  • step (float) – A discretization step.
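
A short sketch of the note above: with low=0.1, high=1.0, and step=0.4, the range 0.9 is not divisible by the step, so the effective upper endpoint becomes 0.9 (the hyperparameter name is illustrative):

from ads.hpo.distributions import DiscreteUniformDistribution

# The range 1.0 - 0.1 = 0.9 is not divisible by step=0.4, so high is
# effectively replaced with 0.9; the candidates are 0.1, 0.5, and 0.9.
subsample = DiscreteUniformDistribution(low=0.1, high=1.0, step=0.4)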

class ads.hpo.distributions.Distribution(dist)[source]#

Bases: object

Defines the abstract base class for hyperparameter search distributions

get_distribution()[source]#

Returns the distribution

class ads.hpo.distributions.DistributionEncode(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)[source]#

Bases: JSONEncoder

Constructor for JSONEncoder, with sensible defaults.

If skipkeys is false, then it is a TypeError to attempt encoding of keys that are not str, int, float or None. If skipkeys is True, such items are simply skipped.

If ensure_ascii is true, the output is guaranteed to be str objects with all incoming non-ASCII characters escaped. If ensure_ascii is false, the output can contain non-ASCII characters.

If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to prevent an infinite recursion (which would cause a RecursionError). Otherwise, no such check takes place.

If allow_nan is true, then NaN, Infinity, and -Infinity will be encoded as such. This behavior is not JSON specification compliant, but is consistent with most JavaScript based encoders and decoders. Otherwise, it will be a ValueError to encode such floats.

If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis.

If indent is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. None is the most compact representation.

If specified, separators should be an (item_separator, key_separator) tuple. The default is (', ', ': ') if indent is None and (',', ': ') otherwise. To get the most compact JSON representation, you should specify (',', ':') to eliminate whitespace.

If specified, default is a function that gets called for objects that can’t otherwise be serialized. It should return a JSON encodable version of the object or raise a TypeError.

default(dist: Distribution) → Dict[str, Any][source]#

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return JSONEncoder.default(self, o)
static from_json(json_object: Dict[Any, Any])[source]#
class ads.hpo.distributions.IntLogUniformDistribution(low: float, high: float, step: float = 1)[source]#

Bases: Distribution

A uniform distribution on integers in the log domain.

Parameters:
  • low – Lower endpoint of the range of the distribution. low is included in the range.

  • high – Upper endpoint of the range of the distribution. high is included in the range.

  • step – A step for spacing between values.

class ads.hpo.distributions.IntUniformDistribution(low: float, high: float, step: float = 1)[source]#

Bases: Distribution

A uniform distribution on integers.

Note

If the range \([\mathsf{low}, \mathsf{high}]\) is not divisible by \(\mathsf{step}\), \(\mathsf{high}\) will be replaced with the maximum of \(k \times \mathsf{step} + \mathsf{low} \lt \mathsf{high}\), where \(k\) is an integer.

Parameters:
  • low – Lower endpoint of the range of the distribution. low is included in the range.

  • high – Upper endpoint of the range of the distribution. high is included in the range.

  • step – A step for spacing between values.
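
For example (the hyperparameter names are illustrative):

from ads.hpo.distributions import IntLogUniformDistribution, IntUniformDistribution

# Linearly spaced integer candidates: 50, 60, ..., 200.
n_estimators = IntUniformDistribution(low=50, high=200, step=10)

# Integer candidates drawn uniformly in the log domain between 10 and 1000.
max_iter = IntLogUniformDistribution(low=10, high=1000)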

class ads.hpo.distributions.LogUniformDistribution(low: float, high: float)[source]#

Bases: Distribution

A uniform distribution in the log domain.

Parameters:
  • low – Lower endpoint of the range of the distribution. low is included in the range.

  • high – Upper endpoint of the range of the distribution. high is excluded from the range.

class ads.hpo.distributions.UniformDistribution(low: float, high: float)[source]#

Bases: Distribution

A uniform distribution in the linear domain.

Parameters:
  • low – Lower endpoint of the range of the distribution. low is included in the range.

  • high – Upper endpoint of the range of the distribution. high is excluded from the range.
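
These distributions are typically collected into a user-defined strategy dictionary for ADSTuner. A minimal sketch, using SGDClassifier hyperparameters for illustration:

from sklearn.linear_model import SGDClassifier

from ads.hpo.distributions import (
    CategoricalDistribution,
    LogUniformDistribution,
    UniformDistribution,
)
from ads.hpo.search_cv import ADSTuner

tuner = ADSTuner(
    SGDClassifier(),
    strategy={
        'alpha': LogUniformDistribution(low=1e-5, high=1e-1),
        'l1_ratio': UniformDistribution(low=0.0, high=1.0),
        'penalty': CategoricalDistribution(['l1', 'l2', 'elasticnet']),
    },
    scoring='f1_weighted',
)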

ads.hpo.distributions.decode(s: str)[source]#

Decodes a string to an object

Parameters:

s (str) – The string being decoded to a distribution object

Returns:

Decoded string

Return type:

Distribution or Dict

ads.hpo.distributions.encode(o: Distribution) → str[source]#

Encodes a distribution to a string

Parameters:

o (Distribution) – The distribution to encode

Returns:

The distribution encoded as a string

Return type:

str (DistributionEncode)
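
A sketch of a round trip through encode and decode:

from ads.hpo.distributions import LogUniformDistribution, decode, encode

dist = LogUniformDistribution(low=1e-4, high=1e-1)
s = encode(dist)       # serialize the distribution to a JSON string
restored = decode(s)   # reconstruct a distribution object from the string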

ads.hpo.objective module#

ads.hpo.search_cv module#

class ads.hpo.search_cv.ADSTuner(model: BaseEstimator | Pipeline, strategy: str | Mapping[str, optuna.distributions.BaseDistribution] = 'perfunctory', scoring: Callable[..., float] | str | None = None, cv: int | None = 5, study_name: str | None = None, storage: str | None = None, load_if_exists: bool | None = True, random_state: int | None = None, loglevel: int | None = 20, n_jobs: int | None = 1, X: List[List[float]] | np.ndarray | pd.DataFrame | spmatrix | ADSData = None, y: OneDimArrayLikeType | TwoDimArrayLikeType | None = None)[source]#

Bases: BaseEstimator

Hyperparameter search with cross-validation.

Returns a hyperparameter tuning object

Parameters:
  • model – Object to use to fit the data. This is assumed to implement the scikit-learn estimator or pipeline interface.

  • strategy – perfunctory, detailed, or a dictionary/mapping of hyperparameters and their distributions. If perfunctory, picks a few relatively more important hyperparameters to tune. If detailed, extends to a larger search space. If a dict, a user-defined search space: a dictionary where keys are hyperparameters and values are distributions. Distributions are assumed to implement the ads distribution interface.

  • scoring (Optional[Union[Callable[..., float], str]]) – String or callable to evaluate the predictions on the validation data. If None, score on the estimator is used.

  • cv (int) – Integer to specify the number of folds in a CV splitter. If the estimator is a classifier and y is either binary or multiclass, sklearn.model_selection.StratifiedKFold is used. Otherwise, sklearn.model_selection.KFold is used.

  • study_name (str) – Name of the current experiment for the ADSTuner object. One ADSTuner object can only be attached to one study_name.

  • storage – Database URL (e.g. sqlite:///example.db). Defaults to sqlite:////tmp/hpo_*.db.

  • load_if_exists – Flag to control the behavior to handle a conflict of study names. In the case where a study named study_name already exists in the storage, a DuplicatedStudyError is raised if load_if_exists is set to False. Otherwise, the existing one is returned.

  • random_state – Seed of the pseudo random number generator. If int, this is the seed used by the random number generator. If None, the global random state from numpy.random is used.

  • loglevel – Logging level. Can be logging.NOTSET, logging.INFO, logging.DEBUG, or logging.WARNING.

  • n_jobs (int) – Number of parallel jobs. -1 means using all processors.

  • X (TwoDimArrayLikeType, Union[List[List[float]], np.ndarray, pd.DataFrame, spmatrix, ADSData]) – Training data.

  • y (Union[OneDimArrayLikeType, TwoDimArrayLikeType], optional) – Target. OneDimArrayLikeType is Union[List[float], np.ndarray, pd.Series]; TwoDimArrayLikeType is Union[List[List[float]], np.ndarray, pd.DataFrame, spmatrix, ADSData].

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.svm import SVC

tuner = ADSTuner(
                SVC(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )

X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
property best_index#

Returns:

Index which corresponds to the best candidate parameter setting.

Return type:

int

property best_params#

Returns:

Parameters of the best trial.

Return type:

Dict[str, Any]

property best_score#

Returns:

Mean cross-validated score of the best estimator.

Return type:

float

best_scores(n: int = 5, reverse: bool = True)[source]#

Return the best scores from the study

Parameters:
  • n (int) – The maximum number of results to show. Defaults to 5. If None or negative, returns all.

  • reverse (bool) – Whether to reverse the sort order so results are in descending order. Defaults to True.

Returns:

List of the best scores

Return type:

list[float or int]

Raises:

ValueError
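
For example, continuing the tuner example above after tune() has run:

# The three best mean cross-validated scores from the study, highest first.
tuner.best_scores(n=3, reverse=True)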

get_status()[source]#

Return the status of the current tuning process.

Alias for the property status.

Returns:

The status of the process

Return type:

Status

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.get_status()
halt()[source]#

Halt the current running tuning process.

Returns:

Nothing

Return type:

None

Raises:

InvalidStateTransition

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.halt()
is_completed()[source]#
Returns:

True if the ADSTuner instance has completed; False otherwise.

Return type:

bool

is_halted()[source]#
Returns:

True if the ADSTuner instance is halted; False otherwise.

Return type:

bool

is_running()[source]#
Returns:

True if the ADSTuner instance is running; False otherwise.

Return type:

bool

is_terminated()[source]#
Returns:

True if the ADSTuner instance has been terminated; False otherwise.

Return type:

bool

property n_trials#

Returns:

Number of completed trials. Alias for trial_count.

Return type:

int

static optimizer(study_name, pruner, sampler, storage, load_if_exists, objective_func, global_start, global_stop, **kwargs)[source]#

Static method for running ADSTuner tuning process

Parameters:
  • study_name (str) – The name of the study.

  • pruner – The pruning method for pruning trials.

  • sampler – The sampling method used for tuning.

  • storage (str) – Storage endpoint.

  • load_if_exists (bool) – Load existing study if it exists.

  • objective_func – The objective function to be maximized.

  • global_start (multiprocessing.Value) – The global start time.

  • global_stop (multiprocessing.Value) – The global stop time.

  • kwargs (dict) – Keyword/value pairs passed into the optimize process

Raises:

Exception – Raised for any exceptions thrown by the underlying optimization process

Returns:

Nothing

Return type:

None

plot_best_scores(best=True, inferior=True, time_interval=1, fig_size=(800, 500))[source]#

Plot optimization history of all trials in a study.

Parameters:
  • best – Controls whether to plot the lines for the best scores so far.

  • inferior – Controls whether to plot the dots for the actual objective scores.

  • time_interval – How often (in seconds) the plot refreshes to check for new trial results.

  • fig_size (tuple) – width and height of the figure.

Returns:

Nothing.

Return type:

None

plot_contour_scores(params=None, time_interval=1, fig_size=(800, 500))[source]#

Contour plot of the scores.

Parameters:
  • params (Optional[List[str]]) – Parameter list to visualize. Defaults to all.

  • time_interval (float) – Time interval for the plot. Defaults to 1.

  • fig_size (tuple[int, int]) – Figure size. Defaults to (800, 500).

Returns:

Nothing.

Return type:

None

plot_edf_scores(time_interval=1, fig_size=(800, 500))[source]#

Plot the EDF (empirical distribution function) of the scores.

Only completed trials are used.

Parameters:
  • time_interval (float) – Time interval for the plot. Defaults to 1.

  • fig_size (tuple[int, int]) – Figure size. Defaults to (800, 500).

Returns:

Nothing.

Return type:

None

plot_intermediate_scores(time_interval=1, fig_size=(800, 500))[source]#

Plot intermediate values of all trials in a study.

Parameters:
  • time_interval (float) – Time interval for the plot. Defaults to 1.

  • fig_size (tuple[int, int]) – Figure size. Defaults to (800, 500).

Returns:

Nothing.

Return type:

None

plot_parallel_coordinate_scores(params=None, time_interval=1, fig_size=(800, 500))[source]#

Plot the high-dimensional parameter relationships in a study.

Note that if a parameter contains missing values, a trial with missing values is not plotted.

Parameters:
  • params (Optional[List[str]]) – Parameter list to visualize. Defaults to all.

  • time_interval (float) – Time interval for the plot. Defaults to 1.

  • fig_size (tuple[int, int]) – Figure size. Defaults to (800, 500).

Returns:

Nothing.

Return type:

None

plot_param_importance(importance_evaluator='Fanova', time_interval=1, fig_size=(800, 500))[source]#

Plot hyperparameter importances.

Parameters:
  • importance_evaluator (str) – Importance evaluator. Valid values: “Fanova”, “MeanDecreaseImpurity”. Defaults to “Fanova”.

  • time_interval (float) – How often (in seconds) the plot refreshes to check for new trial results.

  • fig_size (tuple) – Width and height of the figure.

Raises:

NotImplementedError – Raised for unsupported importance evaluators

Returns:

Nothing.

Return type:

None

resume()[source]#

Resume the current halted tuning process.

Returns:

Nothing

Return type:

None

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.halt()
tuner.resume()
property score_remaining#

Returns:

The difference between the best score and the optimal score.

Return type:

float

Raises:

ExitCriterionError – Error is raised if there is no score-based criteria for tuning.

property scoring_name#

Returns:

Scoring name.

Return type:

str

search_space(strategy=None, overwrite=False)[source]#

Returns the search space. If strategy is not passed in, returns the existing search space. When strategy is passed in, the existing search space is overwritten if overwrite is set to True; otherwise, it is only updated.

Parameters:
  • strategy (Union[str, dict], optional) – perfunctory, detailed, or a dictionary/mapping of the hyperparameters and their distributions. If perfunctory, picks a few relatively more important hyperparameters to tune. If detailed, extends to a larger search space. If a dict, a user-defined search space: a dictionary where keys are hyperparameters and values are distributions. Distributions are assumed to implement the ads distribution interface.

  • overwrite (bool, optional) – Ignored when strategy is None. Otherwise, the search space is overwritten if overwrite is set to True and updated if it is False.

Returns:

A mapping of the hyperparameters and their distributions.

Return type:

dict

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.search_space()
property sklearn_steps#

Returns:

Search space which corresponds to the best candidate parameter setting.

Return type:

int

property status#

Returns:

The status of the current tuning process.

Return type:

Status

terminate()[source]#

Terminate the current tuning process.

Returns:

Nothing

Return type:

None

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.terminate()
property time_elapsed#

Return the time in seconds that the HPO process has been searching.

Returns:

The number of seconds the HPO process has been searching.

Return type:

int

property time_remaining#

Returns the number of seconds remaining in the study.

Returns:

Number of seconds remaining in the budget; 0 if complete or terminated.

Return type:

int

Raises:

ExitCriterionError – Error is raised if time has not been included in the budget.

property time_since_resume#

Return the seconds since the process was last resumed from a halt.

Returns:

The number of seconds since the process was last resumed.

Return type:

int

Raises:

NoRestartError

property trial_count#

Returns:

Number of completed trials. Alias for n_trials.

Return type:

int

property trials#

Returns:

Trial data up to this point.

Return type:

pandas.DataFrame

trials_export(file_uri, metadata=None, script_dict={'model': None, 'scoring': None})[source]#

Export the metadata as well as the files needed to reconstruct the ADSTuner object to object storage. Data is not stored. To resume the same ADSTuner object from object storage and continue tuning from previous trials, you have to provide the dataset.

Parameters:
  • file_uri (str) – Object storage path, 'oci://bucketname@namespace/filepath/on/objectstorage'. For example, oci://test_bucket@ociodsccust/tuner/test.zip

  • metadata (str, optional) – User defined metadata

  • script_dict (dict, optional) – Script paths for model and scoring. This is only recommended for unsupported models and user-defined scoring functions. You can store the model and scoring function in a dictionary with keys model and scoring and the respective paths as values. The model and scoring scripts must import necessary libraries for the script to run. The model and scoring variables must be set to your model and scoring function.

Returns:

Nothing

Return type:

None

Example:

# Print out a list of supported models
from ads.hpo.ads_search_space import model_list
print(model_list)

# Example scoring dictionary
{'model': '/home/datascience/advanced-ds/notebooks/scratch/ADSTunerV2/mymodel.py',
 'scoring': '/home/datascience/advanced-ds/notebooks/scratch/ADSTunerV2/customized_scoring.py'}

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)], synchronous=True)
tuner.trials_export('oci://<bucket_name>@<namespace>/tuner/test.zip')
classmethod trials_import(file_uri, delete_zip_file=True, target_file_path=None)[source]#

Import the database file from the object storage

Parameters:
  • file_uri (str) – 'oci://bucketname@namespace/filepath/on/objectstorage'. Example: 'oci://<bucket_name>@<namespace>/tuner/test.zip'

  • delete_zip_file (bool, defaults to True, optional) – Whether to delete the zip file afterwards.

  • target_file_path (str, optional) – The path where the zip file will be saved. For example, '/home/datascience/myfile.zip'.

Returns:

ADSTuner object

Return type:

ADSTuner

Examples

>>> from ads.hpo.stopping_criterion import *
>>> from ads.hpo.search_cv import ADSTuner
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import SGDClassifier
>>> X, y = load_iris(return_X_y=True)
>>> tuner = ADSTuner.trials_import('oci://<bucket_name>@<namespace>/tuner/test.zip')
>>> tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)], synchronous=True)
property trials_remaining#

Returns:

The number of trials remaining in the budget.

Return type:

int

Raises:

ExitCriterionError – Raised if the current tuner does not include a trials-based exit condition.

tune(X=None, y=None, exit_criterion=[], loglevel=None, synchronous=False)[source]#

Run hyperparameter tuning until one of the exit_criterion is met. The default is to run 50 trials.

Parameters:
  • X (TwoDimArrayLikeType, Union[List[List[float]], np.ndarray, pd.DataFrame, spmatrix, ADSData]) – Training data.

  • y (Union[OneDimArrayLikeType, TwoDimArrayLikeType], optional) – Target. OneDimArrayLikeType is Union[List[float], np.ndarray, pd.Series]; TwoDimArrayLikeType is Union[List[List[float]], np.ndarray, pd.DataFrame, spmatrix, ADSData].

  • exit_criterion (list, optional) – A list of ads stopping criteria. Can be ScoreValue(), NTrials(), TimeBudget(). For example, [ScoreValue(0.96), NTrials(40), TimeBudget(10)]. Tuning exits when any of the stopping criteria in the exit_criterion list is satisfied. By default, the run stops after 50 trials.

  • loglevel (int, optional) – Log level.

  • synchronous (boolean, optional) – Tune synchronously or not. Defaults to False

Returns:

Nothing

Return type:

None

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.svm import SVC

tuner = ADSTuner(
                SVC(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
wait()[source]#

Wait for the current tuning process to finish running.

Returns:

Nothing

Return type:

None

Example:

from ads.hpo.stopping_criterion import *
from ads.hpo.search_cv import ADSTuner
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier

tuner = ADSTuner(
                SGDClassifier(),
                strategy='detailed',
                scoring='f1_weighted',
                random_state=42
            )
tuner.search_space({'max_iter': 100})
X, y = load_iris(return_X_y=True)
tuner.tune(X=X, y=y, exit_criterion=[TimeBudget(1)])
tuner.wait()
class ads.hpo.search_cv.DataScienceObjective(objective, X_res, y_res)[source]#

Bases: object

This class replaces the previous lambda function, working around the fact that Python cannot pickle local functions or lambda functions.
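
The underlying issue: pickle can serialize module-level classes and functions, but not lambdas or locally defined functions, and the objective must be shipped to worker processes. A minimal sketch of the pattern (illustrative, not the library's actual implementation):

import pickle

class PicklableObjective:
    """Illustrative stand-in for DataScienceObjective; not the library's code."""
    def __init__(self, objective, X, y):
        self.objective, self.X, self.y = objective, X, y

    def __call__(self, trial):
        return self.objective(trial, self.X, self.y)

# Module-level classes pickle cleanly, unlike lambdas or local functions:
payload = pickle.dumps(PicklableObjective(objective=None, X=None, y=None))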

exception ads.hpo.search_cv.DuplicatedStudyError[source]#

Bases: Exception

DuplicatedStudyError is raised when a new tuner process is created with a study name that already exists in storage.

exception ads.hpo.search_cv.ExitCriterionError[source]#

Bases: Exception

ExitCriterionError is raised when an attempt is made to check exit status for a different exit type than the tuner was initialized with. For example, if an HPO study has an exit criterion based on the number of trials and a request is made for the time remaining, which is a different exit criterion, an exception is raised.

exception ads.hpo.search_cv.InvalidStateTransition[source]#

Bases: Exception

InvalidStateTransition is raised when an invalid transition request is made, such as calling halt without a running process.

exception ads.hpo.search_cv.NoRestartError[source]#

Bases: Exception

NoRestartError is raised when an attempt is made to check how many seconds have transpired since the HPO process was last resumed from a halt. This can happen if the process has been terminated, or if it was never halted and resumed in the first place.

class ads.hpo.search_cv.State(value)[source]#

Bases: Enum

An enumeration.

COMPLETED = 5#
HALTED = 3#
INITIATED = 1#
RUNNING = 2#
TERMINATED = 4#

ads.hpo.stopping_criterion module#

class ads.hpo.stopping_criterion.NTrials(n_trials: int)[source]#

Bases: object

Exit based on number of trials.

Parameters:

n_trials (int) – Number of trials (sets of hyperparameters tested). If None, there is no limit on the number of trials.

Returns:

NTrials object

Return type:

NTrials

class ads.hpo.stopping_criterion.ScoreValue(score: float)[source]#

Bases: object

Exit if the score is greater than or equal to the threshold.

Parameters:

score (float) – The threshold for exiting the tuning process. If a trial value is greater than or equal to score, the process exits.

Returns:

ScoreValue object

Return type:

ScoreValue

class ads.hpo.stopping_criterion.TimeBudget(seconds: float)[source]#

Bases: object

Exit based on the number of seconds.

Parameters:

seconds (float) – Time limit, in seconds. If None, there is no time limit.

Returns:

TimeBudget object

Return type:

TimeBudget
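
The criteria can be combined; tuning stops as soon as any one of them is met (continuing the tuner, X, and y from the ADSTuner examples above):

from ads.hpo.stopping_criterion import NTrials, ScoreValue, TimeBudget

# Stop at a score of 0.96, after 40 trials, or after 10 seconds,
# whichever comes first.
tuner.tune(X=X, y=y, exit_criterion=[ScoreValue(0.96), NTrials(40), TimeBudget(10)])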

ads.hpo.tuner_artifact module#

class ads.hpo.tuner_artifact.DownloadTunerArtifact(file_uri, target_file_path=None, auth=None)[source]#

Bases: object

Download the tuner artifact from the cloud and deserialize the tuner args

deserialize_tuner_args(file_path_dict)[source]#

Deserialize the tuner args.

Parameters:

file_path_dict (dict) – A dict that contains the paths of the different files.

download_from_cloud()[source]#

Download the artifact and unpack the archive at the target file path.

extract_tuner_args(delete_zip_file=False)[source]#

Deserialize tuner arguments from the zip file.

static load_model(script_path)[source]#
static load_scoring(script_path)[source]#
static load_target_from_script(script_path)[source]#
exception ads.hpo.tuner_artifact.NotPickableError(message)[source]#

Bases: Exception

class ads.hpo.tuner_artifact.UploadTunerArtifact(tuner, file_uri, metadata, auth=None)[source]#

Bases: object

json_serialize_tuner_args(script_dict)[source]#

JSON-serialize the tuner args.

prepare_tuner_artifact(script_dict)[source]#

Zip and save all the tuner files.

Parameters:

script_dict (dict) – A dict that contains the script names and paths.

upload(script_dict)[source]#
upload_to_cloud(tuner_zip)[source]#
ads.hpo.tuner_artifact.get_supported_model_mappings()[source]#

ads.hpo.utils module#

ads.hpo.validation module#

ads.hpo.validation.assert_is_estimator(estimator)[source]#
ads.hpo.validation.assert_model_is_supported(estimator)[source]#
ads.hpo.validation.assert_strategy_valid(param_distributions, new_strategy, old_strategy=None)[source]#
ads.hpo.validation.assert_tuner_is_fitted(estimator, msg=None)[source]#
ads.hpo.validation.validate_fit_params(X: TwoDimArrayLikeType, fit_params: Dict, indices: OneDimArrayLikeType) → Dict[source]#
ads.hpo.validation.validate_params_for_plot(params, param_distributions)[source]#
ads.hpo.validation.validate_pipeline(model)[source]#
ads.hpo.validation.validate_search_space(params, param_distributions)[source]#

Module contents#