Source code for physlearn.supervised.utils._search

"""
The :mod:`physlearn.supervised.utils._search` module provides
basic utilities for automated (hyper)parameter search.
"""

# Author: Alex Wozniakowski
# License: MIT

from __future__ import annotations

import typing

import numpy as np

import bayes_opt
import sklearn.model_selection

from physlearn.supervised.utils._estimator_checks import _check_bayesoptcv_param_type
from physlearn.supervised.utils._definition import _SEARCH_METHOD

# Union of the search objects this module can hand back; it is used as the
# return annotation of ``_search_method``. Note that ``_search_method`` also
# has a parameter named ``search_method`` (a str), which shadows this alias
# inside that function's body.
search_method = typing.Union[sklearn.model_selection.GridSearchCV,
                             sklearn.model_selection.RandomizedSearchCV,
                             bayes_opt.BayesianOptimization]


def _bayesoptcv(X, y, estimator, search_params, cv,
                scoring, n_jobs, verbose, random_state,
                init_points, n_iter):
    """Maximize an estimator's mean cross-validation score with bayes_opt.

    The objective handed to ``bayes_opt.BayesianOptimization`` sets the
    proposed (hyper)parameters on ``estimator`` and returns the mean of
    ``sklearn.model_selection.cross_val_score`` over ``X`` and ``y``.

    Parameters
    ----------
    X, y
        Data forwarded unchanged to ``cross_val_score``.

    estimator
        Object exposing ``set_params``. It is modified in place on every
        evaluation of the objective.

    search_params
        Passed to ``BayesianOptimization`` as ``pbounds``.

    cv, scoring, n_jobs
        Forwarded unchanged to ``cross_val_score``.

    verbose, random_state
        Forwarded unchanged to ``BayesianOptimization``.

    init_points, n_iter
        Forwarded unchanged to ``BayesianOptimization.maximize``.

    Returns
    -------
    bayes_opt.BayesianOptimization
        The optimizer object, after ``maximize`` has been called on it.
    """

    def mean_cv_score(**candidate):
        # The proposed values go through the project's type check first
        # (presumably to coerce them into the types the estimator expects --
        # confirm against _check_bayesoptcv_param_type).
        checked = _check_bayesoptcv_param_type(pbounds=candidate)
        estimator.set_params(**checked)

        fold_scores = sklearn.model_selection.cross_val_score(
            estimator=estimator,
            X=X,
            y=y,
            scoring=scoring,
            cv=cv,
            n_jobs=n_jobs,
        )
        return fold_scores.mean()

    optimizer = bayes_opt.BayesianOptimization(
        f=mean_cv_score,
        pbounds=search_params,
        verbose=verbose,
        random_state=random_state,
    )
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    return optimizer


def _search_method(search_method: str, pipeline: ModifiedPipeline,
                   search_params: dict, scoring: str, refit=True,
                   n_jobs=-1, cv=None, verbose=0, pre_dispatch='2*n_jobs',
                   error_score=np.nan, return_train_score=None,
                   randomizedcv_n_iter=None, X=None, y=None,
                   random_state=None, init_points=None,
                   bayesoptcv_n_iter=None) -> search_method:
    """Helper (hyper)parameter search function.

    Parameters
    ----------
    search_method : str
        Specifies the search method. If ``'gridsearchcv'``, ``'randomizedsearchcv'``,
        or ``'bayesoptcv'`` then the search method is GridSearchCV, RandomizedSearchCV,
        or Bayesian Optimization.

    pipeline : ModifiedPipeline
        A ModifiedPipeline object.

    search_params : dict
        Dictionary with (hyper)parameter names as keys, and either lists of
        (hyper)parameter settings to try as values or tuples of (hyper)parameter
        lower and upper bounds to try as values.

    scoring : str, callable, list/tuple, or dict
        Determines scoring in the k-fold cross-validation methods.

    refit : bool, optional (default=True)
        Determines whether to return the refit ModifiedPipeline object.
        Used in GridSearchCV and RandomizedSearchCV.

    n_jobs : int or None, optional (default=-1)
        The number of jobs to run in parallel. Passed to GridSearchCV and
        RandomizedSearchCV, and to the Bayesian Optimization helper.

    cv : int, cross-validation generator, an iterable, or None, optional (default=None)
        Determines the cross-validation strategy. If None, then the default
        is 5-fold cross-validation.

    verbose : int, optional (default=0)
        Determines verbosity.

    pre_dispatch : int or str, optional (default='2*n_jobs')
        Controls the number of jobs that get dispatched during parallel execution in
        GridSearchCV and RandomizedSearchCV.

    error_score : 'raise' or numeric, optional (default=np.nan)
        The assigned value if an error occurs while inducing a regressor.
        If set to 'raise', then the specific error is raised. Else if set
        to a numeric value, then FitFailedWarning is raised in GridSearchCV
        and RandomizedSearchCV.

    return_train_score : bool or None, optional (default=None)
        Determines whether to return the training scores from the k-fold
        cross-validation methods in GridSearchCV and RandomizedSearchCV.

    randomizedcv_n_iter : int or None, optional (default=None)
        Determines the number of (hyper)parameter settings that are
        sampled in RandomizedSearchCV. If None, ``n_iter`` is not passed
        and RandomizedSearchCV falls back to its own default.

    X : array-like of shape = [n_samples, n_features] or None, optional (default=None)
        The design matrix, where each row corresponds to an example and the
        column(s) correspond to the feature(s). Used in Bayesian Optimization.

    y : array-like of shape = [n_samples] or shape = [n_samples, n_targets] or None, optional (default=None)
        The target matrix, where each row corresponds to an example and the
        column(s) correspond to the single-target(s). Used in Bayesian Optimization.

    random_state : int, RandomState instance, or None, optional (default=None)
        Determines the random number generation in Bayesian Optimization.

    init_points : int or None, optional (default=None)
        Determines the number of random exploration steps in Bayesian
        Optimization. Increasing the number corresponds to diversifying
        the exploration space.

    bayesoptcv_n_iter : int or None, optional (default=None)
        Determines the number of optimization steps in Bayesian
        Optimization.

    Returns
    -------
    GridSearchCV, RandomizedSearchCV, or BayesianOptimization
        For ``'gridsearchcv'`` and ``'randomizedsearchcv'`` the search object
        is only constructed here, not fitted. For ``'bayesoptcv'`` the value
        returned by the ``_bayesoptcv`` helper is passed through.

    Raises
    ------
    KeyError
        If ``search_method`` is not a recognized option.
    """

    # Validate with an explicit raise rather than ``assert``: assertions are
    # stripped under ``python -O``, and KeyError matches the fallback at the
    # end of this function.
    if search_method not in _SEARCH_METHOD:
        raise KeyError('The search method: %s is not a recognized option. '
                       % (search_method,))

    if search_method == 'bayesoptcv':
        return _bayesoptcv(X=X, y=y,
                           estimator=pipeline,
                           search_params=search_params,
                           cv=cv,
                           scoring=scoring,
                           n_jobs=n_jobs,
                           verbose=verbose,
                           random_state=random_state,
                           init_points=init_points,
                           n_iter=bayesoptcv_n_iter)

    # Keyword arguments common to both scikit-learn search classes.
    sklearn_kwargs = dict(estimator=pipeline,
                          scoring=scoring,
                          refit=refit,
                          n_jobs=n_jobs,
                          cv=cv,
                          verbose=verbose,
                          pre_dispatch=pre_dispatch,
                          error_score=error_score,
                          return_train_score=return_train_score)

    if search_method == 'gridsearchcv':
        return sklearn.model_selection.GridSearchCV(param_grid=search_params,
                                                    **sklearn_kwargs)

    if search_method == 'randomizedsearchcv':
        # Forward n_iter only when the caller supplied one; an explicit
        # ``n_iter=None`` is not a usable value for RandomizedSearchCV.
        if randomizedcv_n_iter is not None:
            sklearn_kwargs['n_iter'] = randomizedcv_n_iter
        return sklearn.model_selection.RandomizedSearchCV(
            param_distributions=search_params,
            **sklearn_kwargs)

    # Reached only if _SEARCH_METHOD lists an option with no branch above.
    raise KeyError('The search method: %s is not a recognized option. '
                   % (search_method,))
Table Of Contents

Source code for physlearn.supervised.utils._search