Source code for cornac.hyperopt

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================


import numpy as np
from itertools import product

from .models import Recommender
from .metrics import RatingMetric, RankingMetric
from .eval_methods import rating_eval, ranking_eval
from .utils import get_rng


__all__ = ["Discrete", "Continuous", "GridSearch", "RandomSearch"]


class SearchDomain:
    """Base description of one hyper-parameter's search domain.

    Subclasses define how a value is drawn by overriding :meth:`_sample`.

    Parameters
    ----------------
    name: str, required
        Name of the parameter.

    """

    def __init__(self, name):
        # The name is the only state kept by the base class.
        self.name = name

    def _sample(self, rng):
        """Draw a single value for RandomSearch; must be overridden.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError


class Discrete(SearchDomain):
    """Domain of a parameter with a set of discrete values.

    Parameters
    ----------------
    name: str, required
        Name of the parameter.

    values: list, required
        List of values to be searched.

    """

    def __init__(self, name, values):
        super().__init__(name=name)
        self.values = values

    def _sample(self, rng):
        """Sample a value of parameter used for RandomSearch

        Parameters
        ----------------
        rng: object exposing ``choice``, required
            Random number generator. NOTE(review): presumably a
            ``numpy.random.RandomState`` -- confirm against the caller.

        Returns
        -------
        The element of ``self.values`` stored at the drawn position,
        returned as-is (no type conversion).
        """
        # Draw a position rather than passing the list itself to
        # ``rng.choice``: NumPy would first turn the list into an array,
        # coercing mixed-type lists (e.g. [10, "auto"]) to one dtype and
        # rejecting nested values such as tuples.
        idx = rng.choice(len(self.values))
        return self.values[idx]
class Continuous(SearchDomain):
    """Search domain covering continuous values in the range [low, high).

    Parameters
    ----------------
    name: str, required
        Name of the parameter.

    low: float, default: 0.0
        Lower bound of the searched values (included).

    high: float, default: 1.0
        Upper bound of the searched values (excluded).

    """

    def __init__(self, name, low=0.0, high=1.0):
        super().__init__(name=name)
        self.low, self.high = low, high

    def _sample(self, rng):
        """Draw one value for RandomSearch by calling ``rng.uniform``"""
        lower, upper = self.low, self.high
        return rng.uniform(low=lower, high=upper)
class BaseSearch(Recommender):
    """Common machinery shared by the parameter-search strategies.

    Parameters
    ----------------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on. It's a list of
        :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.

    name: str, default: 'BaseSearch'
        The name of the searching strategy.

    """

    def __init__(self, model, space, metric, eval_method, name="BaseSearch"):
        super().__init__(name=name, verbose=model.verbose)
        self.model = model
        # Domains are ordered by name so the search order is reproducible.
        self.space = sorted(space, key=lambda domain: domain.name)
        self.metric = metric
        self.eval_method = eval_method

    def _build_param_set(self):
        """Produce the parameter settings to evaluate; must be overridden."""
        raise NotImplementedError()

    def fit(self, train_set, val_set=None):
        """Evaluate every parameter setting and keep the best one.

        For each setting from ``_build_param_set`` a clone of the base model
        is fitted and scored on ``val_set`` with ``self.metric``. The winner
        is stored in ``best_model`` / ``best_params`` / ``best_score``.

        Parameters
        ----------------
        train_set: required
            Training data passed to each cloned model's ``fit``.

        val_set: required
            Validation data used for scoring; must not be None.

        Returns
        -------
        self
        """
        assert val_set is not None
        Recommender.fit(self, train_set, val_set)

        candidates = self._build_param_set()

        # Pick the comparison and the worst possible starting score
        # according to the direction of the metric.
        if self.metric.higher_better:
            is_better, self.best_score = np.greater, -np.inf
        else:
            is_better, self.best_score = np.less, np.inf
        self.best_model = None
        self.best_params = None

        # this can be parallelized if needed
        # keep it simple because multimodal algorithms are usually resource-hungry
        for params in candidates:
            if self.verbose:
                print("Evaluating: {}".format(params))

            candidate = self.model.clone(params).fit(train_set, val_set)

            if isinstance(self.metric, RatingMetric):
                results = rating_eval(candidate, [self.metric], val_set)
            else:
                results = ranking_eval(
                    candidate,
                    [self.metric],
                    train_set,
                    val_set,
                    rating_threshold=self.eval_method.rating_threshold,
                    exclude_unknowns=self.eval_method.exclude_unknowns,
                    verbose=False,
                )
            current = results[0][0]

            if is_better(current, self.best_score):
                self.best_score = current
                self.best_model = candidate
                self.best_params = params

            # Drop the local reference so a non-winning model is not kept
            # alive by this name while the next candidate is being fitted.
            del candidate

        if self.verbose:
            print("Best parameter settings: {}".format(self.best_params))
            print("{} = {:.4f}".format(self.metric.name, self.best_score))

        return self

    def score(self, user_idx, item_idx=None):
        """Delegate scoring to the best model found by :meth:`fit`"""
        return self.best_model.score(user_idx, item_idx)
class GridSearch(BaseSearch):
    """Parameter searching on a grid.

    Parameters
    ----------------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on. It's a list of
        :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.

    """

    def __init__(self, model, space, metric, eval_method):
        super().__init__(
            model,
            self._validate(space),
            metric,
            eval_method,
            name="GridSearch_{}".format(model.name),
        )

    @staticmethod
    def _validate(space):
        """GridSearch only accepts Discrete search domain

        Returns
        -------
        The ``space`` argument, unchanged.

        Raises
        ------
        ValueError
            If any domain in ``space`` is not a ``Discrete`` instance.
        """
        for domain in space:
            if not isinstance(domain, Discrete):
                # Adjacent literals instead of a backslash continuation inside
                # the string, which leaked stray characters into the message.
                raise ValueError(
                    "GridSearch only supports Discrete domain but {} is not!\n"
                    "Please consider using RandomSearch instead.".format(
                        domain.name
                    )
                )
        return space

    @staticmethod
    def _ordered_values(values):
        """Return ``values`` sorted, or in given order if they cannot be sorted"""
        try:
            return sorted(values)
        except TypeError:
            # e.g. [None, 10] or [10, "auto"]: such values have no ordering,
            # so keep the order in which they were declared.
            return list(values)

    def _build_param_set(self):
        """Generate searching points

        Returns
        -------
        list of dict
            One dict per point of the Cartesian product of all domains'
            values, keyed by domain name.
        """
        keys = [d.name for d in self.space]
        grids = [self._ordered_values(d.values) for d in self.space]
        return [dict(zip(keys, combo)) for combo in product(*grids)]
class RandomSearch(BaseSearch):
    """Parameter searching by random sampling of the search space.

    Parameters
    ----------------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on. It's a list of
        :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.

    n_trails: int, default: 10
        Number of trials for random search.

    """

    def __init__(self, model, space, metric, eval_method, n_trails=10):
        search_name = "RandomSearch_{}".format(model.name)
        super().__init__(model, space, metric, eval_method, name=search_name)
        self.n_trails = n_trails

    def _build_param_set(self):
        """Sample ``n_trails`` parameter settings from the search space

        Returns
        -------
        list of dict
            Each dict maps a domain name to the value sampled for it.
        """
        names = [domain.name for domain in self.space]
        # The generator is derived from the base model's seed.
        rng = get_rng(self.model.seed)

        samples = []
        while len(samples) < self.n_trails:
            drawn = [domain._sample(rng) for domain in self.space]
            samples.append(dict(zip(names, drawn)))
        return samples