# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
from itertools import product
from .models import Recommender
from .metrics import RatingMetric, RankingMetric
from .eval_methods import rating_eval, ranking_eval
from .utils import get_rng
# Names exported by ``from <module> import *``: the two concrete search
# domains and the two search strategies defined in this file.
__all__ = ["Discrete", "Continuous", "GridSearch", "RandomSearch"]
class SearchDomain:
    """Abstract description of the values one parameter may take.

    Parameters
    ----------
    name: str, required
        Name of the parameter.
    """

    def __init__(self, name):
        self.name = name

    def _sample(self, rng):
        """Draw one value of the parameter (used by RandomSearch).

        The base class provides no sampling strategy and always raises
        ``NotImplementedError``; concrete domains override this method.
        """
        raise NotImplementedError()
# [docs] -- documentation-link marker left over from HTML extraction; not code
class Discrete(SearchDomain):
    """Domain of a parameter with a set of discrete values.

    Parameters
    ----------
    name: str, required
        Name of the parameter.

    values: list, required
        List of values to be searched.
    """

    def __init__(self, name, values):
        super().__init__(name=name)
        self.values = values

    def _sample(self, rng):
        """Sample a value of parameter used for RandomSearch.

        Parameters
        ----------
        rng:
            Random number generator exposing a NumPy-style ``choice`` method.

        Returns
        -------
        One element of ``self.values``, returned exactly as it was stored.
        """
        # Sample a position instead of handing the values to ``rng.choice``.
        # NumPy would first coerce the list to an array, which stringifies or
        # upcasts mixed-type lists, rejects tuple/list-valued options as
        # "not 1-dimensional", and hands back NumPy scalars rather than the
        # original Python objects.
        position = rng.choice(len(self.values))
        return self.values[position]
# [docs] -- documentation-link marker left over from HTML extraction; not code
class Continuous(SearchDomain):
    """Domain of a real-valued parameter drawn from the range [low, high).

    Parameters
    ----------
    name: str, required
        Name of the parameter.

    low: float, default: 0.0
        Lower bound of the searched values (included).

    high: float, default: 1.0
        Upper bound of the searched values (excluded).
    """

    def __init__(self, name, low=0.0, high=1.0):
        super().__init__(name=name)
        self.low, self.high = low, high

    def _sample(self, rng):
        """Draw one value for the parameter via ``rng.uniform`` (used by RandomSearch)."""
        lower, upper = self.low, self.high
        return rng.uniform(low=lower, high=upper)
class BaseSearch(Recommender):
    """Base class for doing parameter search.

    Subclasses supply the candidate settings through ``_build_param_set``;
    ``fit`` trains one clone of the base model per candidate, scores each on
    the validation set, and keeps the best one.

    Parameters
    ----------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on.
        It's a list of :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.

    name: str, default: 'BaseSearch'
        The name of the searching strategy.
    """

    def __init__(self, model, space, metric, eval_method, name="BaseSearch"):
        super().__init__(name=name, verbose=model.verbose)
        self.model = model
        # Domains are ordered by name so the search order does not depend on
        # the order in which the caller listed them.
        self.space = sorted(space, key=lambda x: x.name)  # for reproducibility
        self.metric = metric
        self.eval_method = eval_method

    def _build_param_set(self):
        """Generate searching points.

        Must be overridden; ``fit`` iterates over the result and passes each
        item to ``self.model.clone``.
        """
        raise NotImplementedError()

    def fit(self, train_set, val_set=None):
        """Doing hyper-parameter search.

        Parameters
        ----------
        train_set:
            Training data handed to every candidate model's ``fit``.

        val_set:
            Validation data used to score every candidate. Required even
            though the signature gives it a default of ``None``.

        Returns
        -------
        self
            With ``best_score``, ``best_model`` and ``best_params`` set.

        Raises
        ------
        ValueError
            If ``val_set`` is ``None``.
        """
        # Explicit check instead of ``assert`` so that the validation is not
        # stripped when Python runs with -O.
        if val_set is None:
            raise ValueError(
                "val_set is required for hyper-parameter search but got None."
            )

        Recommender.fit(self, train_set, val_set)

        param_set = self._build_param_set()

        # Strict comparison: on ties the earliest evaluated setting is kept.
        compare_op = np.greater if self.metric.higher_better else np.less
        self.best_score = -np.inf if self.metric.higher_better else np.inf
        self.best_model = None
        self.best_params = None

        # this can be parallelized if needed
        # keep it simple because multimodal algorithms are usually resource-hungry
        for params in param_set:
            if self.verbose:
                print("Evaluating: {}".format(params))

            model = self.model.clone(params).fit(train_set, val_set)

            # NOTE(review): ``[0][0]`` presumably picks the aggregate result of
            # the single metric passed in -- confirm against the return shape
            # of rating_eval / ranking_eval.
            if isinstance(self.metric, RatingMetric):
                score = rating_eval(model, [self.metric], val_set)[0][0]
            else:
                score = ranking_eval(
                    model,
                    [self.metric],
                    train_set,
                    val_set,
                    rating_threshold=self.eval_method.rating_threshold,
                    exclude_unknowns=self.eval_method.exclude_unknowns,
                    verbose=False,
                )[0][0]

            if compare_op(score, self.best_score):
                self.best_score = score
                self.best_model = model
                self.best_params = params

            # Drop the local reference; only the best model stays referenced
            # through ``self.best_model``.
            del model

        if self.verbose:
            print("Best parameter settings: {}".format(self.best_params))
            print("{} = {:.4f}".format(self.metric.name, self.best_score))

        return self

    def score(self, user_idx, item_idx=None):
        """Scoring using the best searched model.

        Delegates to ``self.best_model.score``; ``fit`` must have selected a
        model beforehand.
        """
        return self.best_model.score(user_idx, item_idx)
# [docs] -- documentation-link marker left over from HTML extraction; not code
class GridSearch(BaseSearch):
    """Parameter searching on a grid.

    Every combination of the values of all domains is evaluated.

    Parameters
    ----------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on.
        It's a list of :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.
    """

    def __init__(self, model, space, metric, eval_method):
        super().__init__(
            model,
            self._validate(space),
            metric,
            eval_method,
            name="GridSearch_{}".format(model.name),
        )

    @staticmethod
    def _validate(space):
        """GridSearch only accepts Discrete search domain.

        Returns the domains as a list.

        Raises
        ------
        ValueError
            If any domain is not a ``Discrete`` instance.
        """
        # Materialise once: a one-shot iterable would otherwise be consumed by
        # this check and reach the base class empty.
        domains = list(space)
        for domain in domains:
            if not isinstance(domain, Discrete):
                raise ValueError(
                    (
                        "GridSearch only supports Discrete domain but {} is not!\n"
                        "Please consider using RandomSearch instead."
                    ).format(domain.name)
                )
        return domains

    @staticmethod
    def _ordered_values(values):
        """Return ``values`` sorted, or in the given order if they cannot be compared."""
        try:
            return sorted(values)
        except TypeError:
            # Unorderable mix such as [None, 0.1]: keep the caller's order,
            # which is still deterministic.
            return list(values)

    def _build_param_set(self):
        """Generate searching points.

        Returns a list of dicts, one per grid point, mapping each domain name
        to one of its values.
        """
        keys = [d.name for d in self.space]
        axes = [self._ordered_values(d.values) for d in self.space]
        return [dict(zip(keys, combo)) for combo in product(*axes)]
# [docs] -- documentation-link marker left over from HTML extraction; not code
class RandomSearch(BaseSearch):
    """Hyper-parameter search that evaluates randomly sampled settings.

    Parameters
    ----------
    model: :obj:`cornac.models.Recommender`, required
        Base recommender model to be tuned.

    space: list, required
        Parameter space to be searched on.
        It's a list of :obj:`cornac.hyperopt.SearchDomain`.

    metric: :obj:`cornac.metrics.RatingMetric` or :obj:`cornac.metrics.RankingMetric`, required
        Scoring metric to measure the performance and rank the parameter settings.

    eval_method: :obj:`cornac.eval_methods.BaseMethod`, required
        Evaluation method is being used.

    n_trails: int, default: 10
        Number of trials for random search.
    """

    def __init__(self, model, space, metric, eval_method, n_trails=10):
        search_name = "RandomSearch_{}".format(model.name)
        super().__init__(model, space, metric, eval_method, name=search_name)
        self.n_trails = n_trails

    def _build_param_set(self):
        """Generate searching points.

        Samples one value per domain, ``n_trails`` times, from a generator
        obtained with ``get_rng(self.model.seed)``.
        """
        rng = get_rng(self.model.seed)
        samples = []
        while len(samples) < self.n_trails:
            samples.append({d.name: d._sample(rng) for d in self.space})
        return samples