Source code for cornac.models.recommender

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import os
import copy
import inspect
import pickle
from glob import glob
from datetime import datetime

import numpy as np

from ..exception import ScoreException
from ..utils.common import intersects, clip


class Recommender:
    """Generic class for a recommender model.

    All recommendation models should inherit from this class.

    Parameters
    ----------
    name: str, required
        The name of the recommender model.

    trainable: boolean, optional, default: True
        When False, the model is not trainable.

    verbose: boolean, optional, default: False
        When True, informational messages (e.g. where a model was saved)
        are printed.
    """

    def __init__(self, name, trainable=True, verbose=False):
        self.name = name
        self.trainable = trainable
        self.verbose = verbose
        self.train_set = None
        self.val_set = None

        # attributes to be ignored when being saved
        self.ignored_attrs = ["train_set", "val_set"]

    def reset_info(self):
        """Reset the bookkeeping used by `early_stop()`."""
        # `np.Inf` was removed in NumPy 2.0; `np.inf` works on every version.
        self.best_value = -np.inf
        self.best_epoch = 0
        self.current_epoch = 0
        self.stopped_epoch = 0
        self.wait = 0

    def __deepcopy__(self, memo):
        """Deep-copy the model, leaving out attributes listed in `ignored_attrs`.

        The skipped attributes are not set at all on the returned copy.
        """
        cls = self.__class__
        result = cls.__new__(cls)
        # Register the copy first so shared or cyclic references back to this
        # model resolve to `result` instead of being copied again.
        memo[id(self)] = result
        ignored = set(self.ignored_attrs)
        for k, v in self.__dict__.items():
            if k in ignored:
                continue
            setattr(result, k, copy.deepcopy(v, memo))
        return result

    @classmethod
    def _get_init_params(cls):
        """Get initial parameters from the model constructor.

        Returns
        -------
        list of str
            Sorted names of the constructor parameters, excluding `self`.
        """
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            return []

        init_signature = inspect.signature(init)
        return sorted(
            p.name for p in init_signature.parameters.values() if p.name != "self"
        )

    def clone(self, new_params=None):
        """Clone an instance of the model object.

        Parameters
        ----------
        new_params: dict, optional, default: None
            New parameters for the cloned instance. Constructor parameters
            not listed here are deep-copied from the same-named attributes
            of this instance.

        Returns
        -------
        object: :obj:`cornac.models.Recommender`
        """
        new_params = {} if new_params is None else new_params
        init_params = {}
        for name in self._get_init_params():
            if name in new_params:
                init_params[name] = new_params[name]
            else:
                # only copy the current value when it is actually needed
                init_params[name] = copy.deepcopy(getattr(self, name))
        return self.__class__(**init_params)

    def save(self, save_dir=None):
        """Save a recommender model to the filesystem.

        Parameters
        ----------
        save_dir: str, default: None
            Path to a directory for the model to be stored. The file is
            written to `<save_dir>/<name>/<timestamp>.pkl`. If None, nothing
            is saved.

        Returns
        -------
        model_file : str
            Path to the model file stored on the filesystem, or None when
            `save_dir` is None.
        """
        if save_dir is None:
            return

        model_dir = os.path.join(save_dir, self.name)
        os.makedirs(model_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
        model_file = os.path.join(model_dir, "{}.pkl".format(timestamp))

        # deep copy drops the attributes listed in `ignored_attrs`
        saved_model = copy.deepcopy(self)
        with open(model_file, "wb") as f:
            pickle.dump(saved_model, f, protocol=pickle.HIGHEST_PROTOCOL)

        if self.verbose:
            print("{} model is saved to {}".format(self.name, model_file))

        return model_file

    @staticmethod
    def load(model_path, trainable=False):
        """Load a recommender model from the filesystem.

        Parameters
        ----------
        model_path: str, required
            Path to a file or directory where the model is stored. If a directory is
            provided, the latest model will be loaded.

        trainable: boolean, optional, default: False
            Set it to True if you would like to finetune the model. By default,
            the model parameters are assumed to be fixed after being loaded.

        Returns
        -------
        self : object

        Raises
        ------
        IndexError
            If `model_path` is a directory containing no `.pkl` file.
        """
        if os.path.isdir(model_path):
            # file names are timestamps, so the lexicographically last one
            # is the most recent
            model_file = sorted(glob("{}/*.pkl".format(model_path)))[-1]
        else:
            model_file = model_path

        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(model_file, "rb") as f:
            model = pickle.load(f)
        model.trainable = trainable
        model.load_from = model_file  # for further loading

        return model

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        self.reset_info()
        self.train_set = train_set.reset()
        self.val_set = None if val_set is None else val_set.reset()
        return self

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("The algorithm is not able to make score prediction!")

    def default_score(self):
        """Overwrite this function if your algorithm has special treatment for cold-start problem.

        Returns
        -------
        The `global_mean` of the training set.
        """
        return self.train_set.global_mean

    def rate(self, user_idx, item_idx, clipping=True):
        """Give a rating score between pair of user and item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform the rating prediction.

        item_idx: int, required
            The index of the item to be rated by the user.

        clipping: bool, default: True
            Whether to clip the predicted rating value to the
            `[min_rating, max_rating]` range of the training set.

        Returns
        -------
        A scalar
            A rating score of the user for the item
        """
        try:
            rating_pred = self.score(user_idx, item_idx)
        except ScoreException:
            # the model cannot score this pair; fall back to the default
            rating_pred = self.default_score()

        if clipping:
            rating_pred = clip(
                values=rating_pred,
                lower_bound=self.train_set.min_rating,
                upper_bound=self.train_set.max_rating,
            )

        return rating_pred

    def rank(self, user_idx, item_indices=None):
        """Rank all test items for a given user.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform item ranking.

        item_indices: 1d array, optional, default: None
            A list of candidate item indices to be ranked by the user.
            If `None`, list of ranked known item indices and their scores will be returned.
            The indices may be any subset of the items and in any order.

        Returns
        -------
        Tuple of `item_rank`, and `item_scores`.
            `item_rank` holds item indices sorted by decreasing score.
            `item_scores` holds the scores in the order of `item_indices`
            (or of the known item indices when `item_indices` is None).
        """
        # obtain item scores from the model
        try:
            known_item_scores = self.score(user_idx)
        except ScoreException:
            known_item_scores = (
                np.ones(self.train_set.total_items) * self.default_score()
            )

        # check if the returned scores also cover unknown items
        # if not, all unknown items will be given the MIN score
        if len(known_item_scores) == self.train_set.total_items:
            all_item_scores = known_item_scores
        else:
            all_item_scores = np.ones(self.train_set.total_items) * np.min(
                known_item_scores
            )
            all_item_scores[: self.train_set.num_items] = known_item_scores

        # rank items based on their scores
        if item_indices is None:
            item_scores = all_item_scores[: self.train_set.num_items]
            item_rank = item_scores.argsort()[::-1]
        else:
            # Score exactly the requested candidates, then translate sorted
            # positions back into item indices. For candidates 0..n-1 this
            # equals the previous behaviour, but it no longer requires the
            # candidates to be contiguous from zero.
            item_scores = all_item_scores[item_indices]
            item_rank = np.asarray(item_indices)[item_scores.argsort()[::-1]]

        return item_rank, item_scores

    def monitor_value(self):
        """Calculating monitored value used for early stopping on validation set (`val_set`).
        This function will be called by `early_stop()` function.
        Note: `val_set` could be `None` thus it needs to be checked before usage.

        Returns
        -------
        :raise NotImplementedError
        """
        raise NotImplementedError()

    def early_stop(self, min_delta=0.0, patience=0):
        """Check if training should be stopped when validation loss has stopped improving.

        Parameters
        ----------
        min_delta: float, optional, default: 0.
            The minimum increase in monitored value on validation set to be considered as improvement,
            i.e. an increment of less than `min_delta` will count as no improvement.

        patience: int, optional, default: 0
            Number of epochs with no improvement after which training should be stopped.

        Returns
        -------
        res : bool
            Return `True` if model training should be stopped (no improvement on validation set),
            otherwise return `False`. Also returns `False` when `monitor_value()`
            returns None.
        """
        self.current_epoch += 1
        current_value = self.monitor_value()
        if current_value is None:
            return False

        if np.greater_equal(current_value - self.best_value, min_delta):
            self.best_value = current_value
            self.best_epoch = self.current_epoch
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= patience:
                self.stopped_epoch = self.current_epoch

        if self.stopped_epoch > 0:
            print("Early stopping:")
            print(
                "- best epoch = {}, stopped epoch = {}".format(
                    self.best_epoch, self.stopped_epoch
                )
            )
            print(
                "- best monitored value = {:.6f} (delta = {:.6f})".format(
                    self.best_value, current_value - self.best_value
                )
            )
            return True
        return False