# Source code for cornac.models.trirank.recom_trirank

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
from scipy.sparse import csr_matrix
from tqdm.auto import tqdm

from ..recommender import Recommender
from ...utils import get_rng
from ...utils.init_utils import uniform
from ...exception import ScoreException


EPS = 1e-10


class TriRank(Recommender):
    """TriRank: Review-aware Explainable Recommendation by Modeling Aspects.

    Parameters
    ----------
    name: string, optional, default: 'TriRank'
        The name of the recommender model.

    alpha: float, optional, default: 1
        The weight of smoothness on user-item relation.

    beta: float, optional, default: 1
        The weight of smoothness on item-aspect relation.

    gamma: float, optional, default: 1
        The weight of smoothness on user-aspect relation.

    eta_U: float, optional, default: 1
        The weight of fitting constraint on users.

    eta_P: float, optional, default: 1
        The weight of fitting constraint on items.

    eta_A: float, optional, default: 1
        The weight of fitting constraint on aspects.

    max_iter: int, optional, default: 100
        Maximum number of update rounds of the online ranking. If set to
        `max_iter=-1` (any non-positive value), the online ranking only stops
        once the user, item and aspect scores have converged.

    verbose: boolean, optional, default: True
        When True, running logs are displayed.

    init_params: dictionary, optional, default: None
        Initial parameters, e.g.,
        init_params = {'R': R, 'X': X, 'Y': Y, 'p': p, 'a': a, 'u': u}

        R: csr_matrix, shape (n_users, n_items)
            Symmetrically normalized edge weight matrix of the user-item relation.
        X: csr_matrix, shape (n_items, n_aspects)
            Symmetrically normalized edge weight matrix of the item-aspect relation.
        Y: csr_matrix, shape (n_users, n_aspects)
            Symmetrically normalized edge weight matrix of the user-aspect relation.
        p: ndarray, shape (n_items,)
            Initial item scores.
        a: ndarray, shape (n_aspects,)
            Initial aspect scores.
        u: ndarray, shape (n_users,)
            Initial user scores.

    seed: int, optional, default: None
        Random seed for parameters initialization.

    trainable: boolean, optional, default: True
        When False, `fit` does not rebuild R, X and Y and the model is assumed
        to be pre-trained (R, X, Y, p, a, u supplied through `init_params`).

    References
    ----------
    He, Xiangnan, Tao Chen, Min-Yen Kan, and Xiao Chen. 2015.
    TriRank: Review-aware Explainable Recommendation by Modeling Aspects.
    In the 24th ACM international on conference on information and knowledge
    management (CIKM'15). ACM, New York, NY, USA, 1661-1670.
    DOI: https://doi.org/10.1145/2806416.2806504
    """

    def __init__(
        self,
        name="TriRank",
        alpha=1,
        beta=1,
        gamma=1,
        eta_U=1,
        eta_P=1,
        eta_A=1,
        max_iter=100,
        verbose=True,
        init_params=None,
        seed=None,
        trainable=True,
    ):
        super().__init__(name)
        # `trainable` is appended last so existing positional callers keep working.
        self.trainable = trainable
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.eta_U = eta_U
        self.eta_P = eta_P
        self.eta_A = eta_A
        self.max_iter = max_iter
        self.verbose = verbose
        self.seed = seed
        self.rng = get_rng(seed)

        # Init params if provided
        self.init_params = {} if init_params is None else init_params
        self.R = self.init_params.get("R", None)
        self.X = self.init_params.get("X", None)
        self.Y = self.init_params.get("Y", None)
        self.p = self.init_params.get("p", None)
        self.a = self.init_params.get("a", None)
        self.u = self.init_params.get("u", None)

    def _init(self, train_set):
        """Draw the item, aspect and user score vectors that were not supplied.

        The draw order (p, a, u) is kept fixed so that a given seed always
        yields the same initialization.
        """
        if self.p is None:
            self.p = uniform(train_set.num_items, random_state=self.rng)
        if self.a is None:
            self.a = uniform(train_set.sentiment.num_aspects, random_state=self.rng)
        if self.u is None:
            self.u = uniform(train_set.num_users, random_state=self.rng)

    def _symmetrical_normalization(self, matrix: csr_matrix):
        """Return `matrix` with every non-zero entry (i, j) divided by
        sqrt(row_sum[i]) * sqrt(col_sum[j]).

        Parameters
        ----------
        matrix: csr_matrix
            Sparse edge weight matrix.

        Returns
        -------
        csr_matrix with the same shape as `matrix`.
        """
        row_norm = np.sqrt(np.asarray(matrix.sum(axis=1)).ravel())
        col_norm = np.sqrt(np.asarray(matrix.sum(axis=0)).ravel())

        coo = matrix.tocoo()
        # Skip explicitly stored zeros, as `matrix.nonzero()` would.
        stored = coo.data != 0
        rows = coo.row[stored]
        cols = coo.col[stored]
        data = coo.data[stored] / (row_norm[rows] * col_norm[cols])
        return csr_matrix((data, (rows, cols)), shape=matrix.shape)

    def _create_matrices(self, train_set):
        """Offline training: build the normalized relation matrices R, X and Y.

        X (item-aspect) and Y (user-aspect) count, per pair, the number of
        reviews mentioning the aspect; counts are log-damped before all three
        matrices are symmetrically normalized.
        """
        from time import time

        self.r_mat = train_set.csr_matrix
        start_time = time()
        if self.verbose:
            print("Building matrices started!")

        sentiment_modality = train_set.sentiment
        n_users = train_set.num_users
        n_items = train_set.num_items
        n_aspects = sentiment_modality.num_aspects

        X_row, X_col, X_data = [], [], []
        Y_row, Y_col, Y_data = [], [], []
        for uid, isid in tqdm(
            sentiment_modality.user_sentiment.items(),
            disable=not self.verbose,
            desc="Building matrices",
        ):
            for iid, sid in isid.items():
                aos = sentiment_modality.sentiment[sid]
                # An aspect counts at most once per review (sid).
                aids = {aid for aid, _, _ in aos}
                for aid in aids:
                    X_row.append(iid)
                    X_col.append(aid)
                    X_data.append(1)
                    Y_row.append(uid)
                    Y_col.append(aid)
                    Y_data.append(1)

        # Algorithm 1: Offline training line 2
        # Duplicate (row, col) pairs are summed on construction, giving counts.
        X = csr_matrix((X_data, (X_row, X_col)), shape=(n_items, n_aspects))
        Y = csr_matrix((Y_data, (Y_row, Y_col)), shape=(n_users, n_aspects))

        # Algorithm 1: Offline training line 3
        X.data = np.log2(X.data) + 1
        Y.data = np.log2(Y.data) + 1

        # Algorithm 1: Offline training line 4
        if self.verbose:
            print("Building symmetric normalized matrices R, X, Y")
        self.R = self._symmetrical_normalization(train_set.csr_matrix)
        self.X = self._symmetrical_normalization(X)
        self.Y = self._symmetrical_normalization(Y)

        if self.verbose:
            total_time = time() - start_time
            print("Building matrices completed in %d s" % total_time)

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)

        self._init(train_set)

        # Scoring reads the rating matrix, so keep it even when the relation
        # matrices are pre-trained and not rebuilt below.
        self.r_mat = train_set.csr_matrix

        if not self.trainable:
            return self

        # Offline training: Build item-aspect matrix X and user-aspect matrix Y
        self._create_matrices(train_set)

        return self

    def _online_recommendation(self, user):
        """Run the online ranking for one user.

        Parameters
        ----------
        user: int
            Index of the target user.

        Returns
        -------
        (p, a, u): tuple of ndarray
            Item, aspect and user scores personalized for `user`.
        """
        # Algorithm 1: Online recommendation line 5
        p_0 = self.r_mat[[user]]  # fancy indexing yields a copy that is safe to mutate
        p_0.data.fill(1)
        p_0 = p_0.toarray().squeeze()
        a_0 = self.Y[user].toarray().squeeze()
        u_0 = np.zeros(self.r_mat.shape[0])
        u_0[user] = 1

        # Algorithm 1: Online recommendation line 6 (L1-normalize the queries)
        if p_0.any():
            p_0 /= np.linalg.norm(p_0, 1)
        if a_0.any():
            a_0 /= np.linalg.norm(a_0, 1)
        if u_0.any():
            u_0 /= np.linalg.norm(u_0, 1)

        # Algorithm 1: Online recommendation line 7
        p = self.p.copy()
        a = self.a.copy()
        u = self.u.copy()

        # Loop-invariant weights of eq. 4; EPS guards an all-zero weight setting.
        u_denominator = self.alpha + self.gamma + self.eta_U + EPS
        p_denominator = self.alpha + self.beta + self.eta_P + EPS
        a_denominator = self.gamma + self.beta + self.eta_A + EPS
        R_T = self.R.T
        X_T = self.X.T
        Y_T = self.Y.T

        # Algorithm 1: Online recommendation line 8
        prev_p, prev_a, prev_u = p, a, u
        n_rounds = 0
        while True:
            # eq. 4 -- the scalar weight multiplies the matrix-vector product,
            # not the sparse matrix itself.
            u = (
                self.alpha / u_denominator * (self.R @ p)
                + self.gamma / u_denominator * (self.Y @ a)
                + self.eta_U / u_denominator * u_0
            ).squeeze()
            p = (
                self.alpha / p_denominator * (R_T @ u)
                + self.beta / p_denominator * (self.X @ a)
                + self.eta_P / p_denominator * p_0
            ).squeeze()
            a = (
                self.gamma / a_denominator * (Y_T @ u)
                + self.beta / a_denominator * (X_T @ p)
                + self.eta_A / a_denominator * a_0
            ).squeeze()
            n_rounds += 1

            converged = (
                np.allclose(u, prev_u)
                and np.allclose(p, prev_p)
                and np.allclose(a, prev_a)
            )
            # A non-positive max_iter disables the cap: iterate until convergence.
            if converged or (self.max_iter > 0 and n_rounds >= self.max_iter):
                break
            prev_p, prev_a, prev_u = p, a, u

        # Algorithm 1: Online recommendation line 9
        return p, a, u

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items

        Raises
        ------
        ScoreException
            If the user, or the requested item, is unknown to the model.
        """
        if self.is_unknown_user(user_idx):
            raise ScoreException("Can't make score prediction for user %d" % user_idx)

        if item_idx is not None and self.is_unknown_item(item_idx):
            raise ScoreException("Can't make score prediction for item %d" % item_idx)

        item_scores, *_ = self._online_recommendation(user_idx)

        # Set already rated items to zero.
        item_scores[self.r_mat[user_idx].indices] = 0

        # Scale to match rating scale.
        top_score = item_scores.max()
        if top_score > 0:
            item_scores = (
                item_scores * (self.max_rating - self.min_rating) / top_score
                + self.min_rating
            )
        else:
            # No unrated item got a positive score: avoid 0/0 and report the
            # bottom of the rating scale for every item.
            item_scores = np.full(item_scores.shape, self.min_rating, dtype=float)

        return item_scores if item_idx is None else item_scores[item_idx]