# Source code for cornac.models.trirank.recom_trirank

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
from scipy.sparse import csr_matrix
from tqdm.auto import tqdm

from ..recommender import Recommender
from ...utils import get_rng
from ...utils.init_utils import uniform
from ...exception import ScoreException


EPS = 1e-10



# [docs]
class TriRank(Recommender):
    """TriRank: Review-aware Explainable Recommendation by Modeling Aspects.

    Parameters
    ----------
    name: string, optional, default: 'TriRank'
        The name of the recommender model.

    alpha: float, optional, default: 1
        The weight of smoothness on user-item relation

    beta: float, optional, default: 1
        The weight of smoothness on item-aspect relation

    gamma: float, optional, default: 1
        The weight of smoothness on user-aspect relation

    eta_U: float, optional, default: 1
        The weight of fitting constraint on users

    eta_P: float, optional, default: 1
        The weight of fitting constraint on items

    eta_A: float, optional, default: 1
        The weight of fitting constraint on aspects

    max_iter: int, optional, default: 100
        Maximum number of iterations to stop online training. If set to `max_iter=-1`, \
        the online training will stop when model parameters are converged.

    trainable: boolean, optional, default: True
        When False, the model is not trained and Cornac assumes that the model is already \
        pre-trained (R, X, Y, p, a, u are not None).

    verbose: boolean, optional, default: True
        When True, running logs are displayed.

    init_params: dictionary, optional, default: None
        List of initial parameters, e.g., init_params = {'R':R, 'X':X, 'Y':Y, 'p':p, 'a':a, 'u':u}

        R: csr_matrix, shape (n_users, n_items)
            The symmetrically normalized edge weight matrix of the user-item relation, optional initialization via init_params

        X: csr_matrix, shape (n_items, n_aspects)
            The symmetrically normalized edge weight matrix of the item-aspect relation, optional initialization via init_params

        Y: csr_matrix, shape (n_users, n_aspects)
            The symmetrically normalized edge weight matrix of the user-aspect relation, optional initialization via init_params

        p: ndarray, shape (n_items,)
            Initialized item weights, optional initialization via init_params

        a: ndarray, shape (n_aspects,)
            Initialized aspect weights, optional initialization via init_params

        u: ndarray, shape (n_users,)
            Initialized user weights, optional initialization via init_params

    seed: int, optional, default: None
        Random seed for parameters initialization.

    References
    ----------
    He, Xiangnan, Tao Chen, Min-Yen Kan, and Xiao Chen. 2015. \
    TriRank: Review-aware Explainable Recommendation by Modeling Aspects. \
    In Proceedings of the 24th ACM International Conference on Information and Knowledge Management (CIKM '15). \
    ACM, New York, NY, USA, 1661-1670. DOI: https://doi.org/10.1145/2806416.2806504
    """

    def __init__(
        self,
        name="TriRank",
        alpha=1,
        beta=1,
        gamma=1,
        eta_U=1,
        eta_P=1,
        eta_A=1,
        max_iter=100,
        verbose=True,
        init_params=None,
        seed=None,
        trainable=True,
    ):
        """Store the hyper-parameters and any pre-computed model parameters.

        The meaning of each hyper-parameter is given in the class docstring.

        Parameters
        ----------
        trainable: boolean, optional, default: True
            Forwarded to the base recommender. `fit` skips the offline matrix
            construction when this is False. It is declared last so that the
            positional order of the pre-existing arguments is unchanged.

        init_params: dictionary, optional, default: None
            May contain any of the keys 'R', 'X', 'Y', 'p', 'a', 'u'; keys that
            are absent leave the corresponding attribute set to None.
        """
        super().__init__(name=name, trainable=trainable, verbose=verbose)
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.eta_U = eta_U
        self.eta_P = eta_P
        self.eta_A = eta_A
        self.max_iter = max_iter
        self.seed = seed
        self.rng = get_rng(seed)

        # Init params if provided
        self.init_params = {} if init_params is None else init_params
        self.R = self.init_params.get("R", None)
        self.X = self.init_params.get("X", None)
        self.Y = self.init_params.get("Y", None)
        self.p = self.init_params.get("p", None)
        self.a = self.init_params.get("a", None)
        self.u = self.init_params.get("u", None)

    def _init(self, train_set):
        # Initialize user, item and aspect rank.
        if self.p is None:
            self.p = uniform(train_set.num_items, random_state=self.rng)
        if self.a is None:
            self.a = uniform(train_set.sentiment.num_aspects, random_state=self.rng)
        if self.u is None:
            self.u = uniform(train_set.num_users, random_state=self.rng)

    def _symmetrical_normalization(self, matrix: csr_matrix):
        row = []
        col = []
        data = []
        row_norm = np.sqrt(matrix.sum(axis=1).A1)
        col_norm = np.sqrt(matrix.sum(axis=0).A1)
        for i, j in zip(*matrix.nonzero()):
            row.append(i)
            col.append(j)
            data.append(matrix[i, j] / (row_norm[i] * col_norm[j]))

        return csr_matrix((data, (row, col)), shape=matrix.shape)

    def _create_matrices(self, train_set):
        """Build the normalized relation matrices R, X and Y (offline stage).

        Stores the raw rating matrix in `self.r_mat`, counts for every item and
        every user how many of their reviews mention each aspect, dampens the
        counts with ``log2(count) + 1`` and stores the symmetrically normalized
        user-item, item-aspect and user-aspect matrices in `self.R`, `self.X`
        and `self.Y`.
        """
        from time import time

        self.r_mat = train_set.csr_matrix

        began_at = time()
        if self.verbose:
            print("Building matrices started!")
        sentiment = train_set.sentiment
        item_aspect_shape = (train_set.num_items, sentiment.num_aspects)
        user_aspect_shape = (train_set.num_users, sentiment.num_aspects)

        # One (item, aspect) and one (user, aspect) occurrence per review that
        # mentions the aspect; both relations share the same aspect column.
        item_ids = []
        user_ids = []
        aspect_ids = []
        progress = tqdm(
            sentiment.user_sentiment.items(),
            disable=not self.verbose,
            desc="Building matrices",
        )
        for uid, reviews_by_item in progress:
            for iid, sid in reviews_by_item.items():
                # Only one per review/sid
                mentioned = {aspect for aspect, _, _ in sentiment.sentiment[sid]}
                aspect_ids.extend(mentioned)
                item_ids.extend([iid] * len(mentioned))
                user_ids.extend([uid] * len(mentioned))

        # Algorithm 1: Offline training line 2
        # Repeated coordinates are summed by the constructor, giving counts.
        ones = [1] * len(aspect_ids)
        X = csr_matrix((ones, (item_ids, aspect_ids)), shape=item_aspect_shape)
        Y = csr_matrix((ones, (user_ids, aspect_ids)), shape=user_aspect_shape)

        # Algorithm 1: Offline training line 3
        X.data = np.log2(X.data) + 1
        Y.data = np.log2(Y.data) + 1

        # Algorithm 1: Offline training line 4
        if self.verbose:
            print("Building symmetric normalized matrices R, X, Y")
        self.R = self._symmetrical_normalization(train_set.csr_matrix)
        self.X = self._symmetrical_normalization(X)
        self.Y = self._symmetrical_normalization(Y)

        if self.verbose:
            elapsed = time() - began_at
            print("Building matrices completed in %d s" % elapsed)


# [docs]
    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Runs the base-class fit, creates any missing rank vectors and, unless
        the model is marked as not trainable, builds the normalized relation
        matrices R, X and Y from `train_set`.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)
        self._init(train_set)

        # Scoring reads the raw rating matrix, so keep it even when the offline
        # stage below is skipped for a pre-trained model.
        self.r_mat = train_set.csr_matrix

        if not self.trainable:
            return self

        # Offline training: Build item-aspect matrix X and user-aspect matrix Y
        self._create_matrices(train_set)
        return self


    def _online_recommendation(self, user):
        """Compute item, aspect and user scores for one target user.

        Online stage of Algorithm 1 (lines 5-9): build the user's query
        vectors, then repeat the u / p / a updates until none of the three
        vectors changes any more or, when `max_iter` is positive, until
        `max_iter` update rounds have been performed.

        Parameters
        ----------
        user: int
            Row index of the target user in `self.r_mat` and `self.Y`.

        Returns
        -------
        p, a, u : ndarray
            Final item, aspect and user score vectors.
        """
        # Algorithm 1: Online recommendation line 5
        # Indexing yields a new sparse matrix, so filling its data with ones
        # (an "interacted" indicator) leaves self.r_mat untouched.
        p_0 = self.r_mat[[user]]
        p_0.data.fill(1)
        # ravel() keeps the vectors 1-D even when a dimension has size one.
        p_0 = p_0.toarray().ravel().astype(float)
        a_0 = self.Y[user].toarray().ravel().astype(float)
        u_0 = np.zeros(self.r_mat.shape[0])
        u_0[user] = 1

        # Algorithm 1: Online recommendation line 6
        for query in (p_0, a_0, u_0):
            l1 = np.linalg.norm(query, 1)
            if l1 > 0:
                query /= l1

        # Algorithm 1: Online recommendation line 7
        p = self.p.copy()
        a = self.a.copy()
        u = self.u.copy()

        # The mixing weights and transposes do not change between rounds.
        # EPS keeps the denominators non-zero when all weights are zero.
        u_den = self.alpha + self.gamma + self.eta_U + EPS
        p_den = self.alpha + self.beta + self.eta_P + EPS
        a_den = self.gamma + self.beta + self.eta_A + EPS
        R, X, Y = self.R, self.X, self.Y
        R_t, X_t, Y_t = R.T, X.T, Y.T

        # Algorithm 1: Online recommendation line 8
        prev_p, prev_a, prev_u = p, a, u
        n_rounds = 0
        while True:
            # eq. 4 -- scale the dense product rather than the sparse matrix,
            # so no rescaled copy of R / X / Y is allocated in each round.
            u = (
                self.alpha / u_den * R.dot(p)
                + self.gamma / u_den * Y.dot(a)
                + self.eta_U / u_den * u_0
            )
            p = (
                self.alpha / p_den * R_t.dot(u)
                + self.beta / p_den * X.dot(a)
                + self.eta_P / p_den * p_0
            )
            # The aspect query is weighted by eta_A, matching a_den.
            a = (
                self.gamma / a_den * Y_t.dot(u)
                + self.beta / a_den * X_t.dot(p)
                + self.eta_A / a_den * a_0
            )
            n_rounds += 1

            converged = (
                np.all(np.isclose(u, prev_u))
                and np.all(np.isclose(p, prev_p))
                and np.all(np.isclose(a, prev_a))
            )
            # A non-positive max_iter means "iterate until convergence".
            out_of_budget = self.max_iter > 0 and n_rounds >= self.max_iter
            if converged or out_of_budget:
                break
            prev_p, prev_a, prev_u = p, a, u

        # Algorithm 1: Online recommendation line 9
        return p, a, u


# [docs]
    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items.
            Items the user has already rated receive `min_rating`.

        Raises
        ------
        ScoreException
            If the user, or the requested item, is unknown to the model.
        """
        if self.is_unknown_user(user_idx):
            raise ScoreException("Can't make score prediction for user %d" % user_idx)

        if item_idx is not None and self.is_unknown_item(item_idx):
            raise ScoreException("Can't make score prediction for item %d" % item_idx)

        item_scores, *_ = self._online_recommendation(user_idx)
        item_scores = np.atleast_1d(item_scores)
        # Set already rated items to zero.
        item_scores[self.r_mat[user_idx].indices] = 0

        # Scale to match rating scale.
        top_score = np.max(item_scores)
        if top_score != 0:
            item_scores = (
                item_scores * (self.max_rating - self.min_rating) / top_score
                + self.min_rating
            )
        else:
            # Every remaining score is zero: dividing by the maximum would
            # yield NaN, so place all items at the bottom of the scale.
            item_scores = np.full(item_scores.shape, self.min_rating, dtype=float)

        return item_scores if item_idx is None else item_scores[item_idx]