# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from ..recommender import Recommender
from ..recommender import ANNMixin, MEASURE_DOT
from ...exception import ScoreException
from tqdm.auto import tqdm, trange
class NGCF(Recommender, ANNMixin):
    """Neural Graph Collaborative Filtering.

    Parameters
    ----------
    name: string, default: 'NGCF'
        The name of the recommender model.

    emb_size: int, default: 64
        Size of the node embeddings.

    layer_sizes: list, default: [64, 64, 64]
        Size of the output of convolution layers.

    dropout_rates: list, default: [0.1, 0.1, 0.1]
        Dropout rate for each of the convolution layers.
        - Number of values should be the same as 'layer_sizes'

    num_epochs: int, default: 1000
        Maximum number of iterations or the number of epochs.

    learning_rate: float, default: 0.001
        The learning rate that determines the step size at each iteration.

    batch_size: int, default: 1024
        Mini-batch size used for training.

    early_stopping: {min_delta: float, patience: int}, optional, default: None
        If `None`, no early stopping. Meaning of the arguments:

        - `min_delta`: the minimum increase in monitored value on validation
          set to be considered as improvement,
          i.e. an increment of less than min_delta will count as
          no improvement.

        - `patience`: number of epochs with no improvement after which
          training should be stopped.

    lambda_reg: float, default: 1e-4
        Weight decay for the L2 normalization.

    trainable: boolean, optional, default: True
        When False, the model is not trained and Cornac assumes that the model
        is already pre-trained.

    verbose: boolean, optional, default: False
        When True, some running logs are displayed.

    seed: int, optional, default: 2020
        Random seed for parameters initialization.

    References
    ----------
    * Wang, Xiang, et al. "Neural graph collaborative filtering." Proceedings
      of the 42nd international ACM SIGIR conference on Research and
      development in Information Retrieval. 2019.
    """

    def __init__(
        self,
        name="NGCF",
        emb_size=64,
        layer_sizes=None,
        dropout_rates=None,
        num_epochs=1000,
        learning_rate=0.001,
        batch_size=1024,
        early_stopping=None,
        lambda_reg=1e-4,
        trainable=True,
        verbose=False,
        seed=2020,
    ):
        super().__init__(name=name, trainable=trainable, verbose=verbose)
        self.emb_size = emb_size
        # None sentinels avoid the mutable-default-argument pitfall while
        # keeping the documented defaults ([64, 64, 64] and [0.1, 0.1, 0.1]).
        self.layer_sizes = [64, 64, 64] if layer_sizes is None else layer_sizes
        self.dropout_rates = (
            [0.1, 0.1, 0.1] if dropout_rates is None else dropout_rates
        )
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.early_stopping = early_stopping
        self.lambda_reg = lambda_reg
        self.seed = seed

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)

        if not self.trainable:
            return self

        # model setup
        import torch
        from .ngcf import Model
        from .ngcf import construct_graph

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if self.seed is not None:
            torch.manual_seed(self.seed)
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(self.seed)

        graph = construct_graph(train_set, self.total_users, self.total_items).to(
            self.device
        )
        model = Model(
            graph,
            self.emb_size,
            self.layer_sizes,
            self.dropout_rates,
            self.lambda_reg,
        ).to(self.device)

        optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)

        # model training
        pbar = trange(
            self.num_epochs,
            desc="Training",
            unit="iter",
            position=0,
            leave=False,
            disable=not self.verbose,
        )
        for _ in pbar:
            model.train()
            accum_loss = 0.0
            for batch_u, batch_i, batch_j in tqdm(
                train_set.uij_iter(
                    batch_size=self.batch_size,
                    shuffle=True,
                ),
                desc="Epoch",
                total=train_set.num_batches(self.batch_size),
                leave=False,
                position=1,
                disable=not self.verbose,
            ):
                u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(
                    graph, batch_u, batch_i, batch_j
                )

                batch_loss, batch_bpr_loss, batch_reg_loss = model.loss_fn(
                    u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings
                )
                accum_loss += batch_loss.cpu().item() * len(batch_u)

                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

            accum_loss /= len(train_set.uir_tuple[0])  # normalize over all observations
            pbar.set_postfix(loss=accum_loss)

            # store user and item embedding matrices for prediction
            model.eval()
            u_embs, i_embs, _ = model(graph)
            # we will use numpy for faster prediction in the score function, no need torch
            self.U = u_embs.cpu().detach().numpy()
            self.V = i_embs.cpu().detach().numpy()

            if self.early_stopping is not None and self.early_stop(
                train_set, val_set, **self.early_stopping
            ):
                break

        # Docstring promises `self`; without this, a trained model returned None.
        return self

    def monitor_value(self, train_set, val_set):
        """Calculating monitored value used for early stopping on validation set (`val_set`).
        This function will be called by `early_stop()` function.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        res : float
            Monitored value on validation set.
            Return `None` if `val_set` is `None`.
        """
        if val_set is None:
            return None

        from ...metrics import Recall
        from ...eval_methods import ranking_eval

        recall_20 = ranking_eval(
            model=self,
            metrics=[Recall(k=20)],
            train_set=train_set,
            test_set=val_set,
            verbose=True,
        )[0][0]

        return recall_20  # Section 4.2.3 in the paper

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items
        """
        if self.is_unknown_user(user_idx):
            raise ScoreException("Can't make score prediction for user %d" % user_idx)

        if item_idx is not None and self.is_unknown_item(item_idx):
            raise ScoreException("Can't make score prediction for item %d" % item_idx)

        if item_idx is None:
            return self.V.dot(self.U[user_idx, :])

        return self.V[item_idx, :].dot(self.U[user_idx, :])

    def get_vector_measure(self):
        """Getting a valid choice of vector measurement in ANNMixin._measures.

        Returns
        -------
        measure: MEASURE_DOT
            Dot product aka. inner product
        """
        return MEASURE_DOT

    def get_user_vectors(self):
        """Getting a matrix of user vectors serving as query for ANN search.

        Returns
        -------
        out: numpy.array
            Matrix of user vectors for all users available in the model.
        """
        return self.U

    def get_item_vectors(self):
        """Getting a matrix of item vectors used for building the index for ANN search.

        Returns
        -------
        out: numpy.array
            Matrix of item vectors for all items available in the model.
        """
        return self.V