# Source code for cornac.models.recvae.recom_recvae

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
from tqdm.auto import trange

from ..recommender import Recommender
from ...exception import ScoreException



# [docs]  (link marker left over from the rendered documentation page)
class RecVAE(Recommender):
    """
    RecVAE, a recommender system based on a Variational Autoencoder.

    Parameters
    ----------
    name : str, optional, default: 'RecVae'
        Name of the recommender model.

    hidden_dim : int, optional, default: 600
        Dimension of the hidden layer in the VAE architecture.

    latent_dim : int, optional, default: 200
        Dimension of the latent layer in the VAE architecture.

    batch_size : int, optional, default: 500
        Size of the batches used during training.

    beta : float, optional, default: None
        Weighting factor for the KL divergence term in the VAE loss function.

    gamma : float, optional, default: 0.005
        Weighting factor for the regularization term in the loss function.

    lr : float, optional, default: 5e-4
        Learning rate for the optimizer.

    n_epochs : int, optional, default: 50
        Number of epochs to train the model.

    n_enc_epochs : int, optional, default: 3
        Number of epochs to train the encoder part of VAE.

    n_dec_epochs : int, optional, default: 1
        Number of epochs to train the decoder part of VAE.

    not_alternating : boolean, optional, default: False
        If True, the model training will not alternate between encoder and decoder.

    trainable : boolean, optional, default: True
        When False, the model will not be re-trained, and input of pre-trained parameters are required.

    verbose : boolean, optional, default: False
        When True, running logs are displayed.

    seed : int, optional
        Random seed for weight initialization and training reproducibility.

    use_gpu : boolean, optional, default: True
        When True, training utilizes GPU if available.

    References
    ----------
    * RecVAE GitHub Repository: https://github.com/ilya-shenbin/RecVAE
    * Paper Link: https://arxiv.org/abs/1912.11160
    

    """
    
    def __init__(
        self,
        name="RecVae",
        hidden_dim=600,
        latent_dim=200,
        batch_size=500,
        beta=None,
        gamma=0.005,
        lr=5e-4,
        n_epochs=50,
        n_enc_epochs=3,
        n_dec_epochs=1,
        not_alternating=False,
        trainable=True,
        verbose=False,
        seed=None,
        use_gpu=True,
    ):
        """Record the hyper-parameters and pick the torch device.

        Every argument is documented in the class docstring. All of them are
        kept as instance attributes of the same name; in addition
        ``self.device`` is set to ``cuda:0`` when ``use_gpu`` is True and CUDA
        is available, and to ``cpu`` otherwise.
        """
        super().__init__(name=name, trainable=trainable, verbose=verbose)

        # Architecture and optimisation hyper-parameters.
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.batch_size = batch_size
        self.beta = beta
        self.gamma = gamma
        self.lr = lr

        # Training schedule.
        self.n_epochs = n_epochs
        self.n_enc_epochs = n_enc_epochs
        self.n_dec_epochs = n_dec_epochs
        self.not_alternating = not_alternating

        self.seed = seed
        # Kept alongside the other constructor arguments so the instance
        # records everything it was built with (previously only the derived
        # ``device`` was stored).
        self.use_gpu = use_gpu

        # torch is imported inside the methods of this class rather than at
        # module level.
        import torch

        cuda_ok = use_gpu and torch.cuda.is_available()
        self.device = torch.device("cuda:0" if cuda_ok else "cpu")


    def run(self, model, opts, train_set, my_batch_size, n_epochs, beta, gamma, dropout_rate):
        """Run ``n_epochs`` optimisation passes over the users of ``train_set``.

        Parameters
        ----------
        model : callable torch module
            Called as ``model(ratings, beta=..., gamma=..., dropout_rate=...)``
            and expected to return a pair whose second element is the loss.

        opts : iterable of optimizers
            Each one has its gradients cleared before the forward pass and is
            stepped after the backward pass.

        train_set : dataset
            Must expose ``csr_matrix`` and ``user_iter(batch_size, shuffle)``.

        my_batch_size : int
            Number of users requested per batch from ``user_iter``.

        n_epochs : int
            Number of full passes over the users.

        beta, gamma, dropout_rate
            Forwarded unchanged to ``model``.
        """
        import torch

        interactions = train_set.csr_matrix
        model.train()

        for _epoch in range(n_epochs):
            # A fresh shuffled user iterator is requested for every pass.
            for user_ids in train_set.user_iter(my_batch_size, shuffle=True):
                # Densify only the rows of the users in this batch.
                dense_rows = interactions[user_ids, :].toarray()
                batch = torch.Tensor(dense_rows).to(self.device)

                for opt in opts:
                    opt.zero_grad()

                _unused, batch_loss = model(
                    batch, beta=beta, gamma=gamma, dropout_rate=dropout_rate
                )
                batch_loss.backward()

                for opt in opts:
                    opt.step()
                    


    # [docs]  (link marker left over from the rendered documentation page)
    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        When ``self.trainable`` is True a new VAE is built and trained for
        ``self.n_epochs`` outer epochs. Each outer epoch either updates the
        encoder and decoder together (``not_alternating=True``) or runs
        ``n_enc_epochs`` encoder passes, a prior update, and ``n_dec_epochs``
        decoder passes. After every outer epoch NDCG@100 is computed with
        ``train_set`` used as both train and test data and shown in the
        progress bar.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        Recommender.fit(self, train_set, val_set)

        from .recvae import VAE
        import torch
        from torch import optim

        from ...metrics import NDCG
        from ...eval_methods import ranking_eval

        if self.trainable:
            if self.verbose:
                print("Learning...")

            if self.seed is not None:
                np.random.seed(self.seed)
                torch.manual_seed(self.seed)
                torch.cuda.manual_seed(self.seed)
                torch.cuda.manual_seed_all(self.seed)
                # NOTE: these are process-wide torch settings, not local to
                # this model.
                torch.backends.cudnn.deterministic = True
                torch.backends.cudnn.benchmark = False
                torch.backends.cudnn.enabled = False

            model_kwargs = {
                'hidden_dim': self.hidden_dim,
                'latent_dim': self.latent_dim,
                'input_dim': train_set.num_items,
            }
            self.recvae_model = VAE(**model_kwargs).to(self.device)

            # Arguments shared by every call to ``run`` below.
            learning_kwargs = {
                'model': self.recvae_model,
                'train_set': train_set,
                'my_batch_size': self.batch_size,
                'beta': self.beta,
                'gamma': self.gamma,
            }

            # ``score`` reads the interaction matrix from this attribute.
            self.mydata = train_set

            # Use lists rather than sets: torch optimizers expect parameter
            # collections with a deterministic order that is stable between
            # runs, which a set does not provide.
            encoder_params = list(self.recvae_model.encoder.parameters())
            decoder_params = list(self.recvae_model.decoder.parameters())

            optimizer_encoder = optim.Adam(encoder_params, lr=self.lr)
            optimizer_decoder = optim.Adam(decoder_params, lr=self.lr)

            # Defined up front so the final report does not fail when the loop
            # body never runs (n_epochs < 1).
            ndcg_100 = None

            progress_bar = trange(1, self.n_epochs + 1, desc="RecVAE", disable=not self.verbose)

            for _ in progress_bar:
                if self.not_alternating:
                    # Joint update of encoder and decoder, one pass per epoch.
                    self.run(opts=[optimizer_encoder, optimizer_decoder], n_epochs=1, dropout_rate=0.5, **learning_kwargs)
                else:
                    # Encoder phase (with dropout), then decoder phase
                    # (without dropout). ``update_prior`` is defined in
                    # ``.recvae``; presumably it refreshes the prior from the
                    # freshly trained encoder -- confirm there.
                    self.run(opts=[optimizer_encoder], n_epochs=self.n_enc_epochs, dropout_rate=0.5, **learning_kwargs)
                    self.recvae_model.update_prior()
                    self.run(opts=[optimizer_decoder], n_epochs=self.n_dec_epochs, dropout_rate=0, **learning_kwargs)

                # Training-set NDCG@100, used only for progress reporting.
                ndcg_100 = ranking_eval(
                    model=self,
                    metrics=[NDCG(k=100)],
                    train_set=train_set,
                    test_set=train_set,
                )[0][0]

                progress_bar.set_postfix(ndcg100=ndcg_100)

            if self.verbose:
                print(f"Learning completed : [{ndcg_100}]")

        elif self.verbose:
            print("%s is trained already (trainable = False)" % (self.name))

        return self



    # [docs]  (link marker left over from the rendered documentation page)
    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items

        Raises
        ------
        ScoreException
            If the user (or, when ``item_idx`` is given, the user or the item)
            is unknown to the model.
        """
        import torch

        # Validate the indices first, so an unknown user/item is reported as a
        # ScoreException instead of failing inside the matrix lookup below.
        if item_idx is None:
            if not self.knows_user(user_idx):
                raise ScoreException(
                    "Can't make score prediction for (user_id=%d)" % user_idx
                )
        elif not (self.knows_user(user_idx) and self.knows_item(item_idx)):
            raise ScoreException(
                "Can't make score prediction for (user_id=%d, item_id=%d)"
                % (user_idx, item_idx)
            )

        # The user's observed interactions are the input to the autoencoder.
        ratings_in = self.mydata.matrix[user_idx, :]
        model_input = torch.Tensor(ratings_in.toarray()).to(self.device)

        # ``run`` leaves the module in training mode; switch to eval mode so
        # any train-only behaviour of the network (e.g. dropout) is disabled
        # and repeated calls give the same scores. ``run`` re-enables training
        # mode itself whenever training resumes.
        self.recvae_model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            output = self.recvae_model(model_input, calculate_loss=False)

        ratings_pred = output.cpu().numpy().flatten()

        if item_idx is None:
            return ratings_pred
        return ratings_pred[item_idx]