Source code for cornac.models.recvae.recom_recvae

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
from tqdm.auto import trange

from ..recommender import Recommender
from ...exception import ScoreException


class RecVAE(Recommender):
    """RecVAE, a recommender system based on a Variational Autoencoder.

    Parameters
    ----------
    name : str, optional, default: 'RecVae'
        Name of the recommender model.

    hidden_dim : int, optional, default: 600
        Dimension of the hidden layer in the VAE architecture.

    latent_dim : int, optional, default: 200
        Dimension of the latent layer in the VAE architecture.

    batch_size : int, optional, default: 500
        Size of the batches used during training.

    beta : float, optional
        Weighting factor for the KL divergence term in the VAE loss function.

    gamma : float, optional, default: 0.005
        Weighting factor for the regularization term in the loss function.

    lr : float, optional, default: 5e-4
        Learning rate for the optimizer.

    n_epochs : int, optional, default: 50
        Number of epochs to train the model.

    n_enc_epochs : int, optional, default: 3
        Number of epochs to train the encoder part of VAE.

    n_dec_epochs : int, optional, default: 1
        Number of epochs to train the decoder part of VAE.

    not_alternating : boolean, optional, default: False
        If True, the model training will not alternate between encoder and decoder.

    trainable : boolean, optional, default: True
        When False, the model will not be re-trained, and input of pre-trained
        parameters are required.

    verbose : boolean, optional, default: False
        When True, running logs are displayed.

    seed : int, optional
        Random seed for weight initialization and training reproducibility.

    use_gpu : boolean, optional, default: True
        When True, training utilizes GPU if available.

    References
    ----------
    * RecVAE GitHub Repository: https://github.com/ilya-shenbin/RecVAE
    * Paper Link: https://arxiv.org/abs/1912.11160
    """

    def __init__(
        self,
        name="RecVae",
        hidden_dim=600,
        latent_dim=200,
        batch_size=500,
        beta=None,
        gamma=0.005,
        lr=5e-4,
        n_epochs=50,
        n_enc_epochs=3,
        n_dec_epochs=1,
        not_alternating=False,
        trainable=True,
        verbose=False,
        seed=None,
        use_gpu=True,
    ):
        super().__init__(name=name, trainable=trainable, verbose=verbose)
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.batch_size = batch_size
        self.beta = beta
        self.gamma = gamma
        self.lr = lr
        self.n_epochs = n_epochs
        self.n_enc_epochs = n_enc_epochs
        self.n_dec_epochs = n_dec_epochs
        self.not_alternating = not_alternating
        self.seed = seed
        # Keep the flag on the instance like every other constructor argument.
        self.use_gpu = use_gpu

        # torch is imported lazily so that importing this module does not
        # require torch to be installed.
        import torch

        if use_gpu and torch.cuda.is_available():
            self.device = torch.device("cuda:0")
        else:
            self.device = torch.device("cpu")

    def run(self, model, opts, train_set, my_batch_size, n_epochs, beta, gamma, dropout_rate):
        """Run ``n_epochs`` passes over the training users, stepping ``opts``.

        Parameters
        ----------
        model : callable module
            Called as ``model(ratings, beta=..., gamma=..., dropout_rate=...)``;
            the second element of the returned pair is used as the loss.

        opts : list
            Optimizers to zero before, and step after, each backward pass.

        train_set : :obj:`cornac.data.Dataset`
            Provides ``csr_matrix`` and ``user_iter``.

        my_batch_size : int
            Number of users per mini-batch.

        n_epochs : int
            Number of full passes over the users.

        beta, gamma, dropout_rate
            Forwarded unchanged to ``model``.
        """
        import torch

        train_data = train_set.csr_matrix
        # Always (re-)enter training mode: `score` switches the model to
        # evaluation mode between epochs.
        model.train()

        for _ in range(n_epochs):
            for batch_ids in train_set.user_iter(my_batch_size, shuffle=True):
                # Densify only the rows of the current batch of users.
                ratings = torch.Tensor(train_data[batch_ids, :].toarray()).to(self.device)

                for optimizer in opts:
                    optimizer.zero_grad()

                _, loss = model(ratings, beta=beta, gamma=gamma, dropout_rate=dropout_rate)
                loss.backward()

                for optimizer in opts:
                    optimizer.step()

    def fit(self, train_set, val_set=None):
        """Fit the model to observations.

        When ``not_alternating`` is False, every epoch trains the encoder for
        ``n_enc_epochs`` passes, updates the prior, then trains the decoder
        for ``n_dec_epochs`` passes; otherwise both are updated jointly for
        one pass. After each epoch NDCG@100 is computed on the training set
        and shown on the progress bar.

        Parameters
        ----------
        train_set: :obj:`cornac.data.Dataset`, required
            User-Item preference data as well as additional modalities.

        val_set: :obj:`cornac.data.Dataset`, optional, default: None
            User-Item preference data for model selection purposes (e.g., early stopping).

        Returns
        -------
        self : object
        """
        super().fit(train_set, val_set)

        from .recvae import VAE
        import torch
        from torch import optim

        from ...metrics import NDCG
        from ...eval_methods import ranking_eval

        if self.trainable:
            if self.verbose:
                print("Learning...")

            if self.seed is not None:
                np.random.seed(self.seed)
                torch.manual_seed(self.seed)
                torch.cuda.manual_seed(self.seed)
                torch.cuda.manual_seed_all(self.seed)
                torch.backends.cudnn.deterministic = True
                torch.backends.cudnn.benchmark = False
                torch.backends.cudnn.enabled = False

            model_kwargs = {
                'hidden_dim': self.hidden_dim,
                'latent_dim': self.latent_dim,
                'input_dim': train_set.num_items,
            }

            self.recvae_model = VAE(**model_kwargs).to(self.device)

            learning_kwargs = {
                'model': self.recvae_model,
                'train_set': train_set,
                'my_batch_size': self.batch_size,
                'beta': self.beta,
                'gamma': self.gamma,
            }

            # `score` reads the users' interaction rows from this dataset.
            self.mydata = train_set

            decoder_params = set(self.recvae_model.decoder.parameters())
            encoder_params = set(self.recvae_model.encoder.parameters())

            optimizer_encoder = optim.Adam(encoder_params, lr=self.lr)
            optimizer_decoder = optim.Adam(decoder_params, lr=self.lr)

            # Defined up front so the final log line works even when
            # n_epochs == 0 and the loop body never runs.
            ndcg_100 = None

            progress_bar = trange(
                1, self.n_epochs + 1, desc="RecVAE", disable=not self.verbose
            )

            for _ in progress_bar:
                if self.not_alternating:
                    self.run(
                        opts=[optimizer_encoder, optimizer_decoder],
                        n_epochs=1,
                        dropout_rate=0.5,
                        **learning_kwargs,
                    )
                else:
                    self.run(
                        opts=[optimizer_encoder],
                        n_epochs=self.n_enc_epochs,
                        dropout_rate=0.5,
                        **learning_kwargs,
                    )
                    self.recvae_model.update_prior()
                    self.run(
                        opts=[optimizer_decoder],
                        n_epochs=self.n_dec_epochs,
                        dropout_rate=0,
                        **learning_kwargs,
                    )

                # Training-set NDCG@100, used only as a progress indicator.
                ndcg_100 = ranking_eval(
                    model=self,
                    metrics=[NDCG(k=100)],
                    train_set=train_set,
                    test_set=train_set,
                )[0][0]

                progress_bar.set_postfix(ndcg100=ndcg_100)

            if self.verbose:
                print(f"Learning completed : [{ndcg_100}]")

        elif self.verbose:
            print("%s is trained already (trainable = False)" % (self.name))

        return self

    def score(self, user_idx, item_idx=None):
        """Predict the scores/ratings of a user for an item.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform score prediction.

        item_idx: int, optional, default: None
            The index of the item for which to perform score prediction.
            If None, scores for all known items will be returned.

        Returns
        -------
        res : A scalar or a Numpy array
            Relative scores that the user gives to the item or to all known items

        Raises
        ------
        ScoreException
            If the user (or the requested item) is unknown to the model.
        """
        import torch

        # Validate before touching the data: indexing the interaction matrix
        # with an unknown index would fail with an unrelated error instead of
        # the ScoreException callers expect.
        if item_idx is None:
            if not self.knows_user(user_idx):
                raise ScoreException(
                    "Can't make score prediction for (user_id=%d)" % user_idx
                )
        elif not (self.knows_user(user_idx) and self.knows_item(item_idx)):
            raise ScoreException(
                "Can't make score prediction for (user_id=%d, item_id=%d)"
                % (user_idx, item_idx)
            )

        ratings_in = self.mydata.matrix[user_idx, :]
        model_input = torch.Tensor(ratings_in.toarray()).to(self.device)

        # Score in evaluation mode and without autograd bookkeeping. `run`
        # puts the model back into training mode before the next update.
        # NOTE(review): presumably the VAE applies dropout / latent sampling
        # only in training mode -- confirm against the VAE module.
        self.recvae_model.eval()
        with torch.no_grad():
            output = self.recvae_model(model_input, calculate_loss=False)
        ratings_pred = output.cpu().detach().numpy().flatten()

        if item_idx is None:
            return ratings_pred
        return ratings_pred[item_idx]