Source code for cornac.data.modality

# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np


class Modality:
    """Base class that every modality extends.

    The constructor accepts arbitrary keyword arguments and ignores them,
    which lets subclasses forward their own ``**kwargs`` to
    ``super().__init__`` without filtering.
    """

    def __init__(self, **kwargs):
        # Nothing to initialise at this level; kwargs are intentionally unused.
        return None
def fallback_feature(func):
    """Decorator that falls back to `batch_feature` of FeatureModality.

    When the instance's ``features`` attribute is not None, the decorated
    method is bypassed and ``FeatureModality.batch_feature`` is called with
    the requested ids instead. Otherwise the decorated method runs as usual.

    Parameters
    ----------
    func: callable
        Method taking ``self`` and the batch ids, given either as the first
        positional argument or as the ``batch_ids`` keyword argument.

    Returns
    -------
    callable
        Wrapped method that keeps the name and docstring of ``func``.

    Raises
    ------
    KeyError
        From the wrapped call, if ``features`` is set and the ids were passed
        neither positionally nor as ``batch_ids``.
    """
    # Imported locally so this block does not depend on the file-level imports.
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated method
    def wrapper(self, *args, **kwargs):
        if self.features is None:
            return func(self, *args, **kwargs)

        ids = args[0] if args else kwargs['batch_ids']
        # Looked up at call time, so FeatureModality may be defined later on.
        return FeatureModality.batch_feature(self, batch_ids=ids)

    return wrapper
class FeatureModality(Modality):
    """Modality that contains features in general

    Parameters
    ----------
    features: numpy.ndarray or scipy.sparse.csr_matrix, default = None
        2d feature matrix whose row indices are aligned with user/item
        in `ids`.

    ids: List, default = None
        List of user/item ids that the row indices of `features` are
        aligned with.
        If None, the indices of provided `features` will be used as `ids`.

    normalized: bool, default = False
        Whether the features will be normalized using min-max normalization.
        The minimum and maximum are taken over the whole matrix, not per
        column.
    """

    def __init__(self, features=None, ids=None, normalized=False, **kwargs):
        super().__init__(**kwargs)
        self.features = features  # goes through the validating setter
        self.ids = ids
        self.normalized = normalized

    @property
    def features(self):
        """Return the whole feature matrix
        """
        return self.__features

    @features.setter
    def features(self, input_features):
        # Only 2d inputs are accepted; None means "no features".
        if input_features is not None:
            assert len(input_features.shape) == 2
        self.__features = input_features

    @property
    def feature_dim(self):
        """Return the dimensionality of the feature vectors
        """
        return self.features.shape[1]

    def _swap_feature(self, id_map):
        """Reorder feature rows (and `ids`) to follow `id_map`.

        For every raw id found in `id_map`, its feature row is written to
        the mapped index. Ids missing from `id_map` are skipped, and rows
        that are not a target of any mapping keep their original content.

        Parameters
        ----------
        id_map: dict
            Mapping of raw id -> new row index.
        """
        old_feats = self.features
        # np.copy() turns a scipy sparse matrix into a 0-d object array, so
        # anything that is not an ndarray is copied with its own .copy().
        if isinstance(old_feats, np.ndarray):
            new_feats = np.copy(old_feats)
        else:
            new_feats = old_feats.copy()
        new_ids = self.ids.copy()

        for old_idx, raw_id in enumerate(self.ids):
            new_idx = id_map.get(raw_id)
            if new_idx is None:
                continue
            assert new_idx < new_feats.shape[0]
            # Read from the untouched original so earlier writes cannot
            # corrupt rows that are moved later.
            new_feats[new_idx] = old_feats[old_idx]
            new_ids[new_idx] = raw_id

        self.features = new_feats
        self.ids = new_ids

    def build(self, id_map=None, **kwargs):
        """Build the feature matrix.
        Features will be swapped if the id_map is provided

        Parameters
        ----------
        id_map: dict, default = None
            Mapping of raw id -> row index. Only used when `ids` is set.

        Returns
        -------
        FeatureModality
            This object, also when there are no features, so calls can be
            chained in every case.
        """
        if self.features is None:
            return self

        if (self.ids is not None) and (id_map is not None):
            self._swap_feature(id_map)

        if self.normalized:
            # Global min-max scaling; the epsilon avoids division by zero
            # when all values are equal.
            self.features = self.features - np.min(self.features)
            self.features = self.features / (np.max(self.features) + 1e-10)

        return self

    def batch_feature(self, batch_ids):
        """Return a matrix (batch of feature vectors) corresponding to provided batch_ids
        """
        assert self.features is not None
        return self.features[batch_ids]