Skip to content

Hybrid - API Reference

Auto-generated documentation for hybrid recommender model classes.

Hybrid Autoencoders

warprec.recommenders.hybrid_recommender.addease.AddEASE

Bases: ItemSimRecommender

Implementation of the AddEASE algorithm from "Closed-Form Models for Collaborative Filtering with Side-Information" (2020).

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Argument for PyTorch nn.Module.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Keyword argument for PyTorch nn.Module.

{}

Attributes:

Name Type Description
l2 float

The L2 regularization weight added to the diagonal of the Gram matrices.

alpha float

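The weight of the collaborative solution in the final linear combination; the side-information solution is weighted by 1 - alpha.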

Source code in warprec/recommenders/hybrid_recommender/addease.py
@model_registry.register(name="AddEASE")
class AddEASE(ItemSimRecommender):
    """Implementation of AddEASE algorithm from
        Closed-Form Models for Collaborative Filtering with Side-Information 2020.

    Two closed-form item-item matrices are computed, one from the user-item
    interactions and one from the item side information, and then blended
    linearly with weight ``alpha``.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Argument for PyTorch nn.Module.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Keyword argument for PyTorch nn.Module.

    Attributes:
        l2 (float): The L2 regularization weight added to the diagonal of
            both Gram matrices.
        alpha (float): The weight of the collaborative solution in the final
            combination; the side-information solution gets ``1 - alpha``.

    Raises:
        ValueError: If the interactions carry no side information.
    """

    l2: float
    alpha: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        Args:
            params (dict): The dictionary with the model params (not read here).
            info (dict): Dataset information; ``n_items`` is read from it.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Additional keyword arguments (unused).

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes`` label.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        item_profile = interactions.get_side_sparse()
        if item_profile is None:
            raise ValueError("AddEASE requires side information to estimate space.")

        n_items = info["n_items"]

        # Sparse item x item Gram matrices of the two data sources.
        collab_sparse_gram_mb = cls._estimated_sparse_square_size_mb(
            source_nnz=X.nnz,
            side_len=n_items,
            data_dtype=X.dtype,
        )
        side_sparse_gram_mb = cls._estimated_sparse_square_size_mb(
            source_nnz=item_profile.nnz,
            side_len=n_items,
            data_dtype=item_profile.dtype,
        )

        # Dense item x item buffers: one in each source dtype, one in float64.
        collab_dense_conversion_mb = cls._dense_size_mb((n_items, n_items), X.dtype)
        side_dense_conversion_mb = cls._dense_size_mb(
            (n_items, n_items), item_profile.dtype
        )
        dense_inverse_mb = cls._dense_size_mb((n_items, n_items), np.float64)

        # Each source contributes two candidate peaks; _peak_size_mb presumably
        # keeps the largest of its arguments -- TODO confirm against the helper.
        collab_peak_mb = cls._peak_size_mb(
            collab_sparse_gram_mb + collab_dense_conversion_mb + 2 * dense_inverse_mb,
            collab_dense_conversion_mb + 4 * dense_inverse_mb,
        )
        side_peak_mb = cls._peak_size_mb(
            side_sparse_gram_mb + side_dense_conversion_mb + 2 * dense_inverse_mb,
            side_dense_conversion_mb + 4 * dense_inverse_mb,
        )
        # The final blend holds both solutions plus their combination.
        merge_peak_mb = 3 * dense_inverse_mb

        train_ram_mb = cls._peak_size_mb(
            collab_peak_mb,
            side_peak_mb,
            merge_peak_mb,
        )
        return {
            "train_ram_mb": train_ram_mb,
            "notes": "AddEASE analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Fit the model in closed form and store the item similarity.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        # Fail early with a clear message (mirroring estimate_space) instead of
        # crashing on ``None`` after the first dense inversion has already run.
        item_profile = interactions.get_side_sparse()
        if item_profile is None:
            raise ValueError(
                "AddEASE requires side information to build the item similarity."
            )

        X = self.train_matrix

        # First solution: regularized Gram matrix of the interactions.
        G = X.T @ X + self.l2 * np.identity(X.shape[1])
        B = np.linalg.inv(G)
        # Broadcasting divides every column j by -B[j, j].
        B /= -np.diag(B)
        # No self-similarity.
        np.fill_diagonal(B, 0.0)

        # Second solution: same procedure on the item side information.
        P = item_profile @ item_profile.T + self.l2 * np.identity(item_profile.shape[0])
        U = np.linalg.inv(P)
        U /= -np.diag(U)
        np.fill_diagonal(U, 0.0)

        # Linear combination
        self.item_similarity = self.alpha * B + (1 - self.alpha) * U

warprec.recommenders.hybrid_recommender.cease.CEASE

Bases: ItemSimRecommender

Implementation of the CEASE algorithm from "Closed-Form Models for Collaborative Filtering with Side-Information" (2020).

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Argument for PyTorch nn.Module.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Keyword argument for PyTorch nn.Module.

{}

Attributes:

Name Type Description
l2 float

The L2 regularization weight added to the diagonal of the Gram matrix.

alpha float

The scaling factor applied to the item side information before it is stacked under the interaction matrix.

Source code in warprec/recommenders/hybrid_recommender/cease.py
@model_registry.register(name="CEASE")
class CEASE(ItemSimRecommender):
    """Implementation of CEASE algorithm from
        Closed-Form Models for Collaborative Filtering with Side-Information 2020.

    The interaction matrix is extended with the (transposed, ``alpha``-scaled)
    item side information and a single closed-form item-item matrix is
    computed from the extended matrix.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Argument for PyTorch nn.Module.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Keyword argument for PyTorch nn.Module.

    Attributes:
        l2 (float): The L2 regularization weight added to the diagonal of
            the Gram matrix.
        alpha (float): The scaling factor applied to the side information
            before it is stacked under the interaction matrix.

    Raises:
        ValueError: If the interactions carry no side information.
    """

    l2: float
    alpha: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        Args:
            params (dict): The dictionary with the model params (not read here).
            info (dict): Dataset information; ``n_items`` is read from it.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Additional keyword arguments (unused).

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes`` label.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        item_profile = interactions.get_side_sparse()
        if item_profile is None:
            raise ValueError("CEASE requires side information to estimate space.")

        n_items = info["n_items"]

        # The extended matrix stacks the interactions and the transposed side
        # information, so it holds the non-zeros and the rows of both.
        extended_nnz = X.nnz + item_profile.nnz
        extended_matrix_mb = cls._compressed_sparse_size_mb(
            nnz=extended_nnz,
            ptr_len=X.shape[0] + item_profile.shape[1] + 1,
            data_dtype=X.dtype,
        )
        gram_matrix_mb = cls._estimated_sparse_square_size_mb(
            source_nnz=extended_nnz,
            side_len=n_items,
            data_dtype=X.dtype,
        )
        dense_conversion_mb = cls._dense_size_mb((n_items, n_items), X.dtype)
        dense_inverse_mb = cls._dense_size_mb((n_items, n_items), np.float64)

        # Footprint while the regularized Gram matrix is being assembled.
        regularization_peak_mb = (
            extended_matrix_mb
            + gram_matrix_mb
            + dense_conversion_mb
            + 2 * dense_inverse_mb
        )
        # Footprint while the dense matrix is being inverted.
        inverse_peak_mb = dense_conversion_mb + 4 * dense_inverse_mb

        train_ram_mb = cls._peak_size_mb(
            regularization_peak_mb,
            inverse_peak_mb,
        )
        return {
            "train_ram_mb": train_ram_mb,
            "notes": "CEASE analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Fit the model in closed form and store the item similarity.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        X = self.train_matrix
        item_profile = interactions.get_side_sparse()
        # Fail with a clear message (mirroring estimate_space) instead of an
        # attribute error on ``None``.
        if item_profile is None:
            raise ValueError(
                "CEASE requires side information to build the item similarity."
            )

        # Extend interaction matrix with the scaled, transposed side information
        X_extended = vstack((X, item_profile.T * self.alpha))

        G = X_extended.T @ X_extended + self.l2 * np.identity(X_extended.shape[1])
        B = np.linalg.inv(G)
        # Broadcasting divides every column j by -B[j, j].
        B /= -np.diag(B)
        # No self-similarity.
        np.fill_diagonal(B, 0.0)

        self.item_similarity = B

Hybrid KNN

warprec.recommenders.hybrid_recommender.attributeitemknn.AttributeItemKNN

Bases: ItemSimRecommender

Implementation of the AttributeItemKNN algorithm from "MyMediaLite: A free recommender system library" (2011).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

Source code in warprec/recommenders/hybrid_recommender/attributeitemknn.py
@model_registry.register(name="AttributeItemKNN")
class AttributeItemKNN(ItemSimRecommender):
    """Implementation of AttributeItemKNN algorithm from
        MyMediaLite: A free recommender system library 2011.

    The item-item similarity is computed on the item side information and
    then restricted to the top-``k`` neighbors.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Name of the similarity measure, looked up in
            ``similarities_registry``.

    Raises:
        ValueError: If the interactions carry no side information.
    """

    k: int
    similarity: str

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        Args:
            params (dict): Model parameters (not read here).
            info (dict): Dataset information; ``n_items`` is read from it.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Additional keyword arguments (unused).

        Returns:
            dict: ``train_ram_mb`` with the estimate and a ``notes`` label.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        X_feat = interactions.get_side_sparse()
        if X_feat is None:
            raise ValueError(
                "AttributeItemKNN requires side information to estimate space."
            )

        n_items = info["n_items"]

        train_matrix_mb = cls._sparse_size_mb(X)
        feature_matrix_mb = cls._sparse_size_mb(X_feat)
        # The similarity matrix is counted as a dense item x item array.
        similarity_matrix_mb = cls._dense_size_mb((n_items, n_items), X_feat.dtype)

        return {
            "train_ram_mb": train_matrix_mb + feature_matrix_mb + similarity_matrix_mb,
            "notes": "AttributeItemKNN analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Compute the top-k item similarity from the item side information.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        X_feat = interactions.get_side_sparse()
        # Fail with a clear message (mirroring estimate_space) instead of
        # handing ``None`` to the similarity function.
        if X_feat is None:
            raise ValueError(
                "AttributeItemKNN requires side information to build "
                "the item similarity."
            )
        similarity = similarities_registry.get(self.similarity)

        # Compute similarity matrix
        sim_matrix = torch.from_numpy(similarity.compute(X_feat))

        # Compute top_k filtering
        filtered_sim_matrix = self._apply_topk_filtering(sim_matrix, self.k)

        # Update item_similarity
        self.item_similarity = filtered_sim_matrix.numpy()

warprec.recommenders.hybrid_recommender.attributeuserknn.AttributeUserKNN

Bases: Recommender

Implementation of the AttributeUserKNN algorithm from "MyMediaLite: A free recommender system library" (2011).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

user_profile str

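How the user profile is computed: "tfidf" row-normalizes the aggregated feature profile; any other value keeps the raw aggregated profile.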

Source code in warprec/recommenders/hybrid_recommender/attributeuserknn.py
@model_registry.register(name="AttributeUserKNN")
class AttributeUserKNN(Recommender):
    """Implementation of AttributeUserKNN algorithm from
        MyMediaLite: A free recommender system library 2011.

    Each user is described by the aggregated features of the items they
    interacted with; a user-user similarity is computed on those profiles
    and restricted to the top-``k`` neighbors.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Name of the similarity measure, looked up in
            ``similarities_registry``.
        user_profile (str): How the user profile is computed. ``"tfidf"``
            applies ``_compute_user_tfidf``; any other value keeps the raw
            aggregated profile.

    Raises:
        ValueError: If the interactions carry no side information.
    """

    k: int
    similarity: str
    user_profile: str

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        Args:
            params (dict): Model parameters; ``user_profile`` is read from it.
            info (dict): Dataset information; ``n_users`` and ``n_items``
                are read from it.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Additional keyword arguments (unused).

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes`` label.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X_inter = interactions.get_sparse()
        X_feat = interactions.get_side_sparse()
        if X_feat is None:
            raise ValueError(
                "AttributeUserKNN requires side information to estimate space."
            )

        n_users = info["n_users"]
        n_items = info["n_items"]
        n_features = X_feat.shape[1]

        # Estimate the non-zeros of the user profile: every interaction
        # contributes an average item's features, capped at the dense size.
        avg_features_per_item = X_feat.nnz / max(n_items, 1)
        profile_nnz = int(
            min(n_users * n_features, np.ceil(X_inter.nnz * avg_features_per_item))
        )

        train_matrix_mb = cls._sparse_size_mb(X_inter)
        feature_matrix_mb = cls._sparse_size_mb(X_feat)
        profile_matrix_mb = cls._compressed_sparse_size_mb(
            nnz=profile_nnz,
            ptr_len=n_users + 1,
            data_dtype=X_inter.dtype,
        )
        # The similarity matrix is counted as a dense user x user array.
        similarity_matrix_mb = cls._dense_size_mb((n_users, n_users), X_inter.dtype)

        # Two phases: building the profile, then computing the similarity.
        train_ram_mb = cls._peak_size_mb(
            train_matrix_mb + feature_matrix_mb + profile_matrix_mb,
            train_matrix_mb + profile_matrix_mb + similarity_matrix_mb,
        )
        if params.get("user_profile") == "tfidf":
            # The tfidf step adds a second profile copy and a per-user vector.
            train_ram_mb = cls._peak_size_mb(
                train_ram_mb,
                train_matrix_mb
                + feature_matrix_mb
                + 2 * profile_matrix_mb
                + cls._dense_size_mb((n_users,), np.float64),
            )

        return {
            "train_ram_mb": train_ram_mb,
            "notes": "AttributeUserKNN analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Compute the top-k user similarity from the user feature profiles.

        Raises:
            ValueError: If the interactions carry no side information.
        """
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Store the training matrix for prediction
        self.train_matrix = interactions.get_sparse()

        X_inter = self.train_matrix
        X_feat = interactions.get_side_sparse()
        # Fail with a clear message (mirroring estimate_space) instead of a
        # type error in the matrix product below.
        if X_feat is None:
            raise ValueError(
                "AttributeUserKNN requires side information to build "
                "the user similarity."
            )
        similarity = similarities_registry.get(self.similarity)

        # Compute user profile: for every user, the interaction-weighted sum
        # of the feature rows of the items they interacted with
        X_profile = X_inter @ X_feat

        # Compute tfidf profile if requested
        if self.user_profile == "tfidf":
            X_profile = self._compute_user_tfidf(X_profile)

        # Compute similarity matrix
        sim_matrix = torch.from_numpy(similarity.compute(X_profile))

        # Compute top_k filtering
        filtered_sim_matrix = self._apply_topk_filtering(sim_matrix, self.k)

        # Update user_similarity
        self.user_similarity = filtered_sim_matrix.numpy()

    def _compute_user_tfidf(self, user_profile: csr_matrix) -> csr_matrix:
        """Computes the normalized ("tfidf") profile for user features.

        Each row is divided by its sum and then scaled to unit L2 norm.
        NOTE(review): despite the name, no inverse-document-frequency term
        is applied here.

        Args:
            user_profile (csr_matrix): The profile of the users.

        Returns:
            csr_matrix: The normalized profile of the users.
        """
        # Convert to average instead of sum. np.asarray works both when
        # ``sum`` returns an np.matrix (sparse matrices) and a plain ndarray
        # (sparse arrays), unlike the matrix-only ``.A`` attribute.
        user_counts = np.asarray(user_profile.sum(axis=1)).ravel()
        user_counts[user_counts == 0] = 1  # Avoid division by zero
        user_profile = user_profile.multiply(1 / user_counts[:, np.newaxis])

        # L2 normalize
        return normalize(user_profile, norm="l2", axis=1)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction in the form of B@X where B is a {user x user} similarity matrix.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Index with a NumPy array rather than the tensor itself: NumPy turns
        # a one-element integer tensor into a scalar index (via ``__index__``),
        # which would drop the batch dimension for single-user batches.
        batch_users = user_indices.cpu().numpy()

        # Compute predictions and convert to Tensor
        predictions = self.user_similarity[batch_users, :] @ self.train_matrix
        predictions = torch.from_numpy(predictions)

        if item_indices is None:
            # Case 'full': prediction on all items
            return predictions  # [batch_size, n_items]

        # Case 'sampled': prediction on a sampled set of items.
        # Indices above the last item are clamped onto it so gather stays
        # in range.
        return predictions.gather(
            1,
            item_indices.to(predictions.device).clamp(
                max=self.n_items - 1
            ),  # [batch_size, pad_seq]
        )

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction in the form of B@X where B is a {user x user} similarity matrix.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/hybrid_recommender/attributeuserknn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction in the form of B@X where B is a {user x user} similarity matrix.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Index with a NumPy array rather than the tensor itself: NumPy turns a
    # one-element integer tensor into a scalar index (via ``__index__``),
    # which would drop the batch dimension for single-user batches.
    batch_users = user_indices.cpu().numpy()

    # Compute predictions and convert to Tensor
    predictions = self.user_similarity[batch_users, :] @ self.train_matrix
    predictions = torch.from_numpy(predictions)

    if item_indices is None:
        # Case 'full': prediction on all items
        return predictions  # [batch_size, n_items]

    # Case 'sampled': prediction on a sampled set of items.
    # Indices above the last item are clamped onto it so gather stays in range.
    return predictions.gather(
        1,
        item_indices.to(predictions.device).clamp(
            max=self.n_items - 1
        ),  # [batch_size, pad_seq]
    )