Skip to content

Collaborative Filtering - API Reference

Auto-generated documentation for all collaborative filtering model classes.

Autoencoders

warprec.recommenders.collaborative_filtering_recommender.autoencoder.ease.EASE

Bases: ItemSimRecommender

Implementation of EASE algorithm from Embarrassingly Shallow Autoencoders for Sparse Data 2019.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Argument for PyTorch nn.Module.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Keyword argument for PyTorch nn.Module.

{}

Attributes:

Name Type Description
l2 float

The L2 regularization weight added to the diagonal of the item-item Gram matrix.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/ease.py
@model_registry.register(name="EASE")
class EASE(ItemSimRecommender):
    """Closed-form EASE recommender.

    Follows "Embarrassingly Shallow Autoencoders for Sparse Data" (2019): the
    item-item weights come from inverting the Gram matrix of the training
    interactions after adding ``l2`` on its diagonal.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Argument for PyTorch nn.Module.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Keyword argument for PyTorch nn.Module.

    Attributes:
        l2 (float): The weight of the identity term added to the Gram matrix.
    """

    l2: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM required to fit the model.

        Two candidate peaks are computed (building the regularized Gram matrix
        and inverting it) and handed to ``_peak_size_mb``.

        Args:
            params (dict): The dictionary with the model params (not read here).
            info (dict): The dictionary containing dataset information;
                ``info["n_items"]`` gives the side of the square matrices.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Additional keyword arguments (not read here).

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes`` string.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        sparse_train = interactions.get_sparse()
        item_count = info["n_items"]
        square_shape = (item_count, item_count)

        # Sizes of the item x item matrices involved in the fit
        sparse_gram_mb = cls._estimated_sparse_square_size_mb(
            source_nnz=sparse_train.nnz,
            side_len=item_count,
            data_dtype=sparse_train.dtype,
        )
        densified_mb = cls._dense_size_mb(square_shape, sparse_train.dtype)
        float64_square_mb = cls._dense_size_mb(square_shape, np.float64)

        # Candidate peaks: Gram + regularization step, then the inversion step
        while_regularizing_mb = sparse_gram_mb + densified_mb + 2 * float64_square_mb
        while_inverting_mb = densified_mb + 4 * float64_square_mb

        estimate = {
            "train_ram_mb": cls._peak_size_mb(
                while_regularizing_mb,
                while_inverting_mb,
            ),
            "notes": "EASE analytical train-space estimate",
        }
        return estimate

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        train = self.train_matrix
        item_count = train.shape[1]

        # Regularized Gram matrix: X^T X + l2 * I
        gram = train.T @ train + self.l2 * np.identity(item_count)

        # Closed-form weights: divide column j of the inverse by -P_jj
        weights = np.linalg.inv(gram)
        weights /= -np.diag(weights)

        # An item must not be used to predict itself
        np.fill_diagonal(weights, 0.0)

        self.item_similarity = weights

warprec.recommenders.collaborative_filtering_recommender.autoencoder.elsa.ELSA

Bases: IterativeRecommender

Implementation of ELSA algorithm from "Scalable Linear Shallow Autoencoder for Collaborative Filtering" in RecSys 22.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Argument for PyTorch nn.Module.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Keyword argument for PyTorch nn.Module.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

n_dims int

The number of dimensions for the latent space.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/elsa.py
@model_registry.register(name="ELSA")
class ELSA(IterativeRecommender):
    """ELSA recommender from
        "Scalable Linear Shallow Autoencoder for Collaborative Filtering" in RecSys 22.

    The model learns an item factor matrix ``W``; with ``A`` the row-normalized
    ``W``, scores for an interaction vector ``x`` are ``x A A^T - x``.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Argument for PyTorch nn.Module.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Keyword argument for PyTorch nn.Module.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        n_dims (int): The number of dimensions for the latent space.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    DATALOADER_TYPE = DataLoaderType.INTERACTION_LOADER

    n_dims: int
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        # predict() rebuilds user histories from the sparse training data
        self.train_matrix = interactions.get_sparse()

        # Learnable item factors W of shape [n_items, n_dims]
        factor_shape = [self.n_items, self.n_dims]
        self.W = nn.Parameter(torch.empty(factor_shape))
        nn.init.xavier_uniform_(self.W)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the interaction dataloader with this model's batch size.

        Args:
            interactions (Interactions): The interactions to iterate over.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_interaction_dataloader``.

        Returns:
            The object returned by ``interactions.get_interaction_dataloader``.
        """
        return interactions.get_interaction_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the MSE between L2-normalized scores and L2-normalized input.

        Args:
            batch (Any): The batch; its first element is the interaction matrix.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The loss for the batch.
        """
        history = batch[0]
        scores = self.forward(history)

        # Both sides are normalized row-wise before comparison
        unit_scores = F.normalize(scores, p=2, dim=-1)
        unit_history = F.normalize(history, p=2, dim=-1)
        loss = F.mse_loss(unit_scores, unit_history, reduction="mean")

        # Report the epoch-level loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self, x: Tensor) -> Tensor:
        """Forward pass of ELSA. Computes (xAA^T - x)

        Args:
            x (Tensor): Batch of user-item interactions [batch_size, n_items].

        Returns:
            Tensor: Predicted scores [batch_size, n_items].
        """
        # A is W with every row scaled to unit L2 norm
        unit_factors = F.normalize(self.W, p=2, dim=-1)

        # Project to the latent space (xA), then back to item space (xAA^T)
        projected = x @ unit_factors
        rebuilt = projected @ unit_factors.t()

        # Remove the input's own contribution: xAA^T - x
        return rebuilt - x

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items for a batch of users from their training interactions.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Densify the requested users' rows and run them through the model
        user_rows = user_indices.tolist()
        dense_history = self.train_matrix[user_rows, :].toarray()
        x = torch.from_numpy(dense_history).to(self.device).float()
        scores = self.forward(x)

        if item_indices is not None:
            # Case 'sampled': out-of-range indices are clamped to the last
            # valid item before gathering -> [batch_size, pad_seq]
            wanted = item_indices.to(scores.device).clamp(max=self.n_items - 1)
            return scores.gather(1, wanted)

        # Case 'full': scores for all items -> [batch_size, n_items]
        return scores

forward(x)

Forward pass of ELSA. Computes (xAA^T - x)

Parameters:

Name Type Description Default
x Tensor

Batch of user-item interactions [batch_size, n_items].

required

Returns:

Name Type Description
Tensor Tensor

Predicted scores [batch_size, n_items].

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/elsa.py
def forward(self, x: Tensor) -> Tensor:
    """Forward pass of ELSA. Computes (xAA^T - x)

    Args:
        x (Tensor): Batch of user-item interactions [batch_size, n_items].

    Returns:
        Tensor: Predicted scores [batch_size, n_items].
    """
    # A is W with every row scaled to unit L2 norm
    unit_factors = F.normalize(self.W, p=2, dim=-1)

    # Project to the latent space (xA), then back to item space (xAA^T)
    projected = x @ unit_factors
    rebuilt = projected @ unit_factors.t()

    # Remove the input's own contribution: xAA^T - x
    return rebuilt - x

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction in the form of X@B where B is a {item x item} similarity matrix.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/elsa.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users from their training interactions.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Densify the requested users' rows and run them through the model
    user_rows = user_indices.tolist()
    dense_history = self.train_matrix[user_rows, :].toarray()
    x = torch.from_numpy(dense_history).to(self.device).float()
    scores = self.forward(x)

    if item_indices is not None:
        # Case 'sampled': out-of-range indices are clamped to the last
        # valid item before gathering -> [batch_size, pad_seq]
        wanted = item_indices.to(scores.device).clamp(max=self.n_items - 1)
        return scores.gather(1, wanted)

    # Case 'full': scores for all items -> [batch_size, n_items]
    return scores

warprec.recommenders.collaborative_filtering_recommender.autoencoder.cdae.CDAE

Bases: IterativeRecommender

Implementation of CDAE algorithm from "Collaborative Denoising Auto-Encoders for Top-N Recommender Systems." in WSDM 2016.

This model learns latent representations by training a denoising autoencoder on corrupted user-item interaction vectors, incorporating a user-specific embedding to guide the reconstruction.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The dimension of the user embeddings and the hidden layer.

corruption float

The probability of dropout applied to the input layer (denoising).

hid_activation str

The activation function for the hidden layer ('relu', 'tanh', 'sigmoid').

out_activation str

The activation function for the output layer ('relu', 'tanh', 'sigmoid').

loss_type str

The loss function to use for backpropagation ('MSE', 'BCE'; matched exactly, upper case).

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used during training.

epochs int

The number of training epochs.

learning_rate float

The learning rate value.

Raises:

Type Description
ValueError

If the loss_type is not supported or an activation function name is invalid.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/cdae.py
@model_registry.register(name="CDAE")
class CDAE(IterativeRecommender):
    """Implementation of CDAE algorithm from
    "Collaborative Denoising Auto-Encoders for Top-N Recommender Systems." in WSDM 2016.

    This model learns latent representations by training a denoising autoencoder on corrupted
    user-item interaction vectors, incorporating a user-specific embedding to guide the
    reconstruction.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The dimension of the user embeddings and the hidden layer.
        corruption (float): The probability of dropout applied to the input layer (denoising).
        hid_activation (str): The activation function for the hidden layer ('relu', 'tanh', 'sigmoid').
        out_activation (str): The activation function for the output layer ('relu', 'tanh', 'sigmoid').
        loss_type (str): The loss function to use for backpropagation ('MSE', 'BCE').
            The value is matched exactly, so it must be upper case.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used during training.
        epochs (int): The number of training epochs.
        learning_rate (float): The learning rate value.

    Raises:
        ValueError: If the loss_type is not supported or an activation
            function name is invalid.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.INTERACTION_LOADER_WITH_USER_ID

    # Model hyperparameters
    embedding_size: int
    corruption: float
    hid_activation: str
    out_activation: str
    loss_type: str
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Store the training matrix for prediction
        self.train_matrix = interactions.get_sparse()

        # User-specific embedding
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)

        # Encoder for the item interaction vector
        self.item_encoder = nn.Linear(self.n_items, self.embedding_size)

        # Decoder to reconstruct the interaction vector
        self.decoder = nn.Linear(self.embedding_size, self.n_items)

        # Dropout layer for the "denoising" aspect
        self.corruption_dropout = nn.Dropout(p=self.corruption)

        # Activation functions
        self.h_act = self._get_activation(self.hid_activation)
        self.o_act = self._get_activation(self.out_activation)

        # Define loss type to use
        self.main_loss: nn.Module
        if self.loss_type == "MSE":
            self.main_loss = nn.MSELoss()
        elif self.loss_type == "BCE":
            # Operates on raw logits; training_step skips o_act in this case
            self.main_loss = nn.BCEWithLogitsLoss()
        else:
            raise ValueError("Invalid loss_type, loss_type must be in [MSE, BCE]")
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def _get_activation(self, name: str) -> nn.Module:
        """Helper to get an activation function module from its name.

        Args:
            name (str): One of 'relu', 'tanh' or 'sigmoid'.

        Returns:
            nn.Module: The matching activation module.

        Raises:
            ValueError: If the name is not one of the supported values.
        """
        if name == "relu":
            return nn.ReLU()
        if name == "tanh":
            return nn.Tanh()
        if name == "sigmoid":
            return nn.Sigmoid()
        raise ValueError(f"Invalid activation function name: {name}")

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the interaction dataloader, requesting user ids with each batch.

        Args:
            interactions (Interactions): The interactions to iterate over.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_interaction_dataloader``.

        Returns:
            The object returned by ``interactions.get_interaction_dataloader``.
        """
        return interactions.get_interaction_dataloader(
            batch_size=self.batch_size,
            include_user_id=True,
            **kwargs,
        )

    def forward(
        self, user_history: Tensor, user_indices: Tensor, *args: Any, **kwargs: Any
    ) -> Tensor:
        """Performs the forward pass of the CDAE model.

        Args:
            user_history (Tensor): The user-item interaction vector [batch_size, n_items].
            user_indices (Tensor): The user indices for the batch [batch_size].
            *args (Any): List of arguments.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The reconstructed interaction vector (logits) [batch_size, n_items].
        """
        # Encode the item history
        item_hidden = self.item_encoder(user_history)

        # Get the user-specific bias
        user_bias = self.user_embedding(user_indices)

        # Combine them (the "Collaborative" part)
        combined = self.h_act(item_hidden + user_bias)

        # Decode to reconstruct the original vector
        reconstructed_logits = self.decoder(combined)

        return reconstructed_logits

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the reconstruction loss plus user-embedding regularization.

        Args:
            batch (Any): A pair ``(user_indices, user_history)``.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss for the batch.
        """
        user_indices, user_history = batch

        # Apply corruption ("Denoising" part)
        corrupted_history = self.corruption_dropout(user_history)

        # Get the reconstructed logits from the corrupted input
        reconstructed_logits = self.forward(corrupted_history, user_indices)

        # Calculate the reconstruction loss against the original history
        if self.loss_type == "MSE":
            prediction = self.o_act(reconstructed_logits)
            main_loss = self.main_loss(prediction, user_history)
        else:
            # BCEWithLogitsLoss expects raw logits
            main_loss = self.main_loss(reconstructed_logits, user_history)

        # Calculate L2 regularization
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user_indices),
        )

        # Loss logging
        loss = main_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the encoder and decoder modules.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Dense interaction history of the requested users
        train_batch = self.train_matrix[user_indices.tolist(), :].toarray()
        predictions_logits = self.forward(
            torch.from_numpy(train_batch).float().to(self.device),
            # The embedding lookup needs the indices on the same device as
            # the history; this is a no-op when they are already there
            user_indices.to(self.device),
        )

        # Apply the final activation function to get scores
        predictions: Tensor = self.o_act(predictions_logits)

        if item_indices is None:
            # Case 'full': prediction on all items
            return predictions  # [batch_size, n_items]

        # Case 'sampled': prediction on a sampled set of items
        return predictions.gather(
            1,
            item_indices.to(predictions.device).clamp(
                max=self.n_items - 1
            ),  # [batch_size, pad_seq]
        )

forward(user_history, user_indices, *args, **kwargs)

Performs the forward pass of the CDAE model.

Parameters:

Name Type Description Default
user_history Tensor

The user-item interaction vector [batch_size, n_items].

required
user_indices Tensor

The user indices for the batch [batch_size].

required
*args Any

List of arguments.

()
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The reconstructed interaction vector (logits) [batch_size, n_items].

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/cdae.py
def forward(
    self, user_history: Tensor, user_indices: Tensor, *args: Any, **kwargs: Any
) -> Tensor:
    """Reconstruct interaction logits from a history and the user embedding.

    Args:
        user_history (Tensor): The user-item interaction vector [batch_size, n_items].
        user_indices (Tensor): The user indices for the batch [batch_size].
        *args (Any): List of arguments.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The reconstructed interaction vector (logits) [batch_size, n_items].
    """
    # Hidden contribution of the interaction history
    encoded_history = self.item_encoder(user_history)

    # Per-user offset added in the hidden layer (the "Collaborative" part)
    user_offset = self.user_embedding(user_indices)
    hidden = self.h_act(encoded_history + user_offset)

    # Map the hidden representation back to item space
    return self.decoder(hidden)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the encoder and decoder modules.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/cdae.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the encoder and decoder modules.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Dense interaction history of the requested users
    train_batch = self.train_matrix[user_indices.tolist(), :].toarray()
    predictions_logits = self.forward(
        torch.from_numpy(train_batch).float().to(self.device),
        # The embedding lookup needs the indices on the same device as the
        # history; this is a no-op when they are already there
        user_indices.to(self.device),
    )

    # Apply the final activation function to get scores
    predictions: Tensor = self.o_act(predictions_logits)

    if item_indices is None:
        # Case 'full': prediction on all items
        return predictions  # [batch_size, n_items]

    # Case 'sampled': prediction on a sampled set of items
    return predictions.gather(
        1,
        item_indices.to(predictions.device).clamp(
            max=self.n_items - 1
        ),  # [batch_size, pad_seq]
    )

warprec.recommenders.collaborative_filtering_recommender.autoencoder.macridvae.MacridVAE

Bases: IterativeRecommender

Implementation of MacridVAE algorithm from Learning Disentangled Representations for Recommendation (NeurIPS 2019).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

encoder_hidden_dims List[int]

The hidden dimensions of the encoder MLP.

k_fac int

The number of macro concepts (K).

tau float

The temperature for cosine similarity.

corruption float

The input corruption rate.

nogb bool

If True, use Softmax instead of Gumbel-Softmax for the concept assignment.

std float

The standard deviation for reparameterization.

anneal_cap float

The maximum value for KL annealing.

total_anneal_steps int

The number of annealing steps.

reg_weight float

The L2 regularization weight.

weight_decay float

The weight decay for optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/macridvae.py
@model_registry.register(name="MacridVAE")
class MacridVAE(IterativeRecommender):
    """Implementation of MacridVAE algorithm from
        Learning Disentangled Representations for Recommendation (NeurIPS 2019).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        encoder_hidden_dims (List[int]): The hidden dimensions of the encoder MLP.
        k_fac (int): The number of macro concepts (K).
        tau (float): The temperature for cosine similarity.
        corruption (float): The input corruption rate.
        nogb (bool): If True, use Softmax instead of Gumbel-Softmax.
        std (float): The standard deviation for reparameterization.
        anneal_cap (float): The maximum value for KL annealing.
        total_anneal_steps (int): The number of annealing steps.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The weight decay for optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition (Same as MultiVAE)
    DATALOADER_TYPE = DataLoaderType.INTERACTION_LOADER

    # Model hyperparameters
    embedding_size: int
    encoder_hidden_dims: List[int]
    k_fac: int
    tau: float
    corruption: float
    nogb: bool
    std: float
    anneal_cap: float
    total_anneal_steps: int
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Store the training matrix for prediction
        self.train_matrix = interactions.get_sparse()

        # Item embeddings and the K macro-concept prototypes
        self.item_embedding = nn.Embedding(self.n_items, self.embedding_size)
        self.k_embedding = nn.Embedding(self.k_fac, self.embedding_size)

        # Macrid Encoder
        self.encoder = MacridEncoder(
            input_dim=self.n_items,
            hidden_dims=self.encoder_hidden_dims,
            latent_dim=self.embedding_size,
            dropout_rate=self.corruption,
        )

        # Sampling layer
        self.sampling = Sampling()

        # Initialize weights and regularization
        self.reg_loss = EmbLoss()
        self.apply(self._init_weights)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the interaction dataloader with this model's batch size.

        Args:
            interactions (Interactions): The interactions to iterate over.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_interaction_dataloader``.

        Returns:
            The object returned by ``interactions.get_interaction_dataloader``.
        """
        return interactions.get_interaction_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def on_train_epoch_start(self):
        """Update the KL weight: linear in the epoch index, capped at ``anneal_cap``."""
        self.beta_anneal = (
            min(
                self.anneal_cap * self.current_epoch / self.total_anneal_steps,
                self.anneal_cap,
            )
            if self.total_anneal_steps > 0
            else self.anneal_cap
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute reconstruction + annealed KL + L2 regularization loss.

        Args:
            batch (Any): The batch; its first element is the rating matrix.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss for the batch.
        """
        rating_matrix = batch[0]

        logits, z_mean, z_log_var = self.forward(rating_matrix)

        # Calculate Reconstruction loss
        recon_loss = -(F.log_softmax(logits, dim=1) * rating_matrix).sum(dim=1).mean()

        # KL Divergence (Corrected for Prior N(0, sigma_0^2))
        var_prior = self.std**2 + 1e-10  # Small epsilon for numerical stability

        # Term 1: log(var_prior / var_posterior) = log(var_prior) - z_log_var
        term1 = torch.log(torch.tensor(var_prior, device=self.device)) - z_log_var

        # Term 2: (var_posterior + mu^2) / var_prior
        term2 = (z_log_var.exp() + z_mean.pow(2)) / var_prior

        # KL = 0.5 * sum( term1 + term2 - 1 )
        kl_loss = self.beta_anneal * (
            0.5 * torch.sum(term1 + term2 - 1, dim=[1, 2]).mean()
        )

        # Calculate L2 loss
        reg_tensors = [
            param
            for name, param in self.encoder.named_parameters()  # Include encoder params
            if name.endswith("weight")
        ]

        # Each tensor is passed as its own positional argument; multiplying
        # an embedding weight by a Python list of tensors is not valid
        reg_loss = self.reg_weight * self.reg_loss(
            self.item_embedding.weight, self.k_embedding.weight, *reg_tensors
        )

        # Loss logging
        loss = recon_loss + kl_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self, rating_matrix: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        """Forward pass handling Macro-Disentanglement logic.

        Args:
            rating_matrix (Tensor): (Batch, n_items)

        Returns:
            Tuple[Tensor, Tensor, Tensor]: A tuple containing:
                logits: (Batch, n_items) - Log of the concept-weighted scores
                    (not normalized over items)
                z_mean: (Batch, K, D)
                z_log_var: (Batch, K, D)
        """
        batch_size = rating_matrix.size(0)

        # Concept Assignment (Macro)
        # Normalize embeddings for Cosine Similarity
        items_norm = F.normalize(self.item_embedding.weight, dim=1)  # (M, D)
        cores_norm = F.normalize(self.k_embedding.weight, dim=1)  # (K, D)

        # Similarity: (M, K)
        cates_logits = torch.matmul(items_norm, cores_norm.t()) / self.tau

        if self.nogb:
            cates = torch.softmax(cates_logits, dim=-1)
        else:
            if self.training:
                cates = F.gumbel_softmax(cates_logits, tau=1, hard=False, dim=-1)
            else:
                # Deterministic assignment outside of training
                cates = torch.softmax(cates_logits, dim=-1)

        # Encoder (Vectorized)
        # We mask the input rating matrix with concept probabilities.
        # Input: (B, M). Cates: (M, K).
        # We want (B, K, M) where x_{u,k} = x_u * c_k

        # (B, 1, M) * (1, K, M) -> (B, K, M)
        x_k = rating_matrix.unsqueeze(1) * cates.t().unsqueeze(0)

        # Flatten to (B*K, M) for parallel processing in MLP
        x_k_flat = x_k.reshape(batch_size * self.k_fac, self.n_items)

        # Encode
        z_mean_flat, z_log_var_flat = self.encoder(x_k_flat)

        # Reshape back to (B, K, D)
        z_mean = z_mean_flat.view(batch_size, self.k_fac, -1)
        z_log_var = z_log_var_flat.view(batch_size, self.k_fac, -1)

        # Sample z
        z = self.sampling(z_mean, z_log_var)  # (B, K, D)

        # Decoder
        # Reconstruct based on cosine similarity between z_u^k and h_i
        # z: (B, K, D), items: (M, D)
        z_norm = F.normalize(z, dim=-1)

        # (B, K, D) @ (D, M) -> (B, K, M)
        concept_scores = torch.matmul(z_norm, items_norm.t()) / self.tau

        # Weight by concept probability c_{i,k}
        # (B, K, M) * (1, K, M) -> (B, K, M)
        weighted_scores = torch.exp(concept_scores) * cates.t().unsqueeze(0)

        # Sum over concepts K -> (B, M)
        probs = weighted_scores.sum(dim=1)

        # Logits for numerical stability in loss
        logits = torch.log(probs + 1e-10)

        return logits, z_mean, z_log_var

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the encoder and decoder modules.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Compute predictions and convert to Tensor
        train_batch = self.train_matrix[user_indices.tolist(), :].toarray()
        predictions, _, _ = self.forward(
            torch.from_numpy(train_batch).float().to(self.device)
        )

        if item_indices is None:
            # Case 'full': prediction on all items
            return predictions  # [batch_size, n_items]

        # Case 'sampled': prediction on a sampled set of items
        return predictions.gather(
            1,
            item_indices.to(predictions.device).clamp(
                max=self.n_items - 1
            ),  # [batch_size, pad_seq]
        )

forward(rating_matrix)

Forward pass handling Macro-Disentanglement logic.

Parameters:

Name Type Description Default
rating_matrix Tensor

(Batch, n_items)

required

Returns:

Type Description
Tuple[Tensor, Tensor, Tensor]

A tuple containing three tensors: logits, shape (Batch, n_items), the log-scores of the reconstruction; z_mean, shape (Batch, K, D); and z_log_var, shape (Batch, K, D).

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/macridvae.py
def forward(self, rating_matrix: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """Run the macro-disentangled encode / sample / decode pipeline.

    Args:
        rating_matrix (Tensor): (Batch, n_items)

    Returns:
        Tuple[Tensor, Tensor, Tensor]: A tuple containing:
            logits: (Batch, n_items) - Log probabilities of reconstruction
            z_mean: (Batch, K, D)
            z_log_var: (Batch, K, D)
    """
    n_users = rating_matrix.size(0)
    n_concepts = self.k_fac

    # --- Macro step: soft assignment of every item to the K concepts ---
    # Unit-length embeddings turn the dot product into a cosine similarity
    unit_items = F.normalize(self.item_embedding.weight, dim=1)  # (M, D)
    unit_cores = F.normalize(self.k_embedding.weight, dim=1)  # (K, D)
    assignment_logits = torch.matmul(unit_items, unit_cores.t()) / self.tau  # (M, K)

    # Gumbel noise is used only while training and only when not disabled
    # through `nogb`; every other case is a plain softmax
    if self.training and not self.nogb:
        item_concept = F.gumbel_softmax(assignment_logits, tau=1, hard=False, dim=-1)
    else:
        item_concept = torch.softmax(assignment_logits, dim=-1)

    # Broadcastable concept mask, shared by encoder and decoder: (1, K, M)
    concept_mask = item_concept.t().unsqueeze(0)

    # --- Encoder: one concept-masked copy of the input per concept ---
    # (B, 1, M) * (1, K, M) -> (B, K, M)
    masked_input = rating_matrix.unsqueeze(1) * concept_mask

    # Fold concepts into the batch axis so the MLP sees (B*K, M)
    flat_input = masked_input.reshape(n_users * n_concepts, self.n_items)
    flat_mean, flat_log_var = self.encoder(flat_input)

    # Unfold back to (B, K, D)
    z_mean = flat_mean.view(n_users, n_concepts, -1)
    z_log_var = flat_log_var.view(n_users, n_concepts, -1)

    # Draw the latent sample, (B, K, D)
    z = self.sampling(z_mean, z_log_var)

    # --- Decoder: cosine similarity between z_u^k and each item ---
    unit_z = F.normalize(z, dim=-1)

    # (B, K, D) @ (D, M) -> (B, K, M)
    per_concept_scores = torch.matmul(unit_z, unit_items.t()) / self.tau

    # Weight by c_{i,k} and sum over the K concepts -> (B, M)
    mixture = (torch.exp(per_concept_scores) * concept_mask).sum(dim=1)

    # Small epsilon keeps the log finite
    logits = torch.log(mixture + 1e-10)

    return logits, z_mean, z_log_var

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the encoder and decoder modules.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/macridvae.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users via the encoder/decoder stack.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            scores for every item are returned.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Densify the training rows of the requested users
    user_rows = user_indices.tolist()
    dense_history = self.train_matrix[user_rows, :].toarray()
    model_input = torch.from_numpy(dense_history).float().to(self.device)

    # forward yields (logits, z_mean, z_log_var); only the logits are scores
    scores, _z_mean, _z_log_var = self.forward(model_input)

    if item_indices is not None:
        # Case 'sampled': pick the requested columns; indices above the
        # last valid item are clamped down to it
        last_item = self.n_items - 1
        lookup = item_indices.to(scores.device).clamp(max=last_item)
        return scores.gather(1, lookup)  # [batch_size, pad_seq]

    # Case 'full': scores for all items
    return scores  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.autoencoder.multidae.MultiDAE

Bases: IterativeRecommender

Implementation of MultiDAE algorithm from Variational Autoencoders for Collaborative Filtering 2018.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

intermediate_dim int

Intermediate dimension size.

latent_dim int

Latent dimension size.

corruption float

The probability of dropout applied to the input layer (denoising).

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multidae.py
@model_registry.register(name="MultiDAE")
class MultiDAE(IterativeRecommender):
    """MultiDAE recommender, from
        Variational Autoencoders for Collaborative Filtering 2018.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        intermediate_dim (int): Intermediate dimension size.
        latent_dim (int): Latent dimension size.
        corruption (float): The probability of dropout applied to the input layer (denoising).
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.INTERACTION_LOADER

    intermediate_dim: int
    latent_dim: int
    corruption: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Sparse training interactions, looked up again at prediction time
        self.train_matrix = interactions.get_sparse()

        # Encoder first, decoder second; both share the same layer sizes
        self.encoder = Encoder(
            original_dim=self.n_items,
            intermediate_dim=self.intermediate_dim,
            latent_dim=self.latent_dim,
            dropout_rate=self.corruption,
        )
        self.decoder = Decoder(
            original_dim=self.n_items,
            intermediate_dim=self.intermediate_dim,
            latent_dim=self.latent_dim,
        )

        # Weight initialization runs before the loss module is attached
        self.apply(self._init_weights)
        self.loss = MultiDAELoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        # Only the interactions are used; `sessions` is accepted but unused
        loader = interactions.get_interaction_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )
        return loader

    def training_step(self, batch: Any, batch_idx: int):
        # The first element of the batch is the rating matrix
        user_batch = batch[0]

        # Reconstruct through the module call and compare with the input
        loss_value = self.loss(user_batch, self(user_batch))

        # Loss logging
        self.log("loss", loss_value, prog_bar=True, on_step=False, on_epoch=True)
        return loss_value

    def forward(self, rating_matrix: Tensor) -> Tensor:
        """Normalize, corrupt, encode and decode the rating matrix.

        Args:
            rating_matrix (Tensor): The input rating matrix.

        Returns:
            Tensor: The reconstructed rating matrix.
        """
        # Row-wise normalization of the input
        unit_rows = F.normalize(rating_matrix, dim=1)

        # Dropout is active only in training mode
        corrupted = F.dropout(unit_rows, self.corruption, training=self.training)

        # Encode, then decode
        latent = self.encoder(corrupted)
        return self.decoder(latent)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items for a batch of users via the encoder and decoder.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                scores for every item are returned.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Densify the training rows of the requested users
        user_rows = user_indices.tolist()
        dense_history = self.train_matrix[user_rows, :].toarray()
        model_input = torch.from_numpy(dense_history).float().to(self.device)
        scores = self.forward(model_input)

        if item_indices is not None:
            # Case 'sampled': pick the requested columns; indices above the
            # last valid item are clamped down to it
            last_item = self.n_items - 1
            lookup = item_indices.to(scores.device).clamp(max=last_item)
            return scores.gather(1, lookup)  # [batch_size, pad_seq]

        # Case 'full': scores for all items
        return scores  # [batch_size, n_items]

forward(rating_matrix)

Forward pass with normalization and dropout.

Parameters:

Name Type Description Default
rating_matrix Tensor

The input rating matrix.

required

Returns:

Name Type Description
Tensor Tensor

The reconstructed rating matrix.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multidae.py
def forward(self, rating_matrix: Tensor) -> Tensor:
    """Normalize, corrupt, encode and decode the rating matrix.

    Args:
        rating_matrix (Tensor): The input rating matrix.

    Returns:
        Tensor: The reconstructed rating matrix.
    """
    # Row-wise normalization of the input
    unit_rows = F.normalize(rating_matrix, dim=1)

    # Dropout is active only in training mode
    corrupted = F.dropout(unit_rows, self.corruption, training=self.training)

    # Encode, then decode
    latent = self.encoder(corrupted)
    return self.decoder(latent)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the encoder and decoder modules.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multidae.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users via the encoder and decoder.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            scores for every item are returned.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Densify the training rows of the requested users
    user_rows = user_indices.tolist()
    dense_history = self.train_matrix[user_rows, :].toarray()
    model_input = torch.from_numpy(dense_history).float().to(self.device)
    scores = self.forward(model_input)

    if item_indices is not None:
        # Case 'sampled': pick the requested columns; indices above the
        # last valid item are clamped down to it
        last_item = self.n_items - 1
        lookup = item_indices.to(scores.device).clamp(max=last_item)
        return scores.gather(1, lookup)  # [batch_size, pad_seq]

    # Case 'full': scores for all items
    return scores  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.autoencoder.multivae.MultiVAE

Bases: IterativeRecommender

Implementation of MultiVAE algorithm from Variational Autoencoders for Collaborative Filtering 2018.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

intermediate_dim int

Intermediate dimension size.

latent_dim int

Latent dimension size.

corruption float

The probability of dropout applied to the input layer (denoising).

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

anneal_cap float

Annealing cap for KL divergence.

anneal_step int

Annealing step for KL divergence.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multivae.py
@model_registry.register(name="MultiVAE")
class MultiVAE(IterativeRecommender):
    """MultiVAE recommender, from
        Variational Autoencoders for Collaborative Filtering 2018.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        intermediate_dim (int): Intermediate dimension size.
        latent_dim (int): Latent dimension size.
        corruption (float): The probability of dropout applied to the input layer (denoising).
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        anneal_cap (float): Annealing cap for KL divergence.
        anneal_step (int): Annealing step for KL divergence.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.INTERACTION_LOADER

    intermediate_dim: int
    latent_dim: int
    corruption: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    anneal_cap: float
    anneal_step: int

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Sparse training interactions, looked up again at prediction time
        self.train_matrix = interactions.get_sparse()

        # Variational encoder first, decoder second; same layer sizes
        self.encoder = VAEncoder(
            original_dim=self.n_items,
            intermediate_dim=self.intermediate_dim,
            latent_dim=self.latent_dim,
            dropout_rate=self.corruption,
        )
        self.decoder = VADecoder(
            original_dim=self.n_items,
            intermediate_dim=self.intermediate_dim,
            latent_dim=self.latent_dim,
        )

        # Weight initialization runs before the loss module is attached
        self.apply(self._init_weights)
        self.loss = MultiVAELoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        # Only the interactions are used; `sessions` is accepted but unused
        loader = interactions.get_interaction_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )
        return loader

    def on_train_epoch_start(self):
        # Linear ramp of the KL weight with the epoch index, capped at
        # `anneal_cap`; a non-positive `anneal_step` uses the cap directly
        if self.anneal_step > 0:
            ramp = self.anneal_cap * self.current_epoch / self.anneal_step
            self.current_anneal = min(ramp, self.anneal_cap)
        else:
            self.current_anneal = self.anneal_cap

    def training_step(self, batch: Any, batch_idx: int):
        # The first element of the batch is the rating matrix
        user_batch = batch[0]

        recon, kl_term = self.forward(user_batch)
        loss_value = self.loss(user_batch, recon, kl_term, self.current_anneal)

        # Loss logging
        self.log("loss", loss_value, prog_bar=True, on_step=False, on_epoch=True)
        return loss_value

    def forward(self, rating_matrix: Tensor) -> Tuple[Tensor, Tensor]:
        """Reconstruct the input and compute the KL divergence term.

        Args:
            rating_matrix (Tensor): The input rating matrix.

        Returns:
            Tuple[Tensor, Tensor]:
                - Tensor: The reconstructed rating matrix.
                - Tensor: The KL divergence loss.
        """
        # The encoder yields mean, log-variance and the latent sample
        z_mean, z_log_var, z = self.encoder(rating_matrix)
        reconstructed = self.decoder(z)

        # KL divergence, averaged over every element
        kl_elements = z_log_var - torch.square(z_mean) - torch.exp(z_log_var) + 1
        kl_loss = -0.5 * torch.mean(kl_elements)
        return reconstructed, kl_loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items for a batch of users via the encoder and decoder.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                scores for every item are returned.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Densify the training rows of the requested users
        user_rows = user_indices.tolist()
        dense_history = self.train_matrix[user_rows, :].toarray()
        model_input = torch.from_numpy(dense_history).float().to(self.device)

        # The KL term is irrelevant at prediction time
        scores, _kl = self.forward(model_input)

        if item_indices is not None:
            # Case 'sampled': pick the requested columns; indices above the
            # last valid item are clamped down to it
            last_item = self.n_items - 1
            lookup = item_indices.to(scores.device).clamp(max=last_item)
            return scores.gather(1, lookup)  # [batch_size, pad_seq]

        # Case 'full': scores for all items
        return scores  # [batch_size, n_items]

forward(rating_matrix)

Returns reconstruction and KL divergence.

Parameters:

Name Type Description Default
rating_matrix Tensor

The input rating matrix.

required

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: A tuple containing the reconstructed rating matrix (Tensor) and the KL divergence loss (Tensor).

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multivae.py
def forward(self, rating_matrix: Tensor) -> Tuple[Tensor, Tensor]:
    """Reconstruct the input and compute the KL divergence term.

    Args:
        rating_matrix (Tensor): The input rating matrix.

    Returns:
        Tuple[Tensor, Tensor]:
            - Tensor: The reconstructed rating matrix.
            - Tensor: The KL divergence loss.
    """
    # The encoder yields mean, log-variance and the latent sample
    z_mean, z_log_var, z = self.encoder(rating_matrix)
    reconstructed = self.decoder(z)

    # KL divergence, averaged over every element
    kl_elements = z_log_var - torch.square(z_mean) - torch.exp(z_log_var) + 1
    kl_loss = -0.5 * torch.mean(kl_elements)
    return reconstructed, kl_loss

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the encoder and decoder modules.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/multivae.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users via the encoder and decoder.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            scores for every item are returned.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Densify the training rows of the requested users
    user_rows = user_indices.tolist()
    dense_history = self.train_matrix[user_rows, :].toarray()
    model_input = torch.from_numpy(dense_history).float().to(self.device)

    # The KL term is irrelevant at prediction time
    scores, _kl = self.forward(model_input)

    if item_indices is not None:
        # Case 'sampled': pick the requested columns; indices above the
        # last valid item are clamped down to it
        last_item = self.n_items - 1
        lookup = item_indices.to(scores.device).clamp(max=last_item)
        return scores.gather(1, lookup)  # [batch_size, pad_seq]

    # Case 'full': scores for all items
    return scores  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.autoencoder.sansa.SANSA

Bases: ItemSimRecommender

Implementation of SANSA algorithm from "Scalable Approximate NonSymmetric Autoencoder for Collaborative Filtering" in RecSys 23.

This model implements a sparse approximate inversion of the Gram matrix. It attempts to use scikit-sparse (CHOLMOD) for high-performance Cholesky factorization. If not available, it falls back to scipy.sparse inversion.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Argument for PyTorch nn.Module.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Keyword argument for PyTorch nn.Module.

{}

Attributes:

Name Type Description
l2 float

The L2 regularization value.

target_density float

The desired density of the weight matrix B.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/sansa.py
@model_registry.register(name="SANSA")
class SANSA(ItemSimRecommender):
    """Implementation of SANSA algorithm from
        "Scalable Approximate NonSymmetric Autoencoder for Collaborative Filtering" in RecSys 23.

    This model implements a sparse approximate inversion of the Gram matrix.
    It attempts to use `scikit-sparse` (CHOLMOD) for high-performance Cholesky
    factorization. If not available, it falls back to `scipy.sparse` inversion.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Argument for PyTorch nn.Module.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Keyword argument for PyTorch nn.Module.

    Attributes:
        l2 (float): The L2 regularization value.
        target_density (float): The desired density of the weight matrix B.
    """

    l2: float
    target_density: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the peak training memory of the model.

        Args:
            params (dict): Model params; only ``target_density`` is read
                (default 1.0, clipped to the [0, 1] interval).
            info (dict): Dataset information; only ``n_items`` is read.
            interactions (Optional[Interactions]): The training interactions,
                passed through ``_require_interactions_for_estimate``.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            dict: A dictionary with the keys ``train_ram_mb`` and ``notes``.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        n_items = info["n_items"]
        target_density = min(max(params.get("target_density", 1.0), 0.0), 1.0)

        # Entry counts assumed for the final weight matrix and the Gram matrix
        final_nnz = int(np.ceil(n_items * n_items * target_density))
        gram_nnz = max(n_items, int(np.ceil(X.nnz * 0.9)))

        # Size of each intermediate structure, as reported by the size helpers
        diagonal_mb = cls._dense_size_mb((n_items,), X.dtype)
        gram_matrix_mb = cls._estimated_sparse_square_size_mb(
            source_nnz=X.nnz,
            side_len=n_items,
            data_dtype=X.dtype,
        )
        inverse_matrix_mb = cls._compressed_sparse_size_mb(
            nnz=max(gram_nnz, final_nnz),
            ptr_len=n_items + 1,
            data_dtype=X.dtype,
        )
        similarity_sparse_mb = cls._compressed_sparse_size_mb(
            nnz=final_nnz,
            ptr_len=n_items + 1,
            data_dtype=X.dtype,
        )
        sparsify_coo_mb = cls._coo_size_mb(final_nnz, X.dtype)

        # Candidate totals for the successive training stages
        train_ram_mb = cls._peak_size_mb(
            gram_matrix_mb,
            gram_matrix_mb + inverse_matrix_mb + diagonal_mb,
            inverse_matrix_mb + similarity_sparse_mb + diagonal_mb,
        )
        if target_density < 1.0:
            # Sparsification adds a COO copy on top of the last stage
            train_ram_mb = cls._peak_size_mb(
                train_ram_mb,
                inverse_matrix_mb
                + similarity_sparse_mb
                + sparsify_coo_mb
                + diagonal_mb,
            )

        return {
            "train_ram_mb": train_ram_mb,
            "notes": "SANSA analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        X = self.train_matrix

        # Compute Sparse Gram Matrix: G = X^T X
        G = (X.T @ X).tocsc()

        # Add L2 regularization to the diagonal
        diag_indices = np.arange(G.shape[0])
        G[diag_indices, diag_indices] += self.l2

        # Compute Approximate Inverse P = G^-1
        P = self._invert_matrix(G)

        # Extract diagonal of P
        diag_P = P.diagonal()

        # Handle division by zero (safety check). np.where evaluates the
        # division for every entry before selecting, so the warnings raised
        # for the zero entries are silenced; those entries are replaced by 0.0.
        with np.errstate(divide="ignore", invalid="ignore"):
            inv_diag = np.where(diag_P != 0, 1.0 / -diag_P, 0.0)

        # B = P * diag(inv_diag)
        # Scaling columns of P by inv_diag values.
        # Since P is symmetric (or close to), this implements the EASE formula.
        B = P.multiply(inv_diag[None, :])

        # Zero out the diagonal (constraint of the autoencoder)
        B.setdiag(0.0)
        B.eliminate_zeros()

        # Sparsification
        if self.target_density < 1.0:
            B = self._sparsify_matrix(B, self.target_density)

        # Store as CSR for fast multiplication during predict
        self.item_similarity = B.tocsr()

    def _invert_matrix(self, G: sp.csc_matrix) -> sp.csc_matrix:
        """Inverts the Gram matrix using the best available solver.

        Args:
            G (sp.csc_matrix): The regularized Gram matrix.

        Returns:
            sp.csc_matrix: The inverse of G.

        Raises:
            RuntimeError: If the SciPy fallback inversion fails; the original
                error is attached as the cause.
        """

        if HAS_SKSPARSE:
            try:
                # Factorize G = L D L^T
                factor = cholesky(G)
                # Solve G * P = I to get P
                # This is much faster and numerically stable than generic inversion
                P = factor.solve_A(sp.eye(G.shape[0], format="csc"))
                return P
            except Exception as e:
                # Deliberate best effort: any CHOLMOD failure is reported
                # and the SciPy path below is tried instead
                print(
                    f"CHOLMOD failed: {e}"
                )  # If CHOLMOD fails, we will fall back to SciPy inversion

        # Fallback path: SciPy
        try:
            # Note: The inverse of a sparse matrix can have significant fill-in
            P = sp.linalg.inv(G)
            return P
        except RuntimeError as err:
            # Chain the cause so the underlying solver error stays visible
            raise RuntimeError(
                "SANSA: Matrix inversion failed. The dataset might be too large "
                "or the matrix is singular."
            ) from err

    def _sparsify_matrix(self, matrix: sp.spmatrix, density: float) -> sp.csr_matrix:
        """Retains only the top-k elements globally to achieve target density.

        Elements are ranked by absolute value. Every element tied with the
        k-th largest one is kept, so the result may hold slightly more than
        k entries.

        Args:
            matrix (sp.spmatrix): The matrix to sparsify.
            density (float): The desired density ratio (0.0 to 1.0).

        Returns:
            sp.csr_matrix: The sparsified matrix.
        """
        if density >= 1.0:
            return matrix.tocsr()

        # Convert to COO to access data easily
        matrix_coo = matrix.tocoo()
        n_total = matrix.shape[0] * matrix.shape[1]
        k = int(n_total * density)

        # No entry may be kept. Without this guard `-k` would be 0 below,
        # which selects the smallest element as threshold and therefore
        # retains the whole matrix instead of nothing.
        if k <= 0:
            return sp.csr_matrix(matrix.shape, dtype=matrix.dtype)

        # If the matrix is already sparser than the target, return it
        if matrix_coo.nnz <= k:
            return matrix.tocsr()

        # Find threshold value using numpy's partition (O(N) complexity)
        data_abs = np.abs(matrix_coo.data)

        # We want the indices of the top k elements.
        # argpartition puts the k-th largest element at index -k,
        # and all larger elements after it.
        partition_idx = np.argpartition(data_abs, -k)[-k]
        threshold = data_abs[partition_idx]

        # Filter elements below threshold
        mask = data_abs >= threshold

        # Create new sparse matrix
        new_data = matrix_coo.data[mask]
        new_row = matrix_coo.row[mask]
        new_col = matrix_coo.col[mask]

        return sp.csr_matrix((new_data, (new_row, new_col)), shape=matrix.shape)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction override to handle Sparse Matrix multiplication.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix.
        """
        # Compute predictions: Sparse (Batch x Items) @ Sparse (Items x Items)
        # Result is Sparse (Batch x Items)
        train_batch = self.train_matrix[user_indices.tolist(), :]
        predictions_sparse = train_batch @ self.item_similarity

        # Convert to dense Tensor
        predictions = torch.from_numpy(predictions_sparse.toarray()).float()

        # Return full or sampled predictions
        if item_indices is None:
            # Case 'full': prediction on all items
            return predictions  # [batch_size, n_items]

        # Case 'sampled': prediction on a sampled set of items; indices above
        # the last valid item are clamped down to it
        return predictions.gather(
            1,
            item_indices.to(predictions.device).clamp(
                max=self.n_items - 1
            ),  # [batch_size, pad_seq]
        )

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction override to handle Sparse Matrix multiplication.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix.

Source code in warprec/recommenders/collaborative_filtering_recommender/autoencoder/sansa.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users through a matrix product.

    The batch rows of the training matrix are multiplied by the item
    similarity matrix and the result is densified into a float tensor.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix.
    """
    # Rows of the training matrix that belong to the requested users,
    # multiplied by the item-item similarity: (Batch x Items) @ (Items x Items)
    batch_rows = user_indices.tolist()
    scores_sparse = self.train_matrix[batch_rows, :] @ self.item_similarity

    # Densify the product and hand it over to PyTorch as float32
    scores = torch.from_numpy(scores_sparse.toarray()).float()

    if item_indices is not None:
        # Case 'sampled': keep only the requested columns. Indices above the
        # last valid item are clamped to n_items - 1 before gathering.
        safe_items = item_indices.to(scores.device).clamp(max=self.n_items - 1)
        scores = scores.gather(1, safe_items)  # [batch_size, pad_seq]

    # Case 'full' falls through untouched: [batch_size, n_items]
    return scores

Graph-Based

warprec.recommenders.collaborative_filtering_recommender.graph_based.dgcf.DGCF

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of DGCF algorithm from "Disentangled Graph Collaborative Filtering" (SIGIR 2020).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

n_factors int

The number of factors.

n_iterations int

The number of routing steps.

cor_weight float

The weight of correlation loss.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Raises:

Type Description
ValueError

If embedding size is not divisible by n_factors.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/dgcf.py
@model_registry.register(name="DGCF")
class DGCF(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of DGCF algorithm from
    "Disentangled Graph Collaborative Filtering" (SIGIR 2020).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        n_factors (int): The number of factors.
        n_iterations (int): The number of routing steps.
        cor_weight (float): The weight of correlation loss.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.

    Raises:
        ValueError: If embedding size is not divisible by n_factors.
    """

    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    n_factors: int
    n_iterations: int
    cor_weight: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        # Positional extras are unpacked before any keyword argument so the
        # call reads in the order Python actually binds it.
        super().__init__(params, info, *args, seed=seed, **kwargs)

        if self.embedding_size % self.n_factors != 0:
            raise ValueError(
                f"embedding_size ({self.embedding_size}) must be divisible by n_factors ({self.n_factors})."
            )

        # Each factor owns an equally sized slice of the embedding
        self.factor_dim = self.embedding_size // self.n_factors

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra item row (index n_items) is reserved as padding
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Graph Structure
        # We extract indices once. We do NOT use SparseTensor for propagation
        # because weights change every iteration. Raw scatter/gather is faster.
        self._init_graph_indices(interactions)

        # Init embedding weights
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def _init_graph_indices(self, interactions: Interactions):
        """Prepares graph indices for efficient scatter operations.

        Builds the edge list of the bipartite user-item graph in both
        directions and stores it in the ``edge_index_src`` and
        ``edge_index_dst`` buffers. Item nodes are offset by ``n_users``.

        Args:
            interactions (Interactions): The training interactions.
        """
        # Get COO matrix
        adj = interactions.get_sparse().tocoo()

        # Convert to tensors
        row = torch.from_numpy(adj.row).long()
        col = torch.from_numpy(adj.col).long()

        # Create Bi-partite Graph Indices (User->Item AND Item->User)
        # Source Nodes
        src = torch.cat([row, col + self.n_users])
        # Target Nodes
        dst = torch.cat([col + self.n_users, row])

        # Register as buffers (automatically moved to GPU)
        self.register_buffer("edge_index_src", src)
        self.register_buffer("edge_index_dst", dst)

        # Users + items + the padding item row
        self.num_nodes = self.n_users + self.n_items + 1
        self.num_edges = src.size(0)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss (BPR + L2 + correlation) for one batch.

        Args:
            batch (Any): A triple ``(user, pos_item, neg_item)``.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The total loss.
        """
        user, pos_item, neg_item = batch

        # Get propagated embeddings
        user_all_embeddings, item_all_embeddings = self.forward()

        # Get embeddings for current batch users and items
        u_embeddings = user_all_embeddings[user]
        pos_embeddings = item_all_embeddings[pos_item]
        neg_embeddings = item_all_embeddings[neg_item]

        # Calculate BPR loss
        pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)
        neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Calculate L2 loss (on the raw, non-propagated embeddings)
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Calculate Correlation loss
        cor_loss = self._correlation_loss(u_embeddings, pos_embeddings)
        cor_loss = self.cor_weight * cor_loss

        # Loss logging
        loss = bpr_loss + reg_loss + cor_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Performs DGCF propagation using vectorized operations.

        Returns:
            Tuple[Tensor, Tensor]: The pooled user embeddings and the pooled
                item embeddings (padding row included).
        """

        # Initial Embeddings: [n_nodes, embedding_size]
        ego_embeddings = torch.cat(
            [self.user_embedding.weight, self.item_embedding.weight], dim=0
        )

        # Reshape to [n_nodes, n_factors, factor_dim] for vectorized processing
        ego_embeddings = ego_embeddings.view(
            self.num_nodes, self.n_factors, self.factor_dim
        )

        all_embeddings = [ego_embeddings]

        # Initialize Routing Logits (S): [n_edges, n_factors]
        routing_logits = torch.zeros(
            self.num_edges, self.n_factors, device=self.device, dtype=torch.float32
        )

        # Loop over layers
        for _ in range(self.n_layers):
            # Current layer embeddings: [N, K, D]
            current_embeddings = all_embeddings[-1]

            # Loop over iterations (Routing)
            for _ in range(self.n_iterations):
                # Calculate Routing Weights (Softmax over factors)
                routing_weights = F.softmax(routing_logits, dim=1)  # [E, K]

                # Dynamic Normalization
                # Per-factor node degree: [N, K]
                # We sum the edge weights grouped by the SOURCE node of each
                # edge. Every edge is stored in both directions, so each
                # node appears as a source once per incident edge.
                deg = torch.zeros(self.num_nodes, self.n_factors, device=self.device)
                deg = deg.index_add(
                    0,
                    self.edge_index_src,  # type: ignore[arg-type]
                    routing_weights,
                )  # Sum rows (src)

                # D^-0.5 (nodes without edges yield inf, which is zeroed)
                deg_inv_sqrt = deg.pow(-0.5)
                deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float("inf"), 0.0)

                # Norm Edge Weights: [E, K]
                # norm[uv] = D[u]^-0.5 * W[uv] * D[v]^-0.5
                norm_weights = (
                    deg_inv_sqrt[self.edge_index_src]  # type: ignore[index]
                    * routing_weights
                    * deg_inv_sqrt[self.edge_index_dst]  # type: ignore[index]
                )

                # Message Passing
                # Gather Source Embeddings: [E, K, D]
                src_emb = current_embeddings[self.edge_index_src]  # type: ignore[index]

                # Apply Weights: [E, K, D] * [E, K, 1]
                weighted_messages = src_emb * norm_weights.unsqueeze(-1)

                # Aggregate to Target: [N, K, D]
                next_embeddings = torch.zeros_like(current_embeddings)
                next_embeddings = next_embeddings.index_add(
                    0,
                    self.edge_index_dst,  # type: ignore[arg-type]
                    weighted_messages,
                )

                # Gather embeddings for edges: [E, K, D]
                head_emb_edge = next_embeddings[self.edge_index_src]  # type: ignore[index]
                tail_emb_edge = current_embeddings[self.edge_index_dst]  # type: ignore[index]

                # Dot product sum over D -> [E, K]
                delta_logits = (head_emb_edge * torch.tanh(tail_emb_edge)).sum(dim=2)

                routing_logits = routing_logits + delta_logits

            # Update for next layer (output of the last routing iteration)
            all_embeddings.append(next_embeddings)

        # Sum pooling over layers
        # Stack layers: [L+1, N, K, D]
        # Sum layers: [N, K, D]
        final_embeddings = torch.stack(all_embeddings, dim=0).sum(dim=0)

        # Reshape back to [N, Embed_Size] (Concatenate factors)
        final_embeddings = final_embeddings.view(self.num_nodes, self.embedding_size)

        user_final, item_final = torch.split(
            final_embeddings, [self.n_users, self.n_items + 1]
        )
        return user_final, item_final

    def _distance_correlation(self, x1: Tensor, x2: Tensor) -> Tensor:
        """Calculates Distance Correlation between two tensors.

        Args:
            x1 (Tensor): First set of row vectors.
            x2 (Tensor): Second set of row vectors (same number of rows).

        Returns:
            Tensor: Scalar distance correlation value.
        """

        def _centered_distance(x):
            # Pairwise distance matrix: (x-y)^2 = x^2 - 2xy + y^2
            r = torch.sum(x * x, dim=1, keepdim=True)
            d = r - 2 * torch.mm(x, x.t()) + r.t()
            # Clamp before the root: keeps the value and its gradient finite
            d = torch.sqrt(d.clamp(min=1e-8))

            # Centering
            row_mean = d.mean(dim=1, keepdim=True)
            col_mean = d.mean(dim=0, keepdim=True)
            all_mean = d.mean()
            return d - row_mean - col_mean + all_mean

        def _dist_covariance(d1, d2):
            return (d1 * d2).sum() / (d1.shape[0] ** 2)

        d1 = _centered_distance(x1)
        d2 = _centered_distance(x2)

        dcov_12 = _dist_covariance(d1, d2)
        dcov_11 = _dist_covariance(d1, d1)
        dcov_22 = _dist_covariance(d2, d2)

        # The product is exactly 0 when a centered distance matrix vanishes
        # (e.g. a batch holding a single row). sqrt has an unbounded derivative
        # at 0, which would turn the backward pass into NaN even though the
        # forward value is 0. Clamping leaves every non-degenerate value
        # untouched and blocks the gradient only in the degenerate case.
        denom = torch.sqrt((dcov_11 * dcov_22).clamp(min=1e-16)) + 1e-8
        return dcov_12 / denom

    def _correlation_loss(self, user_emb: Tensor, item_emb: Tensor) -> Tensor:
        """Computes correlation loss efficiently.

        Sums the distance correlation over every unordered pair of factors,
        separately for the user and the item embeddings.

        Args:
            user_emb (Tensor): User embeddings of the batch.
            item_emb (Tensor): Item embeddings of the batch.

        Returns:
            Tensor: Scalar correlation loss.
        """
        # Reshape to [Batch, n_factors, factor_dim]
        # This avoids using torch.chunk which creates list overhead
        user_factors = user_emb.view(-1, self.n_factors, self.factor_dim)
        item_factors = item_emb.view(-1, self.n_factors, self.factor_dim)

        loss = torch.tensor(0.0, device=self.device)

        # We iterate only unique pairs of factors
        for i in range(self.n_factors):
            for j in range(i + 1, self.n_factors):
                loss = loss + self._distance_correlation(
                    user_factors[:, i], user_factors[:, j]
                )
                loss = loss + self._distance_correlation(
                    item_factors[:, i], item_factors[:, j]
                )

        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Performs DGCF propagation using vectorized operations.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/dgcf.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run the DGCF routing-based propagation and pool the layer outputs.

    Returns:
        Tuple[Tensor, Tensor]: The pooled user embeddings and the pooled
            item embeddings (padding row included).
    """
    node_shape = (self.num_nodes, self.n_factors, self.factor_dim)

    # Layer-0 representation: users stacked above items, split per factor
    # into [n_nodes, n_factors, factor_dim]
    base = torch.cat(
        (self.user_embedding.weight, self.item_embedding.weight), dim=0
    ).view(*node_shape)
    layer_outputs = [base]

    # Routing logits, one row per edge and one column per factor: [E, K]
    logits = torch.zeros(
        self.num_edges, self.n_factors, device=self.device, dtype=torch.float32
    )

    for _layer in range(self.n_layers):
        prev = layer_outputs[-1]  # [N, K, D]

        for _step in range(self.n_iterations):
            # Softmax over factors turns logits into edge weights: [E, K]
            edge_weights = F.softmax(logits, dim=1)

            # Per-factor node degree: edge weights summed by source node
            factor_degree = torch.zeros(
                self.num_nodes, self.n_factors, device=self.device
            ).index_add(
                0,
                self.edge_index_src,  # type: ignore[arg-type]
                edge_weights,
            )

            # D^-0.5, with the inf entries of isolated nodes reset to 0
            inv_sqrt_deg = factor_degree.pow(-0.5)
            inv_sqrt_deg = inv_sqrt_deg.masked_fill(
                inv_sqrt_deg == float("inf"), 0.0
            )

            # norm[uv] = D[u]^-0.5 * W[uv] * D[v]^-0.5 -> [E, K]
            edge_norm = (
                inv_sqrt_deg[self.edge_index_src]  # type: ignore[index]
                * edge_weights
                * inv_sqrt_deg[self.edge_index_dst]  # type: ignore[index]
            )

            # Weighted source embeddings, [E, K, D], summed into the targets
            messages = prev[self.edge_index_src] * edge_norm.unsqueeze(-1)  # type: ignore[index]
            aggregated = torch.zeros_like(prev).index_add(
                0,
                self.edge_index_dst,  # type: ignore[arg-type]
                messages,
            )

            # Logit update: updated head embedding against tanh of the tail
            heads = aggregated[self.edge_index_src]  # type: ignore[index]
            tails = prev[self.edge_index_dst]  # type: ignore[index]
            logits = logits + (heads * torch.tanh(tails)).sum(dim=2)

        # The result of the last routing step is this layer's output
        layer_outputs.append(aggregated)

    # Sum over layers ([L+1, N, K, D] -> [N, K, D]) and flatten the factors
    pooled = (
        torch.stack(layer_outputs, dim=0)
        .sum(dim=0)
        .view(self.num_nodes, self.embedding_size)
    )

    user_part, item_part = torch.split(pooled, [self.n_users, self.n_items + 1])
    return user_part, item_part

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/dgcf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Propagated embeddings for every user and every item
    all_users, all_items = self.propagate_embeddings()

    # Rows belonging to the users of this batch: [batch_size, embedding_size]
    batch_users = all_users[user_indices]

    if item_indices is not None:
        # Case 'sampled': one candidate list per user
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Case 'full': every item except the trailing padding row
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item -> [batch_size, n_items]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.egcf.EGCF

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of EGCF algorithm from "Simplify to the Limit! Embedding-Less Graph Collaborative Filtering for Recommender Systems" (TOIS 2024).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of items.

n_layers int

The number of propagation layers.

ssl_lambda float

Weight for the Self-Supervised Learning (InfoNCE) loss.

temperature float

Temperature parameter for InfoNCE loss.

mode str

Propagation mode, either 'parallel' or 'alternating'.

reg_weight float

Weight for the regularization loss.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Raises:

Type Description
ValueError

If the mode is not supported.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/egcf.py
@model_registry.register(name="EGCF")
class EGCF(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of EGCF algorithm from
    "Simplify to the Limit! Embedding-Less Graph Collaborative Filtering for Recommender Systems" (TOIS 2024).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of items.
        n_layers (int): The number of propagation layers.
        ssl_lambda (float): Weight for the Self-Supervised Learning (InfoNCE) loss.
        temperature (float): Temperature parameter for InfoNCE loss.
        mode (str): Propagation mode, either 'parallel' or 'alternating'.
        reg_weight (float): Weight for the regularization loss.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.

    Raises:
        ValueError: If the mode is not supported.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    ssl_lambda: float
    temperature: float
    mode: str
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        if self.mode not in ("parallel", "alternating"):
            raise ValueError(
                f"Unsupported mode: {self.mode}. Available modes are 'parallel' and 'alternating'."
            )

        # Only items own trainable embeddings; the extra row (index n_items)
        # is reserved as padding.
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # User-Item Interaction Matrix (R)
        self.R_adj = self._get_user_item_adj(interactions)

        # Bipartite Adjacency Matrix (A), only needed by the parallel mode
        if self.mode == "parallel":
            self.bipartite_adj = self.get_adj_mat(
                interactions.get_sparse().tocoo(),
                self.n_users,
                self.n_items + 1,  # Adjust for padding idx
                normalize=True,
            )

        self.activation = nn.Tanh()

        # Init embedding weights
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.nce_loss = InfoNCELoss(self.temperature)

    def _get_user_item_adj(self, interactions: Interactions) -> Tensor:
        """Constructs the normalized User-Item interaction matrix R.
        Formula: R_norm = D_u^(-1/2) * R * D_i^(-1/2)

        Args:
            interactions (Interactions): The interaction object.

        Returns:
            Tensor: The sparse normalized adjacency matrix (User x Item).
        """
        # Get sparse interaction matrix in COO format
        adj = interactions.get_sparse().tocoo()
        row = torch.from_numpy(adj.row).to(torch.long)
        col = torch.from_numpy(adj.col).to(torch.long)

        # Compute degrees for normalization
        # Degree of user u = number of items they interacted with
        # Degree of item i = number of users who interacted with it
        deg_user = torch.zeros(self.n_users, dtype=torch.float32)

        # Use n_items + 1 to be safe with dimensions, though we only populate up to n_items
        deg_item = torch.zeros(self.n_items + 1, dtype=torch.float32)

        deg_user.scatter_add_(0, row, torch.ones_like(row, dtype=torch.float32))
        deg_item.scatter_add_(0, col, torch.ones_like(col, dtype=torch.float32))

        # Inverse sqrt degree (zero-degree entries yield inf, which is zeroed)
        deg_inv_sqrt_user = deg_user.pow(-0.5)
        deg_inv_sqrt_user.masked_fill_(deg_inv_sqrt_user == float("inf"), 0.0)

        deg_inv_sqrt_item = deg_item.pow(-0.5)
        deg_inv_sqrt_item.masked_fill_(deg_inv_sqrt_item == float("inf"), 0.0)

        # Values for the sparse matrix: 1 / sqrt(Du * Di)
        values = deg_inv_sqrt_user[row] * deg_inv_sqrt_item[col]

        # Create sparse tensor
        indices = torch.stack([row, col], dim=0)
        shape = (self.n_users, self.n_items + 1)

        # We use coalesce to ensure indices are sorted and unique
        R_adj = torch.sparse_coo_tensor(indices, values, shape).coalesce()
        return R_adj

    def _sparse_mm(self, sparse_mat: Any, dense_mat: Tensor) -> Tensor:
        """Wrapper to handle both torch.Tensor (sparse) and torch_sparse.SparseTensor
        matrix multiplication.

        Args:
            sparse_mat (Any): The sparse left operand.
            dense_mat (Tensor): The dense right operand.

        Returns:
            Tensor: The product of the two matrices.

        Raises:
            TypeError: If ``sparse_mat`` is neither of the supported types.
        """
        if isinstance(sparse_mat, Tensor):
            # Standard PyTorch sparse tensor
            return torch.sparse.mm(sparse_mat, dense_mat)
        if isinstance(sparse_mat, SparseTensor):
            # torch_sparse.SparseTensor
            return sparse_mat.matmul(dense_mat)
        raise TypeError(f"Unsupported sparse matrix type: {type(sparse_mat)}")

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss (BPR + L2 + InfoNCE) for one batch.

        Args:
            batch (Any): A triple ``(user, pos_item, neg_item)``.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The total loss.
        """
        user, pos_item, neg_item = batch

        # Get propagated embeddings
        user_all_embeddings, item_all_embeddings = self.forward()

        # Get embeddings for current batch users and items
        u_embeddings = user_all_embeddings[user]
        pos_embeddings = item_all_embeddings[pos_item]
        neg_embeddings = item_all_embeddings[neg_item]

        # Calculate BPR loss
        pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)
        neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Calculate L2 loss (items only: users have no embedding table)
        reg_loss = self.reg_weight * self.reg_loss(
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Calculate InfoNCE loss
        ssl_user_loss = self.nce_loss(
            u_embeddings, u_embeddings
        )  # User-User Uniformity (L_user)
        ssl_pos_loss = self.nce_loss(
            pos_embeddings, pos_embeddings
        )  # Item-Item Uniformity (L_item)
        ssl_inter_loss = self.nce_loss(
            u_embeddings, pos_embeddings
        )  # User-Item Alignment (L_inter)
        ssl_loss = self.ssl_lambda * (ssl_user_loss + ssl_pos_loss + ssl_inter_loss)

        # Loss logging
        loss = bpr_loss + reg_loss + ssl_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of EGCF.

        Generates user and item embeddings based on the selected mode.

        Returns:
            Tuple[Tensor, Tensor]: The final user and item embeddings.
        """
        # Ensure R_adj is on the correct device (it is a plain attribute,
        # so it has to be moved by hand)
        if self.R_adj.device != self.device:
            self.R_adj = self.R_adj.to(self.device)

        if self.mode == "parallel":
            return self._forward_parallel()
        return self._forward_alternating()

    def _forward_alternating(self) -> Tuple[Tensor, Tensor]:
        """Alternating Iteration.

        Propagates information back and forth between users and items using R and R.T.

        Returns:
            Tuple[Tensor, Tensor]: The sum-pooled user and item embeddings.
        """
        # Initial Item Embedding
        current_item_emb = self.item_embedding.weight

        # The transpose does not change between layers: build it once
        R = self.R_adj
        R_t = R.t()

        all_user_embeddings = []
        all_item_embeddings = []

        for _ in range(self.n_layers):
            # User = Tanh(R * Item)
            user_emb = self.activation(self._sparse_mm(R, current_item_emb))

            # Item = Tanh(R.T * User)
            item_emb_next = self.activation(self._sparse_mm(R_t, user_emb))

            all_user_embeddings.append(user_emb)
            all_item_embeddings.append(item_emb_next)

            current_item_emb = item_emb_next

        # Sum pooling
        final_user_embeddings = torch.stack(all_user_embeddings, dim=1).sum(dim=1)
        final_item_embeddings = torch.stack(all_item_embeddings, dim=1).sum(dim=1)

        return final_user_embeddings, final_item_embeddings

    def _forward_parallel(self) -> Tuple[Tensor, Tensor]:
        """Parallel Iteration.

        Performs the following steps:
            1. Construct initial User embedding from Items.
            2. Concatenate and propagate using standard bipartite graph.

        Returns:
            Tuple[Tensor, Tensor]: The sum-pooled user and item embeddings.
        """
        # Ensure bipartite_adj is on the correct device.
        # torch_sparse.SparseTensor exposes ``device`` as a method while
        # torch.Tensor exposes it as an attribute; _sparse_mm accepts both
        # types, so the check has to cope with both as well.
        adj_device = self.bipartite_adj.device
        if callable(adj_device):
            adj_device = adj_device()
        if adj_device != self.device:
            self.bipartite_adj = self.bipartite_adj.to(self.device)

        item_emb_0 = self.item_embedding.weight

        # Construct initial user embedding: e_u^(0) = R * e_i^(0)
        user_emb_0 = self._sparse_mm(self.R_adj, item_emb_0)

        # Concatenate [User, Item]
        all_embedding = torch.cat([user_emb_0, item_emb_0], dim=0)

        embeddings_list = []

        # Propagate using Bipartite Graph
        current_embedding = all_embedding
        for _ in range(self.n_layers):
            # A * E
            current_embedding = self._sparse_mm(self.bipartite_adj, current_embedding)
            # Activation
            current_embedding = self.activation(current_embedding)
            embeddings_list.append(current_embedding)

        # Sum pooling
        final_embeddings = torch.stack(embeddings_list, dim=1).sum(dim=1)

        # Split back to User and Item
        user_all_embeddings, item_all_embeddings = torch.split(
            final_embeddings, [self.n_users, self.n_items + 1]
        )

        return user_all_embeddings, item_all_embeddings

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Forward pass of EGCF.

Generates user and item embeddings based on the selected mode.

Returns:

Type Description
Tuple[Tensor, Tensor]

The final user and item embeddings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/egcf.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run the EGCF propagation variant selected by ``self.mode``.

    The adjacency ``R_adj`` is first moved to ``self.device`` when it lives
    elsewhere, then the call is delegated to the parallel or the alternating
    implementation.

    Returns:
        Tuple[Tensor, Tensor]: The final user and item embeddings.
    """
    # Keep the adjacency on the same device as the model before propagating
    adjacency = self.R_adj
    if adjacency.device != self.device:
        self.R_adj = adjacency.to(self.device)

    # Any mode other than "parallel" falls back to the alternating scheme
    propagate = (
        self._forward_parallel
        if self.mode == "parallel"
        else self._forward_alternating
    )
    return propagate()

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/egcf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item row except the last one is scored.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # One propagation pass yields the complete user and item tables
    all_users, all_items = self.propagate_embeddings()

    # Rows of the user table that belong to this batch
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled scoring: every user is matched against its own candidates
        candidates = all_items[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]
        # b: batch, s: sample, e: embedding -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Full scoring: the trailing row is excluded (presumably the padding
    # entry of the item table)
    catalogue = all_items[:-1, :]
    # b: batch, i: item, e: embedding -> [batch_size, n_items]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.esigcf.ESIGCF

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of ESIGCF algorithm from Extremely Simplified but Intent-enhanced Graph Collaborative Filtering.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of items.

n_layers int

The number of propagation layers.

ssl_lambda float

Weight for SSL Intent Loss (User-Positive).

can_lambda float

Weight for Candidate Loss (Positive-Generated).

temperature float

Temperature for InfoNCE loss.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/esigcf.py
@model_registry.register(name="ESIGCF")
class ESIGCF(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of ESIGCF algorithm from
        Extremely Simplified but Intent-enhanced Graph Collaborative Filtering.

    Only items own a learnable embedding table; the initial user embeddings
    are derived from it through the normalized interaction graph.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of items.
        n_layers (int): The number of propagation layers.
        ssl_lambda (float): Weight for SSL Intent Loss (User-Positive).
        can_lambda (float): Weight for Candidate Loss (Positive-Generated).
        temperature (float): Temperature for InfoNCE loss.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    ssl_lambda: float
    can_lambda: float
    temperature: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Item table with one extra row (index n_items) reserved for padding
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Activation functions
        self.activation_layer = nn.Tanh()
        self.gen_activation = nn.LeakyReLU()

        # Initialize weights
        self.apply(self._init_weights)

        # Initialize Losses
        self.bpr_loss_func = BPRLoss()
        self.reg_loss_func = EmbLoss()
        self.nce_loss = InfoNCELoss(temperature=self.temperature)

        # Precompute Adjacency Matrices with Hybrid Normalization
        self._init_graphs(interactions)

    def _init_graphs(self, interactions: Interactions):
        """Builds the hybrid-normalized adjacency matrices used by the model.

        Every user -> item edge is weighted with 1/Du + 1/sqrt(Du*Di) and every
        item -> user edge with 1/Di + 1/sqrt(Di*Du), where Du and Di are the
        row and column sums of the interaction matrix (zeros are replaced by 1
        to avoid dividing by zero).

        Registers two buffers:
            1. user_graph (R): [n_users, n_items + 1] - For initial user
               embedding generation.
            2. full_graph (A): [n_users + n_items + 1, n_users + n_items + 1] -
               For propagation.

        The extra row/column keeps the graphs aligned with the padded item
        embedding table.

        Args:
            interactions (Interactions): The training interactions.
        """
        # Get Interaction Matrix (User x Item)
        R = interactions.get_sparse().tocoo()
        row = R.row
        col = R.col

        # Calculate Degrees.
        # ravel() (not squeeze()) keeps the vectors 1-D even when the dataset
        # has a single user or a single item, so the fancy indexing below
        # never sees a 0-d array.
        user_degree = np.array(R.sum(axis=1)).ravel()
        item_degree = np.array(R.sum(axis=0)).ravel()

        # Handle division by zero
        user_degree[user_degree == 0] = 1.0
        item_degree[item_degree == 0] = 1.0

        # Calculate Hybrid Values
        # Norm = (1 / |Nu|) + (1 / sqrt(|Nu||Ni|))
        d_u_inv = 1.0 / user_degree
        d_u_sqrt = np.sqrt(user_degree)
        d_i_sqrt = np.sqrt(item_degree)

        # Values for edges (u, i)
        val_mean = d_u_inv[row]
        val_sym = 1.0 / (d_u_sqrt[row] * d_i_sqrt[col])
        val_hybrid = val_mean + val_sym

        # Construct user_graph (R)
        indices = np.vstack((row, col))
        i = torch.LongTensor(indices)
        v = torch.FloatTensor(val_hybrid)
        shape = (self.n_users, self.n_items + 1)  # +1 for padding consistency

        user_graph = torch.sparse_coo_tensor(i, v, shape).coalesce()
        self.register_buffer("user_graph", user_graph)

        # Construct full_graph (A)
        # Top-Right (User -> Item): Hybrid Norm
        val_tr = val_hybrid

        # Bottom-Left (Item -> User): Hybrid Norm
        # Mean part: 1/Di
        # Sym part: 1/sqrt(Di*Du), identical to the user -> item symmetric
        # term, so it is reused instead of being recomputed
        d_i_inv = 1.0 / item_degree
        val_bl = d_i_inv[col] + val_sym

        # Concatenate for full matrix
        # Rows: [row, col + n_users]
        # Cols: [col + n_users, row]
        full_row = np.concatenate([row, col + self.n_users])
        full_col = np.concatenate([col + self.n_users, row])
        full_val = np.concatenate([val_tr, val_bl])

        indices_full = np.vstack((full_row, full_col))
        i_full = torch.LongTensor(indices_full)
        v_full = torch.FloatTensor(full_val)

        # Size: Users + Items + 1 (padding)
        total_nodes = self.n_users + self.n_items + 1
        shape_full = (total_nodes, total_nodes)

        full_graph = torch.sparse_coo_tensor(i_full, v_full, shape_full).coalesce()
        self.register_buffer("full_graph", full_graph)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Returns the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Unused by this model.
            **kwargs (Any): Extra keyword arguments forwarded to the
                dataloader factory.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Computes the training loss for one batch.

        The loss is the sum of BPR, weighted L2 regularization on the raw item
        embeddings, the user/positive InfoNCE term and the positive/generated
        negative InfoNCE term.

        Args:
            batch (Any): A (user, pos_item, neg_item) triple of index tensors.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss for the batch.
        """
        user, pos_item, neg_item = batch

        # Get propagated embeddings
        all_user_embeddings, all_item_embeddings = self.forward()

        # Get embeddings for current batch users and items
        u_emb = all_user_embeddings[user]
        pos_emb = all_item_embeddings[pos_item]
        neg_emb = all_item_embeddings[neg_item]

        # Get initial embeddings for regularization
        ego_pos_emb = self.item_embedding(pos_item)
        ego_neg_emb = self.item_embedding(neg_item)

        # Generate "Intent-aware" Negative Item
        # Generated Negative = LeakyReLU(Pos * Neg)
        can_neg_emb = self.gen_activation(pos_emb * neg_emb)

        # Calculate BPR loss
        pos_scores = torch.sum(u_emb * pos_emb, dim=1)
        neg_scores = torch.sum(u_emb * neg_emb, dim=1)
        bpr_loss = self.bpr_loss_func(pos_scores, neg_scores)

        # Calculate L2 loss
        reg_loss = self.reg_weight * self.reg_loss_func(ego_pos_emb, ego_neg_emb)

        # SSL Intent Loss (User - Positive Item)
        # Align user and positive item
        ssl_intent_loss = self.nce_loss(u_emb, pos_emb)
        ssl_loss = self.ssl_lambda * ssl_intent_loss

        # Candidate/Item Loss (Positive Item - Generated Negative)
        # Align Positive Item with Generated Negative (Intent View)
        can_item_loss = self.nce_loss(pos_emb, can_neg_emb)
        can_loss = self.can_lambda * can_item_loss

        # Loss logging
        loss = bpr_loss + reg_loss + ssl_loss + can_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of ESIGCF.

        Steps performed:
            1. Construct initial user embeddings from item embeddings using user_graph.
            2. Concatenate User and Item embeddings.
            3. Propagate through Full Graph with Tanh activation.
            4. Sum representations from all layers.

        Returns:
            Tuple[Tensor, Tensor]: The final user embeddings
                [n_users, embedding_size] and item embeddings
                [n_items + 1, embedding_size] (padding row included).
        """
        item_embedding = self.item_embedding.weight

        # Initial User Embedding = Activation(R * Item_Emb)
        user_embedding_0 = self.activation_layer(
            torch.sparse.mm(self.user_graph, item_embedding)
        )  # [n_users, embedding_size]

        # Concatenate for propagation: [n_users + n_items + 1, dim]
        all_embedding = torch.cat([user_embedding_0, item_embedding], dim=0)

        all_embeddings_list = [all_embedding]

        # Iterative Propagation
        for _ in range(self.n_layers):
            all_embedding = torch.sparse.mm(self.full_graph, all_embedding)
            all_embedding = self.activation_layer(all_embedding)
            all_embeddings_list.append(all_embedding)

        # Aggregation: Summation
        final_all_embeddings = torch.stack(all_embeddings_list, dim=1).sum(dim=1)

        # Split back to User and Item
        users_emb, items_emb = torch.split(
            final_all_embeddings, [self.n_users, self.n_items + 1]
        )

        return users_emb, items_emb

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Forward pass of ESIGCF.

Steps performed:
  1. Construct initial user embeddings from item embeddings using user_graph.
  2. Concatenate User and Item embeddings.
  3. Propagate through Full Graph with Tanh activation.
  4. Sum representations from all layers.
Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/esigcf.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Forward pass of ESIGCF.

    Initial user embeddings are obtained by multiplying ``user_graph`` with
    the item embedding table and applying the activation. Users and items
    are then stacked, propagated ``n_layers`` times through ``full_graph``
    (activation after every hop), and the outputs of all layers, including
    the initial one, are summed.

    Returns:
        Tuple[Tensor, Tensor]: The final user embeddings (``n_users`` rows)
            and item embeddings (``n_items + 1`` rows).
    """
    item_table = self.item_embedding.weight

    # Layer-0 users: activation(user_graph @ item_table)
    # -> [n_users, embedding_size]
    seed_users = self.activation_layer(torch.sparse.mm(self.user_graph, item_table))

    # Joint node matrix, users first: [n_users + n_items + 1, embedding_size]
    current = torch.cat([seed_users, item_table], dim=0)
    layer_outputs = [current]

    # One sparse hop plus activation per layer; every output is kept
    for _ in range(self.n_layers):
        current = self.activation_layer(torch.sparse.mm(self.full_graph, current))
        layer_outputs.append(current)

    # Sum over the layer axis
    summed = torch.stack(layer_outputs, dim=1).sum(dim=1)

    # Undo the concatenation
    users_emb, items_emb = torch.split(summed, [self.n_users, self.n_items + 1])
    return users_emb, items_emb

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/esigcf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item row except the last one is scored.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # One propagation pass yields the complete user and item tables
    all_users, all_items = self.propagate_embeddings()

    # Rows of the user table that belong to this batch
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled scoring: every user is matched against its own candidates
        candidates = all_items[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]
        # b: batch, s: sample, e: embedding -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Full scoring: the trailing row is excluded (presumably the padding
    # entry of the item table)
    catalogue = all_items[:-1, :]
    # b: batch, i: item, e: embedding -> [batch_size, n_items]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.gcmc.GCMC

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of GCMC algorithm from Graph Convolutional Matrix Completion (KDD 2018).

This model is a graph autoencoder for explicit feedback. It uses a graph convolutional encoder to learn user/item embeddings and a decoder to predict rating probabilities.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of training epochs.

learning_rate float

The learning rate value.

Raises:

Type Description
ValueError

If the dataset does not contain any explicit rating values.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/gcmc.py
@model_registry.register(name="GCMC")
class GCMC(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of GCMC algorithm from
        Graph Convolutional Matrix Completion (KDD 2018).

    This model is a graph autoencoder for explicit feedback. It uses a graph
    convolutional encoder to learn user/item embeddings and a decoder to
    predict rating probabilities.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. An optional
            ``block_size`` entry (default 50) sets how many items are
            scored at once during full prediction.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of training epochs.
        learning_rate (float): The learning rate value.

    Raises:
        ValueError: If the dataset does not contain any explicit rating value.
    """

    # Dataloader definition for explicit feedback
    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Model hyperparameters
    embedding_size: int
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Check for optional value of block size
        # (number of items scored per chunk in the full-prediction loop)
        self.block_size = kwargs.get("block_size", 50)

        # Determine the unique ratings in the dataset
        unique_ratings = interactions.get_unique_ratings()
        self.n_ratings = len(unique_ratings)
        if self.n_ratings == 0:
            raise ValueError(
                "GCMC model requires explicit feedback with at least one rating value."
            )

        # Map rating values to class indices for loss computation:
        # position k in classes_tensor holds the rating value of class k
        classes_tensor = torch.tensor(unique_ratings, dtype=torch.float32)
        self.register_buffer("classes_tensor", classes_tensor)

        # Initial node features (embeddings); the item table has one extra
        # row (index n_items) used as padding
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Encoder and decoder modules; the decoder maps the concatenated
        # [user, item] embedding to one logit per rating class
        self.encoder = GCMCEncoderLayer(
            self.embedding_size, self.embedding_size, self.n_ratings
        )
        self.decoder = nn.Linear(2 * self.embedding_size, self.n_ratings)

        # Create adjacency matrices for each rating.
        # NOTE: kept in a plain Python list (not registered buffers), so they
        # are moved to the right device manually in propagate_embeddings().
        self.adj_tensors = []
        for rating_value in unique_ratings:
            # Retrieve the adjacency matrix for this rating
            matrix = interactions.get_sparse_by_rating(rating_value).tocoo()

            # Extract row and column indices
            row = torch.from_numpy(matrix.row).long()
            col = torch.from_numpy(matrix.col).long()

            # Create rectangular SparseTensor (Bipartite graph);
            # +1 column keeps it aligned with the padded item table
            adj_tensor = SparseTensor(
                row=row, col=col, sparse_sizes=(self.n_users, self.n_items + 1)
            )

            self.adj_tensors.append(adj_tensor)

        self.apply(self._init_weights)
        self.ce_loss = nn.CrossEntropyLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Returns the pointwise dataloader without negative sampling.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Unused by this model.
            **kwargs (Any): Extra keyword arguments forwarded to the
                dataloader factory.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=0,
            batch_size=self.batch_size,
            **kwargs,
        )

    def propagate_embeddings(self) -> Tuple[Tensor, Tensor]:
        """Performs the graph convolution to get final node embeddings.

        Returns:
            Tuple[Tensor, Tensor]: The ReLU-activated user and item embeddings
                produced by the encoder.
        """
        user_feat = self.user_embedding.weight
        item_feat = self.item_embedding.weight

        # Move adjacency tensors to the same device as features
        # (only the first tensor is checked; all are moved together)
        device = user_feat.device
        if self.adj_tensors[0].device() != device:
            self.adj_tensors = [adj.to(device) for adj in self.adj_tensors]

        user_embed, item_embed = self.encoder(user_feat, item_feat, self.adj_tensors)

        return F.relu(user_embed), F.relu(item_embed)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Computes the training loss for one batch.

        The loss is the cross-entropy between the predicted rating logits and
        the class of the nearest known rating value, plus the weighted L2
        regularization of the raw user/item embeddings of the batch.

        Args:
            batch (Any): A (user, item, rating) triple of tensors.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss for the batch.
        """
        user, item, rating = batch

        predictions = self.forward(user, item)

        # Find the closest class index for each true rating in a vectorized way
        diff = torch.abs(
            rating.unsqueeze(1) - self.classes_tensor.unsqueeze(0)  # type: ignore[operator]
        )  # [batch_size, num_ratings]

        # Calculate CE loss
        _, target_classes = torch.min(diff, dim=1)  # [batch_size]
        ce_loss = self.ce_loss(predictions, target_classes)

        # Calculate L2 regularization
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(item),
        )

        # Loss logging
        loss = ce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self, user: Tensor, item: Tensor) -> Tensor:
        """Forward pass for GCMC. Computes rating logits for given user-item pairs.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.

        Returns:
            Tensor: The raw logits for each rating class for each pair.
        """
        # Get the final, propagated embeddings for all users and items
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Select embeddings for the current batch
        user_e = user_all_embeddings[user]
        item_e = item_all_embeddings[item]

        # Concatenate user and item embeddings
        combined_e = torch.cat([user_e, item_e], dim=1)

        # Pass through the decoder to get rating logits
        logits = self.decoder(combined_e)
        return logits

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Returns the *expected rating* for each user-item pair, used for ranking.

        The decoder is linear over the concatenation [user, item], so its
        weight is split into a user half and an item half; the two partial
        products are computed separately and summed, which avoids building
        the concatenated embedding for every user-item pair.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item} containing expected ratings.
        """
        # Perform graph propagation once to get all node embeddings
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Deconstruct the decoder's linear layer for efficient calculation
        w = self.decoder.weight  # [num_ratings, 2*emb]
        b = self.decoder.bias  # [num_ratings]

        # Split weights into user and item parts
        w_user, w_item = torch.split(
            w, self.embedding_size, dim=1
        )  # [num_ratings, embedding_size]

        # Pre-compute partial scores for users in this batch
        batch_user_part = F.linear(
            user_all_embeddings[user_indices], w_user
        )  # [batch_size, num_ratings]

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            # (the last row of the item table is the padding entry and is dropped)
            valid_items_emb = item_all_embeddings[:-1]

            # Pre-compute the item-dependent part for ALL valid items
            all_items_part = F.linear(valid_items_emb, w_item)  # [n_items, num_ratings]

            all_scores = []
            n_items = valid_items_emb.size(0)

            # Add bias to user part once to avoid adding it in the loop
            batch_user_part_with_bias = batch_user_part + b

            for start in range(0, n_items, self.block_size):
                end = min(start + self.block_size, n_items)

                # Slice pre-computed item parts
                item_part_block = all_items_part[start:end]  # [block, num_ratings]

                # Use broadcasting to efficiently compute logits for the block
                logits_block = batch_user_part_with_bias.unsqueeze(
                    1
                ) + item_part_block.unsqueeze(0)  # [batch_size, block, num_ratings]

                # Softmax over the rating-class axis
                probs_block = F.softmax(logits_block, dim=2)

                # Calculate expected rating: sum(probs * rating_values)
                expected_ratings = torch.einsum(
                    "bif,f->bi", probs_block, self.classes_tensor
                )  # [batch_size, block]
                all_scores.append(expected_ratings)

            predictions = torch.cat(all_scores, dim=1)  # [batch_size, n_items]
            return predictions

        # Case 'sampled': process all given item_indices at once
        batch_item_emb = item_all_embeddings[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]

        # Compute item part for sampled items
        # [B, S, E] @ [E, R] -> [B, S, R]
        # B=batch_size, S=pad_seq, E=embedding_size, R=num_ratings
        batch_item_part = torch.matmul(batch_item_emb, w_item.t())

        # Sum parts and bias to get logits
        # [B, 1, R] + [B, S, R] + [R] -> [B, S, R]
        logits = batch_user_part.unsqueeze(1) + batch_item_part + b

        # Softmax over the rating-class axis
        probs = F.softmax(logits, dim=2)

        # Calculate expected rating
        # [B, S, R] * [R] -> [B, S]
        predictions = torch.einsum("bif,f->bi", probs, self.classes_tensor)
        return predictions

forward(user, item)

Forward pass for GCMC. Computes rating logits for given user-item pairs.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required

Returns:

Name Type Description
Tensor Tensor

The raw logits for each rating class for each pair.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/gcmc.py
def forward(self, user: Tensor, item: Tensor) -> Tensor:
    """Compute GCMC rating logits for the given user-item pairs.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.

    Returns:
        Tensor: The raw logits for each rating class for each pair.
    """
    # Propagated embedding tables for every user and every item
    user_table, item_table = self.propagate_embeddings()

    # Pair features: the user row followed by the item row, joined on dim 1
    pair_features = torch.cat((user_table[user], item_table[item]), dim=1)

    # The decoder turns each pair feature into per-rating logits
    return self.decoder(pair_features)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Returns the expected rating for each user-item pair, used for ranking.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item} containing expected ratings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/gcmc.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Rank items by their expected rating.

    The decoder logits of each user-item pair are turned into a
    distribution over rating classes with a softmax; the score is the
    expectation of ``self.classes_tensor`` under that distribution.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item} containing expected ratings.
    """
    # One propagation pass gives the embeddings of every node.
    users_table, items_table = self.propagate_embeddings()

    # The decoder is a linear layer over [user ; item], so its weight can
    # be cut into a user half and an item half that are applied separately
    # and summed afterwards.
    weight = self.decoder.weight  # [num_ratings, 2*emb]
    bias = self.decoder.bias  # [num_ratings]
    user_weight, item_weight = torch.split(weight, self.embedding_size, dim=1)

    # User contribution to the logits, shared by both cases below.
    user_scores = F.linear(
        users_table[user_indices], user_weight
    )  # [batch_size, num_ratings]

    if item_indices is not None:
        # Case 'sampled': every requested item is handled in one shot.
        sampled_emb = items_table[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]

        # [B, S, E] @ [E, R] -> [B, S, R]
        item_scores = torch.matmul(sampled_emb, item_weight.t())

        # [B, 1, R] + [B, S, R] + [R] -> [B, S, R]
        sampled_logits = user_scores.unsqueeze(1) + item_scores + bias

        # [B, S, R] * [R] -> [B, S]
        return torch.einsum(
            "bif,f->bi", F.softmax(sampled_logits, dim=2), self.classes_tensor
        )

    # Case 'full': score every item except the trailing padding row,
    # walking the catalogue in blocks to bound the size of the
    # [batch, block, num_ratings] intermediate.
    catalogue_emb = items_table[:-1]
    catalogue_scores = F.linear(catalogue_emb, item_weight)  # [n_items, R]
    total_items = catalogue_emb.size(0)

    # The bias is folded into the user part once, outside the loop.
    biased_user_scores = (user_scores + bias).unsqueeze(1)  # [B, 1, R]

    block_scores = []
    for offset in range(0, total_items, self.block_size):
        # Slicing clamps at the end of the tensor, so the last block may
        # simply be shorter.
        item_block = catalogue_scores[offset : offset + self.block_size]

        # Broadcasting builds the logits of the block: [B, block, R]
        block_logits = biased_user_scores + item_block.unsqueeze(0)

        # Expected rating: sum(probs * rating_values) -> [B, block]
        block_scores.append(
            torch.einsum(
                "bif,f->bi", F.softmax(block_logits, dim=2), self.classes_tensor
            )
        )

    return torch.cat(block_scores, dim=1)  # [batch_size, n_items]

propagate_embeddings()

Performs the graph convolution to get final node embeddings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/gcmc.py
def propagate_embeddings(self) -> Tuple[Tensor, Tensor]:
    """Run the graph encoder over the raw embedding tables.

    The adjacency tensors are relocated (and the relocated list is stored
    back on the model) when the first of them reports a device that
    differs from the one holding the user embeddings.

    Returns:
        Tuple[Tensor, Tensor]: The ReLU-activated user and item embeddings
            produced by ``self.encoder``.
    """
    user_table = self.user_embedding.weight
    item_table = self.item_embedding.weight

    # Only the first adjacency tensor is inspected; all of them are moved
    # together when it sits on a different device than the features.
    target = user_table.device
    misplaced = self.adj_tensors[0].device() != target
    if misplaced:
        relocated = []
        for relation in self.adj_tensors:
            relocated.append(relation.to(target))
        self.adj_tensors = relocated

    encoded_users, encoded_items = self.encoder(
        user_table, item_table, self.adj_tensors
    )
    return F.relu(encoded_users), F.relu(encoded_items)

warprec.recommenders.collaborative_filtering_recommender.graph_based.lightccf.LightCCF

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of LightCCF algorithm from "Unveiling Contrastive Learning’s Capability of Neighborhood Aggregation for Collaborative Filtering" (SIGIR 2025).

LightCCF introduces a Neighborhood Aggregation (NA) loss that brings users closer to all their interacted items while pushing them away from other positive pairs. It can operate with a simple Base Encoder (MF) or a GCN Encoder.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers. If 0, uses Base Encoder (MF).

alpha float

The weight for the Neighborhood Aggregation.

temperature float

The temperature coefficient for InfoNCE.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightccf.py
@model_registry.register(name="LightCCF")
class LightCCF(IterativeRecommender, GraphRecommenderUtils):
    """LightCCF recommender, as described in
    "Unveiling Contrastive Learning’s Capability of Neighborhood Aggregation for Collaborative Filtering" (SIGIR 2025).

    The model adds a Neighborhood Aggregation (NA) loss that pulls each user
    towards all the items it interacted with while pushing it away from the
    other positive pairs. The encoder is either plain matrix factorization
    (Base Encoder) or a stack of graph convolutions (GCN Encoder).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers. If 0, uses Base Encoder (MF).
        alpha (float): The weight for the Neighborhood Aggregation.
        temperature (float): The temperature coefficient for InfoNCE.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Kind of dataloader this model is trained with
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters (declared here, populated outside this class)
    embedding_size: int
    n_layers: int
    alpha: float
    temperature: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Embedding tables. The item table has one extra trailing row that
        # is used as the padding index.
        padded_items = self.n_items + 1
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            padded_items, self.embedding_size, padding_idx=self.n_items
        )

        # The graph and its convolution layers exist only for the GCN
        # encoder; the MF encoder (n_layers == 0) keeps both unset.
        uses_graph = self.n_layers > 0
        if not uses_graph:
            self.adj = None
            self.propagation_network = None
        else:
            self.adj = self.get_adj_mat(
                interactions.get_sparse().tocoo(),
                self.n_users,
                padded_items,  # item count including the padding row
            )
            conv_layers = [LGConv() for _ in range(self.n_layers)]
            self.propagation_network = nn.ModuleList(conv_layers)

        # Weight initialization over every submodule registered so far
        self.apply(self._init_weights)

        # Loss functions
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.nce_loss = InfoNCELoss(temperature=self.temperature)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        ``sessions`` is accepted but not used by this model.
        """
        loader = interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )
        return loader

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss (BPR + L2 + weighted NA) for one batch.

        Args:
            batch (Any): A (user, positive item, negative item) triple.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss of the batch.
        """
        user, pos_item, neg_item = batch

        # Encoder output for all nodes, then the rows of this batch
        all_users, all_items = self.forward()
        anchor = all_users[user]
        positive = all_items[pos_item]
        negative = all_items[neg_item]

        # Pairwise ranking term on dot-product scores
        score_pos = (anchor * positive).sum(dim=1)
        score_neg = (anchor * negative).sum(dim=1)
        ranking_term = self.bpr_loss(score_pos, score_neg)

        # Neighborhood Aggregation term: the users are contrasted against
        # the positives concatenated with the users themselves.
        contrast_pool = torch.cat([positive, anchor], dim=0)
        aggregation_term = self.alpha * self.nce_loss(anchor, contrast_pool)

        # L2 penalty on the raw (non-propagated) embeddings of the batch
        penalty_term = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        loss = ranking_term + penalty_term + aggregation_term
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Encode all users and items.

        With ``n_layers == 0`` the raw embedding tables are returned (MF
        encoder). Otherwise the tables are propagated through the graph and
        the outputs of all layers, input included, are averaged
        (LightGCN-style aggregation).

        Returns:
            Tuple[Tensor, Tensor]: User and Item embeddings.
        """
        user_table = self.user_embedding.weight
        item_table = self.item_embedding.weight

        # Base Encoder (MF): nothing to propagate
        if self.n_layers <= 0:
            return user_table, item_table

        # GCN Encoder: users and items form one node set
        nodes = torch.cat([user_table, item_table], dim=0)

        # Keep the adjacency on the same device as the embeddings
        if self.adj.device() != nodes.device:
            self.adj = self.adj.to(nodes.device)

        per_layer = [nodes]
        hidden = nodes
        for layer in self.propagation_network:
            hidden = layer(hidden, self.adj)
            per_layer.append(hidden)

        # Mean over the layer axis
        pooled = torch.stack(per_layer, dim=1).mean(dim=1)

        users_out, items_out = torch.split(pooled, [self.n_users, self.n_items + 1])
        return users_out, items_out

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items for a batch of users with a dot product.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        users_table, items_table = self.propagate_embeddings()

        batch_users = users_table[user_indices]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': every item except the trailing padding row
            # b: batch, e: embedding, i: item -> [batch_size, n_items]
            return torch.einsum("be,ie->bi", batch_users, items_table[:-1, :])

        # Case 'sampled': a per-user set of candidate items
        # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, items_table[item_indices])

forward()

Forward pass.

If n_layers > 0, performs LightGCN-style aggregation. If n_layers == 0, returns raw embeddings (MF encoder).

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: User and Item embeddings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightccf.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Encode all users and items.

    With ``n_layers == 0`` the raw embedding tables are returned (MF
    encoder). Otherwise the tables are propagated through the graph and the
    outputs of all layers, input included, are averaged (LightGCN-style
    aggregation).

    Returns:
        Tuple[Tensor, Tensor]: User and Item embeddings.
    """
    user_table = self.user_embedding.weight
    item_table = self.item_embedding.weight

    # Base Encoder (MF): nothing to propagate
    if self.n_layers <= 0:
        return user_table, item_table

    # GCN Encoder: users and items form one node set
    nodes = torch.cat([user_table, item_table], dim=0)

    # Keep the adjacency on the same device as the embeddings
    if self.adj.device() != nodes.device:
        self.adj = self.adj.to(nodes.device)

    per_layer = [nodes]
    hidden = nodes
    for layer in self.propagation_network:
        hidden = layer(hidden, self.adj)
        per_layer.append(hidden)

    # Mean over the layer axis
    pooled = torch.stack(per_layer, dim=1).mean(dim=1)

    users_out, items_out = torch.split(pooled, [self.n_users, self.n_items + 1])
    return users_out, items_out

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightccf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with a dot product.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    users_table, items_table = self.propagate_embeddings()

    batch_users = users_table[user_indices]  # [batch_size, embedding_size]

    if item_indices is None:
        # Case 'full': every item except the trailing padding row
        # b: batch, e: embedding, i: item -> [batch_size, n_items]
        return torch.einsum("be,ie->bi", batch_users, items_table[:-1, :])

    # Case 'sampled': a per-user set of candidate items
    # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
    return torch.einsum("be,bse->bs", batch_users, items_table[item_indices])

warprec.recommenders.collaborative_filtering_recommender.graph_based.lightgcl.LightGCL

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of LightGCL algorithm from "LightGCL: Simple Yet Effective Graph Contrastive Learning for Recommendation" (ICLR 2023).

LightGCL utilizes Singular Value Decomposition (SVD) to construct a global contrastive view, which is contrasted with the local graph view (GCN) to enhance representation learning.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

q int

The rank for SVD approximation.

ssl_lambda float

Weight for contrastive loss.

temperature float

Temperature for InfoNCE.

dropout float

Dropout probability for the adjacency matrix.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcl.py
@model_registry.register(name="LightGCL")
class LightGCL(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of LightGCL algorithm from
    "LightGCL: Simple Yet Effective Graph Contrastive Learning for Recommendation" (ICLR 2023).

    LightGCL utilizes Singular Value Decomposition (SVD) to construct a global
    contrastive view, which is contrasted with the local graph view (GCN) to
    enhance representation learning.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        q (int): The rank for SVD approximation.
        ssl_lambda (float): Weight for contrastive loss.
        temperature (float): Temperature for InfoNCE.
        dropout (float): Dropout probability for the adjacency matrix.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    q: int
    ssl_lambda: float
    temperature: float
    dropout: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The item table has one extra trailing row used as padding index.
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Graph Construction (Normalized Adjacency)
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,  # Adjust for padding idx
            normalize=True,
        )

        # SVD Decomposition (pre-computed once and stored as buffers)
        self._perform_svd(interactions)

        # Initialize weights
        self.apply(self._init_weights)

        # Losses
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.info_nce_loss = InfoNCELoss(temperature=self.temperature)

    def _perform_svd(self, interactions: Interactions):
        """Performs Truncated SVD on the normalized adjacency matrix.

        The symmetric bipartite adjacency over users and items (without the
        padding item) is built from the interactions, normalized as
        D^-0.5 * A * D^-0.5 and decomposed with rank ``q``.

        Two dense buffers are registered, each with one extra all-zero row
        appended for the padding index:
            svd_u       = U
            svd_u_mul_s = U * S

        Args:
            interactions (Interactions): The training interactions.
        """
        # Raw user-item interaction matrix in COO format
        R = interactions.get_sparse().tocoo()
        n_nodes = self.n_users + self.n_items

        # Construct the bipartite adjacency matrix: item nodes are shifted
        # by n_users and every edge is inserted in both directions.
        row = np.concatenate([R.row, R.col + self.n_users])
        col = np.concatenate([R.col + self.n_users, R.row])
        data = np.ones(len(row))

        adj = sp.coo_matrix((data, (row, col)), shape=(n_nodes, n_nodes))

        # Normalize: D^-0.5 * A * D^-0.5
        rowsum = np.array(adj.sum(1))
        # Nodes without any edge have degree 0 and produce inf here; they
        # are reset to 0 right below, so the divide warning is only noise.
        with np.errstate(divide="ignore"):
            d_inv_sqrt = np.power(rowsum, -0.5).flatten()
        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
        d_mat_inv_sqrt = sp.diags(d_inv_sqrt)

        norm_adj = d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt)

        # Perform SVD
        # U: [N, q], S: [q], Vt: [q, N]
        # NOTE(review): Vt is discarded and U is reused on the right-hand
        # side in forward(). For a symmetric matrix the right singular
        # vectors match the left ones only up to a sign per component;
        # confirm that this simplification is intended.
        u, s, _ = sp.linalg.svds(norm_adj, k=self.q)

        # Handle negative strides from scipy svds
        u = u.copy()

        # U_mul_s = U * S: scale column j of U by s[j]. Broadcasting does
        # this directly, without materializing a dense [q, q] diagonal.
        u_mul_s = u * s

        # Padding for the last item (padding idx)
        pad_row = np.zeros((1, self.q), dtype=u.dtype)
        u = np.vstack([u, pad_row])
        u_mul_s = np.vstack([u_mul_s, pad_row])

        # Convert to tensors and register buffers
        # These are dense matrices of shape [N_nodes + 1, q]
        self.register_buffer("svd_u", torch.from_numpy(u).float().to(self.device))
        self.register_buffer(
            "svd_u_mul_s", torch.from_numpy(u_mul_s).float().to(self.device)
        )

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        ``sessions`` is accepted but not used by this model.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _dropout_adj(self, adj: SparseTensor) -> SparseTensor:
        """Applies edge dropout to the torch_sparse.SparseTensor.

        Outside of training, or with ``dropout == 0``, the input is returned
        untouched. Otherwise each stored value is zeroed with probability
        ``dropout`` and the survivors are rescaled by ``1 / (1 - dropout)``.

        Args:
            adj (SparseTensor): The adjacency matrix.

        Returns:
            SparseTensor: The adjacency matrix to propagate with.
        """
        if self.training and self.dropout > 0:
            # Extract values from SparseTensor
            _, _, val = adj.coo()

            # Create dropout mask (True = keep the edge)
            mask = torch.rand(val.size(0), device=val.device) > self.dropout

            # Apply mask and scale
            # We cast mask to the value dtype to perform multiplication
            val = val * mask.to(val.dtype) / (1.0 - self.dropout)

            # Return a new SparseTensor with updated values; the indices
            # are kept, dropped edges just carry a zero value.
            return adj.set_value(val, layout="coo")

        return adj

    def forward(self) -> Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]:
        """Forward pass computing both GCN and SVD views.

        Returns:
            Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]: The
                sum-pooled GCN user embeddings, the sum-pooled GCN item
                embeddings (padding row included), the per-layer GCN
                embeddings and the per-layer SVD embeddings. Both lists
                start with the raw embeddings and hold ``n_layers + 1``
                entries.
        """
        # Initial Embeddings
        ego_u = self.user_embedding.weight
        ego_i = self.item_embedding.weight
        ego_all = torch.cat([ego_u, ego_i], dim=0)

        # Lists to store layer embeddings
        gcn_embeddings = [ego_all]
        svd_embeddings = [ego_all]

        # Ensure adj is on device
        if self.adj.device() != self.device:
            self.adj = self.adj.to(self.device)

        # Apply dropout for GCN view
        adj_dropped = self._dropout_adj(self.adj)

        for _ in range(self.n_layers):
            # GCN Propagation
            # Z^(l) = A * Z^(l-1)
            # We use the previous layer's GCN embedding
            z_prev = gcn_embeddings[-1]
            z_next = adj_dropped.matmul(z_prev)
            gcn_embeddings.append(z_next)

            # The SVD view is also computed from the previous GCN layer.
            # Project to latent space: [q, N] @ [N, d] -> [q, d]
            latent = torch.mm(self.svd_u.t(), z_prev)  # type: ignore[operator]

            # Reconstruct: [N, q] @ [q, d] -> [N, d]
            g_next = torch.mm(self.svd_u_mul_s, latent)  # type: ignore[arg-type]

            svd_embeddings.append(g_next)

        # Sum pooling
        gcn_final = torch.stack(gcn_embeddings, dim=0).sum(dim=0)

        # Split for BPR compute
        user_gcn, item_gcn = torch.split(gcn_final, [self.n_users, self.n_items + 1])

        return user_gcn, item_gcn, gcn_embeddings, svd_embeddings

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss (BPR + L2 + weighted contrastive).

        Args:
            batch (Any): A (user, positive item, negative item) triple.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: The total loss of the batch.
        """
        user, pos_item, neg_item = batch

        # Forward Pass
        user_gcn, item_gcn, gcn_list, svd_list = self.forward()

        # Calculate BPR loss (on GCN view)
        u_e = user_gcn[user]
        pos_e = item_gcn[pos_item]
        neg_e = item_gcn[neg_item]

        pos_scores = torch.mul(u_e, pos_e).sum(dim=1)
        neg_scores = torch.mul(u_e, neg_e).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Contrastive Learning (Layer-wise InfoNCE), layer 0 included
        cl_loss = 0.0
        for i in range(self.n_layers + 1):
            # Split current layer embeddings
            u_gcn_l, i_gcn_l = torch.split(
                gcn_list[i], [self.n_users, self.n_items + 1]
            )
            u_svd_l, i_svd_l = torch.split(
                svd_list[i], [self.n_users, self.n_items + 1]
            )

            # Users Contrast
            cl_loss += self.info_nce_loss(u_gcn_l[user], u_svd_l[user])

            # Items Contrast (only the positive items of the batch)
            cl_loss += self.info_nce_loss(i_gcn_l[pos_item], i_svd_l[pos_item])

        cl_loss = self.ssl_lambda * cl_loss

        # Calculate L2 loss on the raw (non-propagated) embeddings
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        loss = bpr_loss + reg_loss + cl_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Forward pass computing both GCN and SVD views.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcl.py
def forward(self) -> Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]:
    """Build the local (GCN) and global (SVD) views of all nodes.

    Returns:
        Tuple[Tensor, Tensor, List[Tensor], List[Tensor]]: The sum-pooled
            GCN user embeddings, the sum-pooled GCN item embeddings, the
            per-layer GCN embeddings and the per-layer SVD embeddings. Both
            lists start with the raw embeddings.
    """
    # Users and items are stacked into a single node table.
    base = torch.cat([self.user_embedding.weight, self.item_embedding.weight], dim=0)

    # The adjacency has to live on the model device.
    if self.adj.device() != self.device:
        self.adj = self.adj.to(self.device)

    # Edge dropout only affects the GCN view.
    propagation_adj = self._dropout_adj(self.adj)

    local_view = [base]
    global_view = [base]
    for _ in range(self.n_layers):
        # Both views of this layer start from the previous GCN layer.
        previous = local_view[-1]

        # GCN propagation: Z^(l) = A * Z^(l-1)
        local_view.append(propagation_adj.matmul(previous))

        # Low-rank path: [q, N] @ [N, d] -> [q, d], then
        # [N, q] @ [q, d] -> [N, d]
        compressed = torch.mm(self.svd_u.t(), previous)  # type: ignore[operator]
        global_view.append(
            torch.mm(self.svd_u_mul_s, compressed)  # type: ignore[arg-type]
        )

    # Sum pooling over the GCN layers, then split back into users/items.
    pooled = torch.stack(local_view, dim=0).sum(dim=0)
    user_part, item_part = torch.split(pooled, [self.n_users, self.n_items + 1])

    return user_part, item_part, local_view, global_view

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Embeddings of every user and item, as given by the propagation network
    all_users, all_items = self.propagate_embeddings()

    # Rows of the users in the batch: [batch_size, embedding_size]
    batch_users = all_users[user_indices]

    if item_indices is not None:
        # Case 'sampled': each user is scored against its own candidates
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Case 'full': every item is scored, the last (padding) row is dropped
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item -> [batch_size, n_items]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.lightgcn.LightGCN

Bases: GraphRecommenderUtils, IterativeRecommender

Implementation of the LightGCN algorithm from "LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation" (SIGIR 2020).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcn.py
@model_registry.register(name="LightGCN")
class LightGCN(GraphRecommenderUtils, IterativeRecommender):
    """LightGCN recommender, implemented after the paper
        LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation (SIGIR 2020)

    The user and item embedding tables are propagated over the user-item
    adjacency matrix by a stack of ``LGConv`` layers, and the final
    representation is the mean of the embeddings of all the layers
    (the initial one included).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Kind of dataloader this model is trained with
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters of the model
    embedding_size: int
    n_layers: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The item table has one extra row, used as padding index
        padded_items = self.n_items + 1

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            padded_items, self.embedding_size, padding_idx=self.n_items
        )
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            padded_items,  # Adjust for padding idx
        )

        # One LGConv per propagation layer, chained in a Sequential
        conv_layers = [
            (LGConv(), "x, edge_index -> x") for _ in range(self.n_layers)
        ]
        self.propagation_network = torch_geometric.nn.Sequential(
            "x, edge_index", conv_layers
        )

        # Weights initialization and losses
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader of ``interactions``.

        The dataloader is built with the batch size of the model; ``sessions``
        is not used and ``kwargs`` are forwarded unchanged.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the BPR loss plus the L2 penalty on a (user, pos, neg) batch."""
        users, positives, negatives = batch

        # Propagated embeddings of all users and items
        all_users, all_items = self.forward()

        # Dot-product scores of the positive and of the negative pairs
        user_vectors = all_users[users]
        pos_scores = (user_vectors * all_items[positives]).sum(dim=1)
        neg_scores = (user_vectors * all_items[negatives]).sum(dim=1)
        ranking_loss = self.bpr_loss(pos_scores, neg_scores)

        # The L2 penalty is computed on the raw (non propagated) embeddings
        penalty = self.reg_weight * self.reg_loss(
            self.user_embedding(users),
            self.item_embedding(positives),
            self.item_embedding(negatives),
        )

        # Loss logging
        total_loss = ranking_loss + penalty
        self.log("loss", total_loss, prog_bar=True, on_step=False, on_epoch=True)
        return total_loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of the LightGCN model for embedding propagation.

        Returns:
            Tuple[Tensor, Tensor]: User and item embeddings after propagation.
        """
        layer_zero = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # The adjacency matrix must live on the same device as the embeddings
        embeddings_device = layer_zero.device
        if self.adj.device() != embeddings_device:
            self.adj = self.adj.to(embeddings_device)

        # The layers are applied one at a time so that the output of each
        # of them can be kept and weighted (by alpha) in the final pooling
        per_layer = [layer_zero]
        for conv in self.propagation_network.children():
            per_layer.append(conv(per_layer[-1], self.adj))

        # Averaging the layers is equivalent to multiplying by alpha and summing
        pooled = torch.stack(per_layer, dim=0).mean(dim=0)

        # Users come first, then the items plus the padding row
        user_all_embeddings, item_all_embeddings = torch.split(
            pooled,
            [self.n_users, self.n_items + 1],  # Adjust for padding idx
        )
        return user_all_embeddings, item_all_embeddings

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Embeddings of every user and item from the propagation network
        all_users, all_items = self.propagate_embeddings()

        # Rows of the users in the batch: [batch_size, embedding_size]
        batch_users = all_users[user_indices]

        if item_indices is not None:
            # Case 'sampled': prediction on a sampled set of items
            sampled_items = all_items[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
            return torch.einsum("be,bse->bs", batch_users, sampled_items)

        # Case 'full': prediction on all items, the padding row is dropped
        real_items = all_items[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item -> [batch_size, n_items]
        return torch.einsum("be,ie->bi", batch_users, real_items)

forward()

Forward pass of the LightGCN model for embedding propagation.

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: User and item embeddings after propagation.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcn.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Propagate the embeddings with LightGCN and pool the layers by mean.

    Returns:
        Tuple[Tensor, Tensor]: User and item embeddings after propagation.
    """
    layer_zero = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # The adjacency matrix must live on the same device as the embeddings
    embeddings_device = layer_zero.device
    if self.adj.device() != embeddings_device:
        self.adj = self.adj.to(embeddings_device)

    # The layers are applied one at a time so that the output of each
    # of them can be kept and weighted (by alpha) in the final pooling
    per_layer = [layer_zero]
    for conv in self.propagation_network.children():
        per_layer.append(conv(per_layer[-1], self.adj))

    # Averaging the layers is equivalent to multiplying by alpha and summing
    pooled = torch.stack(per_layer, dim=0).mean(dim=0)

    # Users come first, then the items plus the padding row
    split_sizes = [self.n_users, self.n_items + 1]  # Adjust for padding idx
    user_all_embeddings, item_all_embeddings = torch.split(pooled, split_sizes)
    return user_all_embeddings, item_all_embeddings

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Embeddings of every user and item, as given by the propagation network
    propagated_users, propagated_items = self.propagate_embeddings()

    # Rows of the users in the batch: [batch_size, embedding_size]
    user_rows = propagated_users[user_indices]

    full_prediction = item_indices is None
    if full_prediction:
        # Every item is scored, the last (padding) row is dropped
        item_rows = propagated_items[:-1, :]  # [n_items, embedding_size]
        pattern = "be,ie->bi"  # b: batch, e: embedding, i: item
    else:
        # Each user is scored against its own sampled items
        item_rows = propagated_items[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]
        pattern = "be,bse->bs"  # b: batch, e: embedding, s: sample

    # [batch_size, n_items] or [batch_size, pad_seq]
    return torch.einsum(pattern, user_rows, item_rows)

warprec.recommenders.collaborative_filtering_recommender.graph_based.lightgcnpp.LightGCNpp

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of the LightGCN++ algorithm from "Revisiting LightGCN: Unexpected Inflexibility, Inconsistency, and A Remedy Towards Improved Recommendation" (RecSys 2024).

Parameters:

Name Type Description Default
params dict

Model parameters. Requires 'alpha', 'beta', 'gamma'.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

alpha float

The exponent for the target node degree in the normalization coefficient.

beta float

The exponent for the source node degree in the normalization coefficient.

gamma float

The coefficient balancing the initial embeddings (\(E^0\)) and the aggregated graph embeddings (\(E_{mean}\)).

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcnpp.py
@model_registry.register(name="LightGCNpp")
class LightGCNpp(IterativeRecommender, GraphRecommenderUtils):
    """LightGCN++ recommender, implemented after the paper Revisiting
        LightGCN: Unexpected Inflexibility, Inconsistency, and
        A Remedy Towards Improved Recommendation (RecSys 2024).

    The edge weights ``1 / (deg_target^alpha * deg_source^beta)`` are stored
    in the adjacency matrix itself, and the final embeddings blend the initial
    ones with the mean of the propagated layers through ``gamma``.

    Args:
        params (dict): Model parameters. Requires 'alpha', 'beta', 'gamma'.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        alpha (float): The exponent for the target node degree in the normalization coefficient.
        beta (float): The exponent for the source node degree in the normalization coefficient.
        gamma (float): The coefficient balancing the initial embeddings ($E^0$) and the
            aggregated graph embeddings ($E_{mean}$).
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Kind of dataloader this model is trained with
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters of the model
    embedding_size: int
    n_layers: int
    alpha: float
    beta: float
    gamma: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The item table has one extra row, used as padding index
        padded_items = self.n_items + 1

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            padded_items, self.embedding_size, padding_idx=self.n_items
        )

        # The custom LightGCN++ weights are baked into the adjacency matrix,
        # hence the internal normalization of LGConv has to be switched off.
        conv_layers = [
            (LGConv(normalize=False), "x, edge_index -> x")
            for _ in range(self.n_layers)
        ]
        self.propagation_network = torch_geometric.nn.Sequential(
            "x, edge_index", conv_layers
        )

        # Weighted version of the adjacency matrix
        self.adj = self._get_weighted_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            padded_items,  # Adjust for padding idx
        )

        # Weights initialization and losses
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def _get_weighted_adj_mat(
        self,
        interaction_matrix: coo_matrix,
        n_users: int,
        n_items: int,
    ) -> SparseTensor:
        """Build the adjacency matrix weighted through Alpha and Beta.

        Formula: E_ij = 1 / (deg_i^alpha * deg_j^beta)

        Args:
            interaction_matrix (coo_matrix): The user-item interactions.
            n_users (int): The number of user nodes.
            n_items (int): The number of item nodes.

        Returns:
            SparseTensor: The weighted adjacency matrix, with
                ``n_users + n_items`` nodes per side.
        """

        def _degrees(axis: int) -> np.ndarray:
            # Sum of the interaction matrix along the axis; zeros are replaced
            # by one so that the normalization never divides by zero
            degrees = np.array(interaction_matrix.sum(axis=axis)).flatten()
            degrees[degrees == 0] = 1
            return degrees

        # Endpoints of the edges: the item nodes are placed after the users
        edge_users = interaction_matrix.row
        edge_items = interaction_matrix.col
        user_nodes = edge_users
        item_nodes = edge_items + n_users

        # Degree of the user and of the item of every edge
        d_u = _degrees(1)[edge_users]
        d_i = _degrees(0)[edge_items]

        # User -> Item edges (Target is Item, Source is User)
        # weight_u2i = 1 / (d_i^alpha * d_u^beta)
        weight_u2i = (d_i**-self.alpha) * (d_u**-self.beta)

        # Item -> User edges (Target is User, Source is Item)
        # weight_i2u = 1 / (d_u^alpha * d_i^beta)
        weight_i2u = (d_u**-self.alpha) * (d_i**-self.beta)

        # Both directions are stacked one after the other
        sources = np.concatenate([user_nodes, item_nodes])
        targets = np.concatenate([item_nodes, user_nodes])
        weights = np.concatenate([weight_u2i, weight_i2u])

        # Tensors of the edges and of their weights
        edge_rows, edge_cols = torch.tensor(
            np.vstack([sources, targets]), dtype=torch.int64
        )
        edge_weights = torch.tensor(weights, dtype=torch.float32)

        # SparseTensor with explicit values (weights)
        n_nodes = n_users + n_items
        return SparseTensor(
            row=edge_rows,
            col=edge_cols,
            value=edge_weights,
            sparse_sizes=(n_nodes, n_nodes),
        )

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader of ``interactions``.

        The dataloader is built with the batch size of the model; ``sessions``
        is not used and ``kwargs`` are forwarded unchanged.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the BPR loss plus the L2 penalty on a (user, pos, neg) batch."""
        users, positives, negatives = batch

        # Propagated embeddings of all users and items
        all_users, all_items = self.forward()

        # Dot-product scores of the positive and of the negative pairs
        user_vectors = all_users[users]
        pos_scores = (user_vectors * all_items[positives]).sum(dim=1)
        neg_scores = (user_vectors * all_items[negatives]).sum(dim=1)
        ranking_loss = self.bpr_loss(pos_scores, neg_scores)

        # The L2 penalty is computed on the raw (non propagated) embeddings
        penalty = self.reg_weight * self.reg_loss(
            self.user_embedding(users),
            self.item_embedding(positives),
            self.item_embedding(negatives),
        )

        # Loss logging
        total_loss = ranking_loss + penalty
        self.log("loss", total_loss, prog_bar=True, on_step=False, on_epoch=True)
        return total_loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of LightGCN++ with custom pooling logic.

        Returns:
            Tuple[Tensor, Tensor]: User and item embeddings after propagation.
        """
        initial = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # The adjacency matrix must live on the same device as the embeddings
        embeddings_device = initial.device
        if self.adj.device() != embeddings_device:
            self.adj = self.adj.to(embeddings_device)

        # Outputs of the layers 1 to K
        propagated = []
        hidden = initial
        for conv in self.propagation_network.children():
            # LGConv(normalize=False) will use the weights in self.adj
            hidden = conv(hidden, self.adj)
            propagated.append(hidden)

        # LightGCN++ pooling: mean over the layers 1 to K
        # ([K, N, Emb_Size] -> mean on dim 0), zeros when there is no layer
        # (should not happen in GCN)
        neighbourhood = (
            torch.stack(propagated, dim=0).mean(dim=0)
            if propagated
            else torch.zeros_like(initial)
        )

        # Layer 0 and the mean of the other layers are combined using Gamma
        blended = self.gamma * initial + (1 - self.gamma) * neighbourhood

        # Users come first, then the items plus the padding row
        user_all_embeddings, item_all_embeddings = torch.split(
            blended,
            [self.n_users, self.n_items + 1],
        )
        return user_all_embeddings, item_all_embeddings

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Embeddings of every user and item from the propagation network
        all_users, all_items = self.propagate_embeddings()

        # Rows of the users in the batch: [batch_size, embedding_size]
        batch_users = all_users[user_indices]

        if item_indices is not None:
            # Case 'sampled': prediction on a sampled set of items
            sampled_items = all_items[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
            return torch.einsum("be,bse->bs", batch_users, sampled_items)

        # Case 'full': prediction on all items, the padding row is dropped
        real_items = all_items[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item -> [batch_size, n_items]
        return torch.einsum("be,ie->bi", batch_users, real_items)

forward()

Forward pass of LightGCN++ with custom pooling logic.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcnpp.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Forward pass of LightGCN++ with custom pooling logic.

    Returns:
        Tuple[Tensor, Tensor]: User and item embeddings after propagation.
    """
    initial = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # The adjacency matrix must live on the same device as the embeddings
    embeddings_device = initial.device
    if self.adj.device() != embeddings_device:
        self.adj = self.adj.to(embeddings_device)

    # Outputs of the layers 1 to K
    propagated = []
    hidden = initial
    for conv in self.propagation_network.children():
        # LGConv(normalize=False) will use the weights in self.adj
        hidden = conv(hidden, self.adj)
        propagated.append(hidden)

    # LightGCN++ pooling: mean over the layers 1 to K
    # ([K, N, Emb_Size] -> mean on dim 0), zeros when there is no layer
    # (should not happen in GCN)
    neighbourhood = (
        torch.stack(propagated, dim=0).mean(dim=0)
        if propagated
        else torch.zeros_like(initial)
    )

    # Layer 0 and the mean of the other layers are combined using Gamma
    blended = self.gamma * initial + (1 - self.gamma) * neighbourhood

    # Users come first, then the items plus the padding row
    split_sizes = [self.n_users, self.n_items + 1]
    user_all_embeddings, item_all_embeddings = torch.split(blended, split_sizes)
    return user_all_embeddings, item_all_embeddings

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgcnpp.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Embeddings of every user and item, as given by the propagation network
    users_table, items_table = self.propagate_embeddings()

    # Rows of the users in the batch: [batch_size, embedding_size]
    query = users_table[user_indices]

    if item_indices is None:
        # Case 'full': every item is scored, the last (padding) row is dropped
        keys = items_table[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item -> [batch_size, n_items]
        scores = torch.einsum("be,ie->bi", query, keys)
    else:
        # Case 'sampled': each user is scored against its own candidates
        keys = items_table[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
        scores = torch.einsum("be,bse->bs", query, keys)

    return scores

warprec.recommenders.collaborative_filtering_recommender.graph_based.lightgode.LightGODE

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of LightGODE from "Do We Really Need Graph Convolution During Training? Light Post-Training Graph-ODE for Efficient Recommendation" (CIKM '24).

LightGODE skips graph convolution during training, optimizing embeddings directly via Alignment and Uniformity losses (like Matrix Factorization but with geometric losses). During inference, it applies a continuous Graph-ODE solver to inject high-order connectivity information.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

gamma float

The weight for the uniformity loss.

t float

The time horizon for ODE integration.

n_ode_steps int

The number of ODE integration steps during inference.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgode.py
@model_registry.register(name="LightGODE")
class LightGODE(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of LightGODE from "Do We Really Need Graph Convolution During Training?
    Light Post-Training Graph-ODE for Efficient Recommendation" (CIKM '24).

    LightGODE skips graph convolution during training, optimizing embeddings directly
    via Alignment and Uniformity losses (like Matrix Factorization but with geometric losses).
    During inference, it applies a continuous Graph-ODE solver to inject high-order
    connectivity information.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        gamma (float): The weight for the uniformity loss.
        t (float): The time horizon for ODE integration.
        n_ode_steps (int): The number of ODE integration steps during inference.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Hyperparameters
    embedding_size: int
    gamma: float
    t: float
    n_ode_steps: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The item table has one extra row (index n_items) used as padding
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Graph Construction (Used ONLY for Post-Training ODE)
        # Standard LightGCN normalization: D^-1/2 A D^-1/2
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # Initialize weights
        self.apply(self._init_weights)

        # Regularization
        self.reg_loss = EmbLoss()

        # Cache for inference embeddings (filled lazily by forward() in eval mode)
        self._cached_inference_emb: Optional[Tuple[Tensor, Tensor]] = None

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader.

        Requests a pointwise dataloader with ``neg_samples=0``; ``sessions``
        is accepted for interface compatibility and is not used here.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Unused.
            **kwargs (Any): Forwarded to ``get_pointwise_dataloader``.

        Returns:
            The object returned by ``interactions.get_pointwise_dataloader``.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=0,
            batch_size=self.batch_size,
            **kwargs,
        )

    def train(self, mode: bool = True):
        """Set the module mode and drop the inference cache on entering training.

        Args:
            mode (bool): True for training mode, False for evaluation mode.

        Returns:
            The module itself.
        """
        super().train(mode)
        if not mode:
            return self
        # Weights are about to change, so cached ODE embeddings become stale
        self._cached_inference_emb = None
        return self

    def _alignment_loss(self, x: Tensor, y: Tensor) -> Tensor:
        """Calculates Alignment Loss.

        Minimizes Euclidean distance between normalized positive pairs.
        L_align = Mean( ||x - y||^2 )

        Args:
            x (Tensor): Normalized user embeddings, one row per pair.
            y (Tensor): Normalized item embeddings, one row per pair.

        Returns:
            Tensor: Scalar alignment loss.
        """
        # Note: Inputs x, y are already normalized in training_step
        return (x - y).norm(p=2, dim=1).pow(2).mean()

    def _uniformity_loss(self, x: Tensor) -> Tensor:
        """Calculates Uniformity Loss.

        Pushes distinct rows of the batch apart (Gaussian potential):
        L_uniform = log( Mean_{j < k}( exp( -2 * ||x_j - x_k||^2 ) ) )

        The mean runs over distinct pairs only; self-pairs are excluded
        because their distance is always zero and carries no signal.

        Args:
            x (Tensor): L2-normalized embeddings, one row per sample.

        Returns:
            Tensor: Scalar uniformity loss. With fewer than two rows there
                are no pairs, so a zero attached to the graph is returned.
        """
        n_rows = x.size(0)
        if n_rows < 2:
            # The mean over an empty set of pairs would be NaN
            return x.sum() * 0.0

        # x @ x.T is the cosine similarity matrix since x is normalized
        cos_sim = torch.mm(x, x.t())

        # Keep each unordered pair once (strict upper triangle)
        rows, cols = torch.triu_indices(n_rows, n_rows, offset=1, device=x.device)
        pair_sim = cos_sim[rows, cols]

        # ||u - u'||^2 = ||u||^2 + ||u'||^2 - 2 u.u' = 2 - 2 u.u' (since norm is 1)
        # exponent = -2 * (2 - 2 * sim) = -4 + 4 * sim, which lies in [-8, 0]
        exponent = -4.0 + 4.0 * pair_sim
        return torch.exp(exponent).mean().log()

    @torch.no_grad()
    def _ode_solver(self) -> Tuple[Tensor, Tensor]:
        """Post-Training Graph-ODE Solver.

        Solves: dh(t)/dt = Adj * h(t) + h0
        Using Euler method: h_{k+1} = h_k + step_size * (Adj * h_k + h0)

        Returns:
            Tuple[Tensor, Tensor]: User embeddings (n_users rows) and item
                embeddings (n_items + 1 rows, padding row included) after
                ``n_ode_steps`` Euler steps of size ``t / n_ode_steps``.
        """
        h0_u = self.user_embedding.weight
        h0_i = self.item_embedding.weight
        h0_all = torch.cat([h0_u, h0_i], dim=0)

        h_t = h0_all.clone()

        # Ensure adj is on device
        # NOTE(review): device is invoked as a method on adj; presumably adj is
        # a sparse-tensor type that exposes it that way -- confirm.
        if self.adj.device() != self.device:
            self.adj = self.adj.to(self.device)

        # Euler Integration
        # Total time T, split into N steps. Step size = T / N
        step_size = self.t / self.n_ode_steps

        for _ in range(self.n_ode_steps):
            # Graph Aggregation: A * h(t)
            agg = self.adj.matmul(h_t)

            # Add Source Term: + h0
            derivative = agg + h0_all

            # Euler Update: h(t+1) = h(t) + dt * derivative
            h_t = h_t + step_size * derivative

        user_final, item_final = torch.split(h_t, [self.n_users, self.n_items + 1])
        return user_final, item_final

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The loss is alignment + gamma * mean(user/item uniformity) +
        reg_weight * L2 regularization on the batch embeddings.

        Args:
            batch (Any): Triple of (user, positive item, rating); the rating
                is ignored.
            batch_idx (int): Batch index (unused).

        Returns:
            Tensor: The combined scalar loss.
        """
        user, pos_item, _ = batch  # Ignore rating values

        # Get Embeddings (No GCN)
        all_users, all_items = self.forward()

        u_emb = all_users[user]
        i_emb = all_items[pos_item]

        # Normalize (Crucial for Alignment/Uniformity)
        u_emb_norm = F.normalize(u_emb, dim=1)
        i_emb_norm = F.normalize(i_emb, dim=1)

        # Calculate Alignment loss
        align_loss = self._alignment_loss(u_emb_norm, i_emb_norm)

        # Calculate Uniformity loss
        unif_loss_u = self._uniformity_loss(u_emb_norm)
        unif_loss_i = self._uniformity_loss(i_emb_norm)
        unif_loss = self.gamma * (unif_loss_u + unif_loss_i) / 2

        # Calculate L2 loss
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user), self.item_embedding(pos_item)
        )

        # Loss logging
        loss = align_loss + unif_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Return the user and item embedding tables for the current mode.

        - If training: Returns raw ID embeddings (no convolution).
        - If eval: Returns ODE-convolved embeddings (with caching).

        Returns:
            Tuple[Tensor, Tensor]: (user embeddings, item embeddings).
        """
        if not self.training:
            # Inference Phase: solve the Graph-ODE once and reuse the result
            cached = self._cached_inference_emb
            if cached is None:
                cached = self._ode_solver()
                self._cached_inference_emb = cached
            return cached

        # Training Phase: No Graph Convolution
        return self.user_embedding.weight, self.item_embedding.weight

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # NOTE(review): propagate_embeddings is not defined in this class;
        # presumably inherited from GraphRecommenderUtils -- confirm.
        user_table, item_table = self.propagate_embeddings()

        # Embeddings of the users in the batch
        batch_users = user_table[user_indices]  # [batch_size, embedding_size]

        if item_indices is not None:
            # Case 'sampled': prediction on a sampled set of items
            candidates = item_table[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample
            return torch.einsum(
                "be,bse->bs", batch_users, candidates
            )  # [batch_size, pad_seq]

        # Case 'full': prediction on all items (last row is padding)
        catalogue = item_table[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item
        return torch.einsum(
            "be,ie->bi", batch_users, catalogue
        )  # [batch_size, n_items]

forward()

Standard forward pass.

  • If training: Returns raw ID embeddings (no convolution).
  • If eval: Returns ODE-convolved embeddings (with caching).
Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgode.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Return the user and item embedding tables for the current mode.

    - If training: Returns raw ID embeddings (no convolution).
    - If eval: Returns ODE-convolved embeddings (with caching).
    """
    if not self.training:
        # Inference Phase: solve the Graph-ODE once and reuse the result
        cached = self._cached_inference_emb
        if cached is None:
            cached = self._ode_solver()
            self._cached_inference_emb = cached
        return cached

    # Training Phase: No Graph Convolution
    return self.user_embedding.weight, self.item_embedding.weight

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgode.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Full user and item tables as provided by propagate_embeddings
    user_table, item_table = self.propagate_embeddings()

    # Embeddings of the users in the batch
    batch_users = user_table[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Case 'sampled': prediction on a sampled set of items
        candidates = item_table[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum(
            "be,bse->bs", batch_users, candidates
        )  # [batch_size, pad_seq]

    # Case 'full': prediction on all items (last row is dropped)
    catalogue = item_table[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum(
        "be,ie->bi", batch_users, catalogue
    )  # [batch_size, n_items]

train(mode=True)

Override train mode to clear cache when switching back to training.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/lightgode.py
def train(self, mode: bool = True):
    """Set the module mode and drop the inference cache on entering training."""
    super().train(mode)
    if not mode:
        return self
    # Entering training: cached inference embeddings are discarded
    self._cached_inference_emb = None
    return self

warprec.recommenders.collaborative_filtering_recommender.graph_based.macrgcn.MACRGCN

Bases: GraphRecommenderUtils, IterativeRecommender

Implementation of MACRGCN from Model-Agnostic Counterfactual Reasoning for Eliminating Popularity Bias in Recommender System (KDD 2021).

The model adds two auxiliary branches (user module, item module) to a standard LightGCN backbone and applies counterfactual inference at test time to remove the direct effect of item popularity on ranking scores.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

Dataset information (n_users, n_items, ...).

required
interactions Interactions

Training interactions for adjacency matrix.

required
*args Any

Variable length argument list.

()
seed int

Random seed.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

Uses pointwise loader with negatives for BCE training.

embedding_size int

Embedding dimension.

n_layers int

Number of LightGCN propagation layers.

reg_weight float

L2 regularization weight.

alpha float

Weight for item module loss L_I.

beta float

Weight for user module loss L_U.

c float

Counterfactual reference constant.

user_mlp_hidden int

Hidden size for user module MLP.

item_mlp_hidden int

Hidden size for item module MLP.

neg_samples int

Negative samples per positive.

batch_size int

Batch size.

epochs int

Training epochs.

learning_rate float

Learning rate.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/macrgcn.py
@model_registry.register(name="MACRGCN")
class MACRGCN(GraphRecommenderUtils, IterativeRecommender):
    """MACR on a LightGCN backbone, following Model-Agnostic Counterfactual
    Reasoning for Eliminating Popularity Bias in Recommender System (KDD 2021).

    Two auxiliary branches (a user module and an item module) are attached to
    the LightGCN matching score. At prediction time a counterfactual
    correction is applied to the fused score to remove the direct effect of
    item popularity on the ranking.

    Args:
        params (dict): Model parameters.
        info (dict): Dataset information (n_users, n_items, ...).
        interactions (Interactions): Training interactions for adjacency matrix.
        *args (Any): Variable length argument list.
        seed (int): Random seed.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: Uses pointwise loader with negatives for BCE training.
        embedding_size (int): Embedding dimension.
        n_layers (int): Number of LightGCN propagation layers.
        reg_weight (float): L2 regularization weight.
        alpha (float): Weight for item module loss L_I.
        beta (float): Weight for user module loss L_U.
        c (float): Counterfactual reference constant.
        user_mlp_hidden (int): Hidden size for user module MLP.
        item_mlp_hidden (int): Hidden size for item module MLP.
        neg_samples (int): Negative samples per positive.
        batch_size (int): Batch size.
        epochs (int): Training epochs.
        learning_rate (float): Learning rate.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    reg_weight: float
    alpha: float
    beta: float
    c: float
    user_mlp_hidden: int
    item_mlp_hidden: int
    neg_samples: int
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        emb_dim = self.embedding_size
        padded_item_count = self.n_items + 1  # extra row is the padding index

        # ---------------- Backbone: LightGCN (main branch, y_k) ----------------
        self.user_embedding = nn.Embedding(self.n_users, emb_dim)
        self.item_embedding = nn.Embedding(
            padded_item_count, emb_dim, padding_idx=self.n_items
        )

        # Adjacency matrix (symmetric normalization handled by LGConv)
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            padded_item_count,
        )

        # One LGConv per propagation layer
        lgconv_stack = [
            (LGConv(), "x, edge_index -> x") for _ in range(self.n_layers)
        ]
        self.propagation_network = torch_geometric.nn.Sequential(
            "x, edge_index", lgconv_stack
        )

        # ---------------- User module (y_u) ----------------
        # Maps a user embedding to a scalar score.
        # ASSUMPTION (from the original author): 2-layer MLP with ReLU; the
        # paper allows MLPs but does not fix the depth.
        self.user_module = nn.Sequential(
            nn.Linear(emb_dim, self.user_mlp_hidden),
            nn.ReLU(),
            nn.Linear(self.user_mlp_hidden, 1),
        )

        # ---------------- Item module (y_i) ----------------
        # Maps an item embedding to a scalar score; same architecture as the
        # user module (same assumption).
        self.item_module = nn.Sequential(
            nn.Linear(emb_dim, self.item_mlp_hidden),
            nn.ReLU(),
            nn.Linear(self.item_mlp_hidden, 1),
        )

        # ---------------- Losses ----------------
        # BCE on logits is shared by L_O, L_I and L_U
        self.bce_loss = nn.BCEWithLogitsLoss(reduction="mean")
        self.reg_loss = EmbLoss()

        # Weight initialization over every submodule registered above
        self.apply(self._init_weights)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the pointwise training dataloader with sampled negatives.

        ``sessions`` is accepted for interface compatibility and is not used.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=self.neg_samples,
            batch_size=self.batch_size,
            **kwargs,
        )

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Run LightGCN propagation and average the per-layer embeddings.

        Returns:
            Tuple[Tensor, Tensor]: (user_embeddings, item_embeddings) after
                multi-layer graph convolution with mean pooling across layers.
        """
        ego = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

        # Keep the adjacency on the same device as the embeddings
        target_device = ego.device
        if self.adj.device() != target_device:
            self.adj = self.adj.to(target_device)

        # Layer 0 is the ego embedding; each conv consumes the previous output
        layer_outputs = [ego]
        for conv in self.propagation_network.children():
            layer_outputs.append(conv(layer_outputs[-1], self.adj))

        # Uniform average over all layers (including layer 0)
        pooled = torch.stack(layer_outputs, dim=0).mean(dim=0)

        users, items = torch.split(pooled, [self.n_users, self.n_items + 1])
        return users, items

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the multi-task BCE loss over the three branches.

        The fused score is y_ui = y_k * sigma(y_i) * sigma(y_u) and the loss is
            L = L_O + alpha * L_I + beta * L_U + reg_weight * L2(ego embeddings)

        Args:
            batch (Any): Tuple of (user, item, rating) from pointwise loader.
            batch_idx (int): Batch index.

        Returns:
            Tensor: Combined loss scalar.
        """
        user, item, rating = batch[:3]
        rating = rating.float()

        # Main branch: dot product of propagated embeddings
        propagated_users, propagated_items = self.forward()
        matching = (propagated_users[user] * propagated_items[item]).sum(
            dim=-1
        )  # [batch]

        # Auxiliary branches read the ego (pre-propagation) embeddings.
        # ASSUMPTION (from the original author): the causal graph treats U and
        # I as raw inputs, hence ego rather than propagated embeddings.
        user_logit = self.user_module(self.user_embedding(user)).squeeze(-1)
        item_logit = self.item_module(self.item_embedding(item)).squeeze(-1)

        # Fused score: y_k gated by both branch sigmoids
        fused = matching * torch.sigmoid(item_logit) * torch.sigmoid(user_logit)

        # One BCE term per branch, all against the same labels
        loss_o = self.bce_loss(fused, rating)
        loss_i = self.bce_loss(item_logit, rating)
        loss_u = self.bce_loss(user_logit, rating)

        # L2 regularization on ego embeddings
        reg = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(item),
        )

        loss = loss_o + self.alpha * loss_i + self.beta * loss_u + reg

        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        for tag, value in (
            ("loss_o", loss_o),
            ("loss_i", loss_i),
            ("loss_u", loss_u),
        ):
            self.log(tag, value, prog_bar=False, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Counterfactual inference.

        The returned score is:
            y_ui = y_k * sigma(y_i) * sigma(y_u)  -  c * sigma(y_i) * sigma(y_u)
                 = (y_k - c) * sigma(y_i) * sigma(y_u)

        Args:
            user_indices (Tensor): Batch of user indices.
            *args (Any): Variable length argument list.
            item_indices (Optional[Tensor]): Candidate item indices. If None,
                scores are computed for all items.
            **kwargs (Any): Arbitrary keyword arguments.

        Returns:
            Tensor: Debiased ranking scores [batch_size, n_items] or
                    [batch_size, k] if item_indices provided.
        """
        user_table, item_table = self.propagate_embeddings()
        batch_user_emb = user_table[user_indices]  # [batch, emb_size]

        # User gate sigma(y_u), computed from ego user embeddings
        user_gate = torch.sigmoid(
            self.user_module(self.user_embedding(user_indices)).squeeze(-1)
        )  # [batch]

        if item_indices is not None:
            # Sampled prediction over the given candidates
            matching = torch.einsum(
                "be,bke->bk", batch_user_emb, item_table[item_indices]
            )  # [batch, k]
            item_gate = torch.sigmoid(
                self.item_module(self.item_embedding(item_indices)).squeeze(-1)
            )  # [batch, k]
            return (matching - self.c) * item_gate * user_gate.unsqueeze(1)

        # Full prediction: every item except the trailing padding row
        matching = torch.matmul(
            batch_user_emb, item_table[:-1, :].t()
        )  # [batch, n_items]
        item_gate = torch.sigmoid(
            self.item_module(self.item_embedding.weight[:-1, :]).squeeze(-1)
        )  # [n_items]
        return (matching - self.c) * item_gate.unsqueeze(0) * user_gate.unsqueeze(1)

forward()

LightGCN forward pass — propagate and average embeddings.

Returns:

Type Description
Tuple[Tensor, Tensor]

(user_embeddings, item_embeddings) after multi-layer graph convolution with mean pooling across layers.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/macrgcn.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run LightGCN propagation and average the per-layer embeddings.

    Returns:
        Tuple[Tensor, Tensor]: (user_embeddings, item_embeddings) after
            multi-layer graph convolution with mean pooling across layers.
    """
    ego = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Keep the adjacency on the same device as the embeddings
    target_device = ego.device
    if self.adj.device() != target_device:
        self.adj = self.adj.to(target_device)

    # Layer 0 is the ego embedding; each conv consumes the previous output
    layer_outputs = [ego]
    for conv in self.propagation_network.children():
        layer_outputs.append(conv(layer_outputs[-1], self.adj))

    # Uniform average over all layers (including layer 0)
    pooled = torch.stack(layer_outputs, dim=0).mean(dim=0)

    users, items = torch.split(pooled, [self.n_users, self.n_items + 1])
    return users, items

predict(user_indices, *args, item_indices=None, **kwargs)

Counterfactual inference — Eq. 9, Algorithm 1.

At test time the ranking score is

y_ui = y_k * sigma(y_i) * sigma(y_u) - c * sigma(y_i) * sigma(y_u) = (y_k - c) * sigma(y_i) * sigma(y_u)

This removes the Natural Direct Effect (NDE) of item popularity (Section 3.4, Eq. 10), ranking items by Total Indirect Effect (TIE).

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user indices.

required
*args Any

Variable length argument list.

()
item_indices Optional[Tensor]

Candidate item indices. If None, scores are computed for all items.

None
**kwargs Any

Arbitrary keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Debiased ranking scores [batch_size, n_items] or [batch_size, k] if item_indices provided.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/macrgcn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Counterfactual inference.

    The returned score is:
        y_ui = y_k * sigma(y_i) * sigma(y_u)  -  c * sigma(y_i) * sigma(y_u)
             = (y_k - c) * sigma(y_i) * sigma(y_u)

    Args:
        user_indices (Tensor): Batch of user indices.
        *args (Any): Variable length argument list.
        item_indices (Optional[Tensor]): Candidate item indices. If None,
            scores are computed for all items.
        **kwargs (Any): Arbitrary keyword arguments.

    Returns:
        Tensor: Debiased ranking scores [batch_size, n_items] or
                [batch_size, k] if item_indices provided.
    """
    user_table, item_table = self.propagate_embeddings()
    batch_user_emb = user_table[user_indices]  # [batch, emb_size]

    # User gate sigma(y_u), computed from ego user embeddings
    user_gate = torch.sigmoid(
        self.user_module(self.user_embedding(user_indices)).squeeze(-1)
    )  # [batch]

    if item_indices is not None:
        # Sampled prediction over the given candidates
        matching = torch.einsum(
            "be,bke->bk", batch_user_emb, item_table[item_indices]
        )  # [batch, k]
        item_gate = torch.sigmoid(
            self.item_module(self.item_embedding(item_indices)).squeeze(-1)
        )  # [batch, k]
        return (matching - self.c) * item_gate * user_gate.unsqueeze(1)

    # Full prediction: every item except the trailing padding row
    matching = torch.matmul(
        batch_user_emb, item_table[:-1, :].t()
    )  # [batch, n_items]
    item_gate = torch.sigmoid(
        self.item_module(self.item_embedding.weight[:-1, :]).squeeze(-1)
    )  # [n_items]
    return (matching - self.c) * item_gate.unsqueeze(0) * user_gate.unsqueeze(1)

training_step(batch, batch_idx)

Multi-task training with BCE losses on three branches.

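Implements Eq. 7 (fusion) and Eq. 8 (multi-task loss): L = L_O + alpha * L_I + beta * L_U. The returned loss additionally includes an L2 regularization term on the ego embeddings, weighted by reg_weight.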

Parameters:

Name Type Description Default
batch Any

Tuple of (user, item, rating) from pointwise loader.

required
batch_idx int

Batch index.

required

Returns:

Name Type Description
Tensor Tensor

Combined loss scalar.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/macrgcn.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Compute the three-branch multi-task BCE objective for one batch.

    The fused score of Eq. 7, ``y_k * sigmoid(y_i) * sigmoid(y_u)``, drives the
    main loss L_O, while the raw item-module and user-module outputs drive
    L_I and L_U. The returned value is Eq. 8 plus a regularization term:
        L = L_O + alpha * L_I + beta * L_U + reg

    Args:
        batch (Any): Sequence whose first three entries are
            (user, item, rating); any further entries are ignored.
        batch_idx (int): Batch index (not used here).

    Returns:
        Tensor: Combined loss.
    """
    users, items, labels = batch[:3]
    labels = labels.float()

    # Backbone output of forward(), gathered for the samples of this batch
    all_user_emb, all_item_emb = self.forward()

    # y_k: matching score, i.e. dot product of the propagated embeddings
    matching = (all_user_emb[users] * all_item_emb[items]).sum(dim=-1)  # [batch]

    # y_u / y_i: the side modules read the embedding tables directly
    # (ego embeddings), not the propagated output of forward()
    user_logit = self.user_module(self.user_embedding(users)).squeeze(-1)
    item_logit = self.item_module(self.item_embedding(items)).squeeze(-1)

    # Eq. 7: fused ranking score
    fused = matching * torch.sigmoid(item_logit) * torch.sigmoid(user_logit)

    # Eq. 8 components: fused branch, item-only branch, user-only branch
    loss_o = self.bce_loss(fused, labels)
    loss_i = self.bce_loss(item_logit, labels)
    loss_u = self.bce_loss(user_logit, labels)

    # Regularization computed on the ego embeddings of the batch
    ego_users = self.user_embedding(users)
    ego_items = self.item_embedding(items)
    reg = self.reg_weight * self.reg_loss(ego_users, ego_items)

    loss = loss_o + self.alpha * loss_i + self.beta * loss_u + reg

    # Only the total loss is shown on the progress bar
    tracked = (
        ("loss", loss, True),
        ("loss_o", loss_o, False),
        ("loss_i", loss_i, False),
        ("loss_u", loss_u, False),
    )
    for tag, value, on_bar in tracked:
        self.log(tag, value, prog_bar=on_bar, on_step=False, on_epoch=True)
    return loss

warprec.recommenders.collaborative_filtering_recommender.graph_based.mixrec.MixRec

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of MixRec algorithm from "MixRec: Individual and Collective Mixing Empowers Data Augmentation for Recommender Systems" (WWW '25).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of LightGCN propagation layers.

ssl_lambda float

Weight for Contrastive Loss.

alpha float

Shape parameter for Beta distribution (for Individual Mixing).

temperature float

Temperature for InfoNCE.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/mixrec.py
@model_registry.register(name="MixRec")
class MixRec(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of MixRec algorithm from
    "MixRec: Individual and Collective Mixing Empowers Data Augmentation for Recommender Systems" (WWW '25).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of LightGCN propagation layers.
        ssl_lambda (float): Weight for Contrastive Loss.
        alpha (float): Shape parameter for Beta distribution (for Individual Mixing).
        temperature (float): Temperature for InfoNCE.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    ssl_lambda: float
    alpha: float
    temperature: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Create the embeddings, the adjacency matrix, the LGConv stack and the losses.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            interactions (Interactions): The training interactions, used to
                build the adjacency matrix.
            *args (Any): Forwarded to the parent constructor.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Forwarded to the parent constructor.
        """
        super().__init__(params, info, *args, seed=seed, **kwargs)

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row is reserved for the padding item (index n_items)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Graph Construction (LightGCN Encoder)
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # Propagation Network
        self.propagation_network = nn.ModuleList(
            [LGConv() for _ in range(self.n_layers)]
        )

        # Vectorized normalization for embedding aggregation (Mean pooling)
        alpha_tensor = torch.full(
            (self.n_layers + 1,), 1.0 / (self.n_layers + 1), device=self.device
        )
        self.register_buffer("alpha_gcn", alpha_tensor)

        # Initialize weights
        self.apply(self._init_weights)

        # Losses
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): Source of the dataloader.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Standard LightGCN Forward pass to get encoded embeddings.

        Returns:
            Tuple[Tensor, Tensor]: User embeddings (n_users rows) and item
                embeddings (n_items + 1 rows, padding row included).
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Keep the adjacency matrix on the same device as the embeddings
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        embeddings_list = [ego_embeddings]
        current_embeddings = ego_embeddings

        for conv_layer in self.propagation_network:
            current_embeddings = conv_layer(current_embeddings, self.adj)
            embeddings_list.append(current_embeddings)

        # Weighted sum of the layer outputs (ego embeddings included)
        final_embeddings = torch.zeros_like(ego_embeddings)
        for k, emb in enumerate(embeddings_list):
            final_embeddings += emb * self.alpha_gcn[k]  # type: ignore[index]

        user_final, item_final = torch.split(
            final_embeddings, [self.n_users, self.n_items + 1]
        )
        return user_final, item_final

    def _mix_embeddings(
        self, original: Tensor, shuffled: Tensor, beta: Tensor
    ) -> Tensor:
        """Individual Mixing: Linear interpolation.

        Args:
            original (Tensor): Embeddings in batch order.
            shuffled (Tensor): The same embeddings in permuted order.
            beta (Tensor): Interpolation weight(s), broadcast against the inputs.

        Returns:
            Tensor: ``beta * original + (1 - beta) * shuffled``.
        """
        return beta * original + (1 - beta) * shuffled

    def _collective_mixing(self, embeddings: Tensor) -> Tensor:
        """Collective Mixing.

        Generates new samples by forming convex combinations of the entire batch.
        To be efficient and avoid O(B^2) sampling, we sample one set of Dirichlet
        weights per batch (or a small number of sets) and broadcast.

        Args:
            embeddings (Tensor): Batch of embeddings [Batch, Dim].

        Returns:
            Tensor: The single mixed vector expanded to [Batch, Dim].
        """
        batch_size = embeddings.size(0)
        # Sample coefficients from Dirichlet(1, 1, ..., 1)
        dir_dist = torch.distributions.Dirichlet(
            torch.ones(batch_size, device=self.device)
        )
        coeffs = dir_dist.sample().unsqueeze(0)  # [1, Batch]

        # Weighted sum of all embeddings in batch: [1, Batch] x [Batch, Dim] -> [1, Dim]
        collective_view = torch.mm(coeffs, embeddings)

        # Expand to match batch size for loss calculation
        return collective_view.expand(batch_size, -1)

    def _hard_nce_loss(
        self,
        anchor: Tensor,
        positive: Tensor,
        neg_disorder: Tensor,
        neg_collective: Tensor,
        temperature: float,
    ) -> Tensor:
        """Computes InfoNCE loss with hard negatives.

        Args:
            anchor (Tensor): Anchor embeddings [Batch, Dim].
            positive (Tensor): Positive (mixed) embeddings [Batch, Dim].
            neg_disorder (Tensor): First hard negative [Batch, Dim].
            neg_collective (Tensor): Second hard negative [Batch, Dim].
            temperature (float): Divisor applied to every similarity.

        Returns:
            Tensor: Per-sample loss of shape [Batch] (not reduced).
        """
        # L2 normalization
        anchor = F.normalize(anchor, p=2, dim=1)
        positive = F.normalize(positive, p=2, dim=1)
        neg_disorder = F.normalize(neg_disorder, p=2, dim=1)
        neg_collective = F.normalize(neg_collective, p=2, dim=1)

        # Positive similarity (Anchor vs Mixed)
        pos_sim = (anchor * positive).sum(dim=1) / temperature  # [Batch]

        # Hard negative 1 similarity (Anchor vs Disorder)
        dis_sim = (anchor * neg_disorder).sum(dim=1) / temperature  # [Batch]

        # Hard negative 2 similarity (Anchor vs Collective)
        col_sim = (anchor * neg_collective).sum(dim=1) / temperature  # [Batch]

        # Batch negatives similarity (Anchor vs Mixed); the diagonal holds the
        # positive pair, so the positive is part of the denominator as well
        batch_sim_matrix = (
            torch.mm(anchor, positive.t()) / temperature
        )  # [Batch, Batch]

        all_logits = torch.cat(
            [batch_sim_matrix, dis_sim.unsqueeze(1), col_sim.unsqueeze(1)], dim=1
        )  # [B, B + 2]

        # Loss = -log( exp(pos) / sum(exp(all)) )
        #      = -pos + logsumexp(all)

        loss = -pos_sim + torch.logsumexp(all_logits, dim=1)
        return loss

    def _dual_mixing_cl_loss(
        self,
        original: Tensor,
        mixed: Tensor,
        disordered: Tensor,
        collective: Tensor,
        beta: Tensor,
    ) -> Tensor:
        """Calculates the Dual-Mixing Contrastive Loss.

        L_user = beta * L_pos + (1 - beta) * L_neg

        L_pos: Anchor=Original. Pos=Mixed. Negs={Disordered, Collective}.
        L_neg: Anchor=Disordered. Pos=Mixed. Negs={Original, Collective}.

        Args:
            original (Tensor): Original embeddings [Batch, Dim].
            mixed (Tensor): Individually mixed embeddings [Batch, Dim].
            disordered (Tensor): Shuffled embeddings [Batch, Dim].
            collective (Tensor): Collectively mixed embeddings [Batch, Dim].
            beta (Tensor): Per-sample mixing weights, [Batch, 1] or [Batch].

        Returns:
            Tensor: Mean over the batch of the per-sample weighted loss.
        """
        # L_pos: Anchor=Original, Pos=Mixed, HardNegs={Disordered, Collective}
        l_pos = self._hard_nce_loss(
            anchor=original,
            positive=mixed,
            neg_disorder=disordered,
            neg_collective=collective,
            temperature=self.temperature,
        )

        # L_neg: Anchor=Disordered, Pos=Mixed, HardNegs={Original, Collective}
        l_neg = self._hard_nce_loss(
            anchor=disordered,
            positive=mixed,
            neg_disorder=original,
            neg_collective=collective,
            temperature=self.temperature,
        )

        # The per-sample losses are [Batch] while beta is sampled as
        # [Batch, 1]. Multiplying them directly broadcasts to [Batch, Batch]
        # and pairs every sample's loss with every other sample's weight, so
        # beta is flattened to keep the weighting element-wise.
        weight = beta.reshape(-1)

        # Weighted Sum
        loss = (weight * l_pos + (1 - weight) * l_neg).mean()
        return loss

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the MixRec training loss for one batch.

        The loss is the sum of a beta-weighted BPR term (plain negatives and
        mixed negatives), the dual-mixing contrastive term scaled by
        ``ssl_lambda`` and the embedding regularization term.

        Args:
            batch (Any): Triple (user, pos_item, neg_item).
            batch_idx (int): Batch index (not used here).

        Returns:
            Tensor: The total loss.
        """
        user, pos_item, neg_item = batch
        batch_size = user.size(0)

        # Get propagated embeddings
        user_all_embeddings, item_all_embeddings = self.forward()

        # Get embeddings for current batch users and items
        u_embeddings = user_all_embeddings[user]
        pos_embeddings = item_all_embeddings[pos_item]
        neg_embeddings = item_all_embeddings[neg_item]

        # Calculate BPR loss
        pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)
        neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
        bpr_pos = self.bpr_loss(pos_scores, neg_scores)

        # Calculate L2 loss
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Generate Mixing Parameters
        # Beta distribution Beta(alpha, alpha)
        beta_dist = torch.distributions.Beta(self.alpha, self.alpha)
        beta_u = beta_dist.sample((batch_size, 1)).to(self.device)
        beta_i = beta_dist.sample((batch_size, 1)).to(self.device)

        # Disordered Views (Shuffled batch); one permutation is shared by
        # users, positives and negatives
        perm_idx = torch.randperm(batch_size, device=self.device)
        u_dis = u_embeddings[perm_idx]
        pos_dis = pos_embeddings[perm_idx]
        neg_dis = neg_embeddings[perm_idx]

        # Individual Mixing
        u_mix = self._mix_embeddings(u_embeddings, u_dis, beta_u)
        pos_mix = self._mix_embeddings(pos_embeddings, pos_dis, beta_i)
        neg_mix = self._mix_embeddings(neg_embeddings, neg_dis, beta_i)

        # Collective Mixing
        u_cm = self._collective_mixing(u_embeddings)
        pos_cm = self._collective_mixing(pos_embeddings)

        # Mixed Negative BPR
        # Encourages item to stay close to user even if mixed with negative
        neg_mix_scores = torch.mul(u_embeddings, neg_mix).sum(dim=1)
        bpr_neg = self.bpr_loss(pos_scores, neg_mix_scores)

        # We use the mean of beta_i for scalar weighting or element-wise
        # Since BPR returns a scalar mean, we use mean of beta
        b_i_scalar = beta_i.mean()
        main_loss = b_i_scalar * bpr_pos + (1 - b_i_scalar) * bpr_neg

        # B. Dual-Mixing Contrastive Loss (Eq. 9 & 10)
        cl_user = self._dual_mixing_cl_loss(u_embeddings, u_mix, u_dis, u_cm, beta_u)
        cl_item = self._dual_mixing_cl_loss(
            pos_embeddings, pos_mix, pos_dis, pos_cm, beta_i
        )

        cl_loss = self.ssl_lambda * (cl_user + cl_item)

        # Loss logging
        loss = main_loss + cl_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Standard LightGCN Forward pass to get encoded embeddings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/mixrec.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Propagate the ego embeddings through every layer and pool the outputs.

    Returns:
        Tuple[Tensor, Tensor]: User embeddings (n_users rows) and item
            embeddings (n_items + 1 rows), obtained by splitting the
            alpha_gcn-weighted sum of the ego embeddings and all layer outputs.
    """
    ego = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Move the adjacency matrix next to the embeddings when devices differ
    target_device = ego.device
    if self.adj.device() != target_device:
        self.adj = self.adj.to(target_device)

    # Collect the input plus the output of each propagation layer
    layer_outputs = [ego]
    hidden = ego
    for conv in self.propagation_network:
        hidden = conv(hidden, self.adj)
        layer_outputs.append(hidden)

    # Accumulate the per-layer contributions in place, in layer order
    pooled = torch.zeros_like(ego)
    for layer_idx, layer_emb in enumerate(layer_outputs):
        pooled += layer_emb * self.alpha_gcn[layer_idx]  # type: ignore[index]

    split_sizes = [self.n_users, self.n_items + 1]
    user_part, item_part = torch.split(pooled, split_sizes)
    return user_part, item_part

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/mixrec.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with dot products of embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): Extra positional arguments (ignored).
        item_indices (Optional[Tensor]): Per-user item indices to score. When
            None, every item except the last embedding row is scored.
        **kwargs (Any): Extra keyword arguments (ignored).

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Embeddings as returned by the propagation helper
    all_users, all_items = self.propagate_embeddings()
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled mode: one row of candidate items per user
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Full mode: the trailing (padding) row is excluded from scoring
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.ngcf.NGCF

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of NGCF algorithm from Neural Graph Collaborative Filtering (SIGIR 2019)

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

weight_size list[int]

List of hidden sizes for each layer.

node_dropout float

Dropout rate for nodes in the adjacency matrix.

message_dropout float

Dropout rate for messages/embeddings during propagation.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ngcf.py
@model_registry.register(name="NGCF")
class NGCF(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of NGCF algorithm from
        Neural Graph Collaborative Filtering (SIGIR 2019)

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        weight_size (list[int]): List of hidden sizes for each layer.
        node_dropout (float): Dropout rate for nodes in the adjacency matrix.
        message_dropout (float): Dropout rate for messages/embeddings during propagation.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    weight_size: list[int]
    node_dropout: float
    message_dropout: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Create the embeddings, the normalized adjacency and the NGCF layers.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            interactions (Interactions): The training interactions, used to
                build the adjacency matrix.
            *args (Any): Forwarded to the parent constructor.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Forwarded to the parent constructor.
        """
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Initialize the hidden dimensions
        self.hidden_size_list = [
            self.embedding_size
        ] + self.weight_size  # [embed_k, layer1_dim, layer2_dim, ...]

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row is reserved for the padding item (index n_items)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )
        self.adj = self._get_norm_adj_mat_ngcf(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,  # Adjust for padding idx
        )

        # Optionally define a dropout layer (optimized for sparse data)
        self.sparse_dropout = (
            SparseDropout(self.node_dropout) if self.node_dropout > 0 else None
        )

        # Initialization of the propagation network: one NGCFLayer per
        # consecutive pair of sizes in hidden_size_list
        self.propagation_network = nn.ModuleList()
        for in_f, out_f in zip(self.hidden_size_list, self.hidden_size_list[1:]):
            self.propagation_network.append(
                NGCFLayer(in_f, out_f, self.message_dropout)
            )

        # Init embedding weights
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): Source of the dataloader.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to ``get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the BPR loss plus embedding regularization for one batch.

        Args:
            batch (Any): Triple (user, pos_item, neg_item).
            batch_idx (int): Batch index (not used here).

        Returns:
            The sum of the BPR loss and the weighted regularization loss.
        """
        user, pos_item, neg_item = batch

        # Get propagated embeddings
        user_all_embeddings, item_all_embeddings = self.forward()

        # Get embeddings for current batch users and items
        u_embeddings = user_all_embeddings[user]
        pos_embeddings = item_all_embeddings[pos_item]
        neg_embeddings = item_all_embeddings[neg_item]

        # Calculate BPR loss
        pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)
        neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Calculate L2 regularization (on the ego embeddings of the batch)
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        loss = bpr_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of the NGCF model for embedding propagation.

        Returns:
            Tuple[Tensor, Tensor]: User and item embeddings after propagation.
        """
        # Get the ego_embeddings [user + item]
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Ensure adjacency matrix is on the same device as embeddings
        if self.adj.device != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        embeddings_list = [ego_embeddings]

        # Apply dropout if required from hyperparameters
        adj_matrix_current = self.adj
        if self.sparse_dropout is not None:
            adj_matrix_current = self.sparse_dropout(self.adj)

        # Forward each embedding through the sequential
        # propagation network
        current_embeddings = ego_embeddings
        for layer_module in self.propagation_network:
            current_embeddings = layer_module(current_embeddings, adj_matrix_current)
            embeddings_list.append(current_embeddings)

        # Concatenate embeddings from all layers (including ego-embeddings)
        # along the feature dimension
        ngcf_all_embeddings = torch.cat(embeddings_list, dim=1)

        # Split into user and item embeddings
        user_all_embeddings, item_all_embeddings = torch.split(
            ngcf_all_embeddings,
            [self.n_users, self.n_items + 1],  # Adjust for padding idx
        )
        return user_all_embeddings, item_all_embeddings

    def _get_norm_adj_mat_ngcf(
        self,
        interaction_matrix: coo_matrix,
        n_users: int,
        n_items: int,
    ) -> SparseTensor:
        """Get the normalized interaction matrix of users and items specific to NGCF.
        This includes constructing the full adjacency matrix and applying symmetric normalization.

        Args:
            interaction_matrix (coo_matrix): The full interaction matrix in coo format.
                Only the stored coordinates are used; the stored values are ignored.
            n_users (int): The number of users.
            n_items (int): The number of items.

        Returns:
            SparseTensor: The sparse normalized adjacency matrix (A_hat), built with
                ``torch.sparse_coo_tensor`` and coalesced.
        """
        # Build adjacency matrix (A)
        # [num_user + n_items x num_user + n_items]
        n_nodes = n_users + n_items
        user_idx = np.asarray(interaction_matrix.row, dtype=np.int64)
        # Item nodes are placed after the user nodes
        item_idx = np.asarray(interaction_matrix.col, dtype=np.int64) + n_users

        # Each stored interaction contributes the two symmetric edges
        # user -> item and item -> user. Building them in one vectorized call
        # avoids assigning entries one by one from a Python loop.
        edge_rows = np.concatenate([user_idx, item_idx])
        edge_cols = np.concatenate([item_idx, user_idx])
        A = sp.coo_matrix(
            (np.ones(edge_rows.shape[0], dtype=np.float32), (edge_rows, edge_cols)),
            shape=(n_nodes, n_nodes),
        ).tocsr()  # CSR for efficient row-wise sum and diagonal matrix creation
        # The COO -> CSR conversion sums repeated coordinates; reset every
        # stored edge to 1.0 so the adjacency stays binary.
        A.data[:] = 1.0

        # Symmetric Normalization: D^{-0.5} A D^{-0.5}
        sum_rows = np.array(A.sum(axis=1)).flatten()
        # Add epsilon to avoid division by zero
        sum_rows[sum_rows == 0] = 1e-7
        diag_inv_sqrt = np.power(sum_rows, -0.5)
        D_inv_sqrt = sp.diags(diag_inv_sqrt)

        # L = D^{-0.5} A D^{-0.5}
        L = D_inv_sqrt.dot(A).dot(D_inv_sqrt)

        # Convert to COO format for SparseTensor
        L_coo = L.tocoo()
        indices = torch.LongTensor(np.vstack((L_coo.row, L_coo.col)))
        values = torch.FloatTensor(L_coo.data)
        shape = torch.Size(L_coo.shape)

        return torch.sparse_coo_tensor(indices, values, shape).coalesce()

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        # Compute scores using the appropriate einsum operation
        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Forward pass of the NGCF model for embedding propagation.

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: User and item embeddings after propagation.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ngcf.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run the ego embeddings through the layer stack and concatenate the outputs.

    Returns:
        Tuple[Tensor, Tensor]: User and item embeddings after propagation; the
            feature dimension is the concatenation of the ego embeddings and
            the output of every layer.
    """
    # Stacked [user + item] input embeddings
    ego = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # The adjacency matrix has to live on the embeddings' device
    if self.adj.device != ego.device:
        self.adj = self.adj.to(ego.device)

    # Use the dropped-out adjacency only when a dropout module was configured
    if self.sparse_dropout is None:
        graph = self.adj
    else:
        graph = self.sparse_dropout(self.adj)

    # Each layer consumes the output of the previous one
    per_layer = [ego]
    hidden = ego
    for layer in self.propagation_network:
        hidden = layer(hidden, graph)
        per_layer.append(hidden)

    # Concatenate along the feature dimension, then cut the node dimension
    # into the user part and the item part (padding row included)
    stacked = torch.cat(per_layer, dim=1)
    user_part, item_part = torch.split(stacked, [self.n_users, self.n_items + 1])
    return user_part, item_part

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ngcf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score a batch of users with the propagated NGCF embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments (not used here).
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item except the trailing padding row is scored.
        **kwargs (Any): The dictionary of keyword arguments (not used here).

    Returns:
        Tensor: The score matrix {user x item}, i.e. [batch_size, n_items]
            in the full case or [batch_size, pad_seq] in the sampled case.
    """
    # Run the propagation network once and pick the rows of the batch users
    all_users, all_items = self.propagate_embeddings()
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Case 'sampled': one candidate list per user
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Case 'full': drop the last row (padding item) and score the rest
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.paac.PAAC

Bases: GraphRecommenderUtils, IterativeRecommender

Implementation of PAAC from "Popularity-Aware Alignment and Contrast for Mitigating Popularity Bias in Recommendation" (KDD 2024).

Popularity-Aware Alignment and Contrast (PAAC) for Mitigating Popularity Bias.

PAAC wraps a LightGCN encoder with the supervised alignment and re-weighted contrastive objectives defined in the paper.

Parameters:

Name Type Description Default
params dict

Model hyperparameters (see PAACConfig).

required
info dict

Dataset metadata (n_users, n_items, …).

required
interactions Interactions

Training interactions used to build the graph and popularity counts.

required
*args Any

Forwarded to parent constructors.

()
seed int

Random seed for reproducibility.

42
**kwargs Any

Forwarded to parent constructors.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

POS_NEG_LOADER — yields (user, pos_item, neg_item) triples.

embedding_size int

Embedding dimensionality.

n_layers int

LightGCN propagation depth.

lambda1 float

Weight on the supervised alignment loss (λ₁).

lambda2 float

Weight on the re-weighting contrastive loss (λ₂).

temperature float

InfoNCE temperature τ.

gamma float

Popular-vs-unpopular positive-sample weight γ (Eq. 7).

beta float

Cross-group negative-sample weight β (Eq. 8/9).

pop_ratio float

Fraction of batch items classified as popular per mini-batch.

eps float

Noise scale for contrastive augmentation.

reg_weight float

L2 regularization coefficient λ₃.

batch_size int

Training batch size.

epochs int

Maximum training epochs.

learning_rate float

Adam learning rate.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/paac.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
@model_registry.register(name="PAAC")
class PAAC(GraphRecommenderUtils, IterativeRecommender):
    """Implementation of PAAC from
    "Popularity-Aware Alignment and Contrast for Mitigating Popularity Bias in Recommendation" (KDD 2024).

    Popularity-Aware Alignment and Contrast (PAAC) for Mitigating Popularity Bias.

    PAAC wraps a LightGCN encoder with the supervised alignment and
    re-weighted contrastive objectives defined in the paper.

    Args:
        params (dict): Model hyperparameters (see PAACConfig).
        info (dict): Dataset metadata (n_users, n_items, …).
        interactions (Interactions): Training interactions used to build the
            graph and popularity counts.
        *args (Any): Forwarded to parent constructors.
        seed (int): Random seed for reproducibility.
        **kwargs (Any): Forwarded to parent constructors.

    Attributes:
        DATALOADER_TYPE: POS_NEG_LOADER — yields (user, pos_item, neg_item) triples.
        embedding_size (int): Embedding dimensionality.
        n_layers (int): LightGCN propagation depth.
        lambda1 (float): Weight on the supervised alignment loss (λ₁).
        lambda2 (float): Weight on the re-weighting contrastive loss (λ₂).
        temperature (float): InfoNCE temperature τ.
        gamma (float): Popular-vs-unpopular positive-sample weight γ (Eq. 7).
        beta (float): Cross-group negative-sample weight β (Eq. 8/9).
        pop_ratio (float): Fraction of batch items classified as popular per mini-batch.
        eps (float): Noise scale for contrastive augmentation.
        reg_weight (float): L2 regularization coefficient λ₃.
        batch_size (int): Training batch size.
        epochs (int): Maximum training epochs.
        learning_rate (float): Adam learning rate.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    # (declared only; presumably populated from `params` by the parent
    # constructor — confirm against the base class)
    embedding_size: int
    n_layers: int
    lambda1: float
    lambda2: float
    temperature: float
    gamma: float
    beta: float
    pop_ratio: float
    eps: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ) -> None:
        """Build embeddings, the adjacency matrix, popularity counts and losses.

        Args:
            params (dict): Model hyperparameters.
            info (dict): Dataset metadata.
            interactions (Interactions): Training interactions; its sparse
                matrix feeds both the adjacency matrix and the popularity counts.
            *args (Any): Forwarded to parent constructors.
            seed (int): Random seed for reproducibility.
            **kwargs (Any): Forwarded to parent constructors.
        """
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # ------------------------------------------------------------------ #
        # Embeddings                                                           #
        # ------------------------------------------------------------------ #
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row at index n_items is reserved as the padding item
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Normalized adjacency matrix for LightGCN propagation
        # (item side sized n_items + 1 to match the padded item embedding)
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # ------------------------------------------------------------------ #
        # Popularity counts (Section 3.1, Eq. 3/4)                           #
        # Popularity p(i) = interaction frequency of item i in training data  #
        # ------------------------------------------------------------------ #
        # ASSUMPTION: item popularity = raw interaction count from the
        # training set, consistent with prior works cited in the paper
        # (IPS, MACR). Counts indexed by WarpRec item id (0-based).
        # NOTE(review): get_sparse() is called a second time here; whether the
        # result is cached by Interactions is not visible from this class.
        sparse_mat = interactions.get_sparse()  # CSR: users × items
        # Sum over users to get per-item interaction count
        item_pop = torch.tensor(
            sparse_mat.sum(axis=0).A1, dtype=torch.float32
        )  # shape [n_items]
        # Register as buffer so it moves to the correct device automatically
        self.register_buffer("item_popularity", item_pop)

        self.bpr_loss = BPRLoss()  # Eq. 1
        self.reg_loss = EmbLoss()  # λ₃ * ||Θ||²  (Eq. 11)
        self.info_nce_loss = InfoNCELoss(self.temperature)

        # Weight initialization (xavier_normal_ for Linear/Embedding)
        self.apply(self._init_weights)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ) -> DataLoader:
        """Return the training dataloader for this model.

        Delegates to ``interactions.get_contrastive_dataloader`` with the
        model's batch size; ``sessions`` is accepted but not used here.

        Args:
            interactions (Interactions): Training interactions.
            sessions (Sessions): Unused by this model.
            **kwargs (Any): Forwarded to ``get_contrastive_dataloader``.

        Returns:
            DataLoader: The dataloader produced by ``interactions``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _perturb_embedding(self, embedding: Tensor) -> Tensor:
        """Apply SimGCL-style noise perturbation.

        Uniform noise in [0, 1) is given the sign of ``embedding``,
        L2-normalized along dim 1 and scaled by ``self.eps`` before being
        added to ``embedding``.

        Args:
            embedding (Tensor): Embedding matrix to perturb.

        Returns:
            Tensor: A new tensor ``embedding + noise``; the input is not
                modified in place.
        """
        noise = torch.rand_like(embedding)
        # Keep each noise component in the same orthant as the embedding
        noise = noise * embedding.sign()
        noise = F.normalize(noise, p=2, dim=1) * self.eps
        return embedding + noise

    def forward(self, perturbed: bool = False) -> Tuple[Tensor, Tensor]:
        """LightGCN graph propagation with an optional perturbed view.

        Args:
            perturbed (bool): If True, noise from ``_perturb_embedding`` is
                added after every propagation layer.

        Returns:
            Tuple[Tensor, Tensor]: User embeddings (``n_users`` rows) and item
                embeddings (``n_items + 1`` rows, padding row included),
                averaged over the ego layer and all propagated layers.
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Move adjacency matrix to current device if needed
        # NOTE(review): `device` is invoked as a method here, which matches
        # sparse containers such as torch_sparse.SparseTensor; a plain
        # torch.Tensor exposes `device` as a property and would fail on this
        # call — confirm the type returned by get_adj_mat.
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        # Running sum of layer outputs, starting from a copy of layer 0
        final_embeddings = ego_embeddings.clone()
        current_embeddings = ego_embeddings

        for _ in range(self.n_layers):
            current_embeddings = self.adj.matmul(current_embeddings)
            if perturbed:
                current_embeddings = self._perturb_embedding(current_embeddings)
            final_embeddings.add_(current_embeddings)

        # Mean over the n_layers + 1 accumulated layers
        all_embeddings = final_embeddings / (self.n_layers + 1)

        user_all_emb, item_all_emb = torch.split(
            all_embeddings, [self.n_users, self.n_items + 1]
        )
        return user_all_emb, item_all_emb

    def _supervised_alignment_loss(
        self,
        users: Tensor,
        pos_items: Tensor,
        item_embeddings: Tensor,
    ) -> Tensor:
        """Compute the batch-local supervised alignment loss from Eq. 5.

        For every user with at least two distinct positive items in the batch,
        the items are split by popularity into a popular and an unpopular half
        and the squared distances between all (popular, unpopular) embedding
        pairs are summed, then divided by the user's item count.

        Args:
            users (Tensor): User indices of the batch.
            pos_items (Tensor): Positive item indices aligned with ``users``.
            item_embeddings (Tensor): Item embedding matrix indexed by item id.

        Returns:
            Tensor: Scalar loss summed over the valid users, or a constant 0.0
                tensor when no user has two or more items in the batch.
        """
        # Find unique users and items in the current batch
        unique_users, user_inverse = torch.unique(users, return_inverse=True)
        unique_items, item_inverse = torch.unique(pos_items, return_inverse=True)

        num_u = unique_users.size(0)
        num_i = unique_items.size(0)

        # Create a boolean mask of interactions in the batch (Users x Items)
        mask = torch.zeros((num_u, num_i), dtype=torch.bool, device=users.device)
        mask[user_inverse, item_inverse] = True

        # Count items per user and filter out users with fewer than 2 items
        n_items = mask.sum(dim=1)
        valid_users = n_items >= 2

        if not valid_users.any():
            return torch.tensor(0.0, device=users.device)

        # Reduce matrices to valid users only to save computation memory and time
        mask = mask[valid_users]
        n_items = n_items[valid_users]
        num_u = mask.size(0)

        # Get item popularity and calculate per-user ranks
        pops = self.item_popularity[unique_items]  # type: ignore[index]

        # Assign -inf to items the user hasn't interacted with
        # to push them to the bottom during the sorting phase
        user_item_pops = torch.where(
            mask, pops.unsqueeze(0), torch.tensor(-float("inf"), device=users.device)
        )

        # Calculate the popularity rank (0 = most popular)
        # scatter_ inverts the sort permutation: ranks[u, sort_idx[u, k]] = k
        sort_idx = torch.argsort(user_item_pops, dim=1, descending=True)
        ranks = torch.empty_like(sort_idx)
        ranks.scatter_(
            1,
            sort_idx,
            torch.arange(num_i, device=users.device).unsqueeze(0).expand(num_u, num_i),
        )

        # Determine the split points (n_pop and n_unpop)
        # With an odd item count the extra item goes to the unpopular group
        n_pop = n_items // 2
        n_unpop = n_items - n_pop

        # Create masks for popular and unpopular items
        # An item is "pop" if the user interacted with it (mask) AND its rank is < n_pop
        pop_mask = mask & (ranks < n_pop.unsqueeze(1))
        unpop_mask = mask & (ranks >= n_pop.unsqueeze(1))

        # Extract embeddings and compute sums
        H = item_embeddings[unique_items]  # [num_i, D]
        H_sq = H.pow(2).sum(dim=1)  # [num_i]

        # Sum of squares (equivalent to sum_sq_pop / sum_sq_unpop)
        sum_sq_pop = (pop_mask.float() * H_sq.unsqueeze(0)).sum(dim=1)  # [num_u]
        sum_sq_unpop = (unpop_mask.float() * H_sq.unsqueeze(0)).sum(dim=1)  # [num_u]

        # Sum of embeddings (equivalent to sum_pop / sum_unpop)
        # We use matrix multiplication: [num_u, num_i] @ [num_i, D] -> [num_u, D]
        sum_pop = pop_mask.float() @ H  # [num_u, D]
        sum_unpop = unpop_mask.float() @ H  # [num_u, D]

        # Dot product between the sums
        dot_sums = (sum_pop * sum_unpop).sum(dim=1)  # [num_u]

        # Final loss calculation
        # Closed form of sum over (p in pop, q in unpop) of ||h_p - h_q||^2:
        #   n_unpop * sum||h_p||^2 + n_pop * sum||h_q||^2 - 2 * (sum h_p).(sum h_q)
        # so no pairwise [pop x unpop] tensor has to be materialized.
        pair_loss = (
            n_unpop.float() * sum_sq_pop + n_pop.float() * sum_sq_unpop - 2.0 * dot_sums
        )

        # The exact sum is non-negative; the clamp presumably guards against
        # small negative values from floating-point cancellation.
        loss_per_user = torch.clamp(pair_loss, min=0.0) / n_items.float()

        return loss_per_user.sum()

    def _reweighting_contrast_loss(
        self,
        item_emb_view1: Tensor,
        item_emb_view2: Tensor,
        batch_items: Tensor,
    ) -> Tensor:
        """Compute the item-side re-weighted contrastive loss.

        Args:
            item_emb_view1 (Tensor): Item embeddings of the first perturbed view.
            item_emb_view2 (Tensor): Item embeddings of the second perturbed view.
            batch_items (Tensor): Item indices to contrast.

        Returns:
            Tensor: Scalar loss; a zero scalar when fewer than two items are
                given or when one popularity group is empty.
        """
        # A popular/unpopular split needs at least two items
        if batch_items.numel() < 2:
            return item_emb_view1.new_zeros(())

        # Unit-normalize both views so dot products are cosine similarities
        h_prime = F.normalize(item_emb_view1[batch_items], p=2, dim=1)
        h_dprime = F.normalize(item_emb_view2[batch_items], p=2, dim=1)

        pop_mask, unpop_mask = self._split_batch_items_by_popularity(batch_items)
        if not pop_mask.any() or not unpop_mask.any():
            return item_emb_view1.new_zeros(())

        return self._group_infonce(h_prime, h_dprime, pop_mask, unpop_mask)

    def _split_batch_items_by_popularity(
        self, batch_items: Tensor
    ) -> Tuple[Tensor, Tensor]:
        """Split a batch item set into top-x% popular and remaining unpopular items.

        Args:
            batch_items (Tensor): Item indices of the batch.

        Returns:
            Tuple[Tensor, Tensor]: Boolean masks over ``batch_items`` for the
                popular items and for their complement.
        """
        n_items = batch_items.numel()
        pops = self.item_popularity[batch_items]  # type: ignore[index]
        sorted_idx = torch.argsort(pops, descending=True)
        k_pop = math.ceil(self.pop_ratio * n_items)
        # Clamp into [1, n_items - 1] so both groups are non-empty; for a
        # single item this yields 0, a case the caller filters out beforehand.
        k_pop = min(max(k_pop, 1), n_items - 1)

        pop_mask = torch.zeros(n_items, dtype=torch.bool, device=batch_items.device)
        pop_mask[sorted_idx[:k_pop]] = True
        return pop_mask, ~pop_mask

    def _group_infonce(
        self,
        view1: Tensor,
        view2: Tensor,
        pop_mask: Tensor,
        unpop_mask: Tensor,
    ) -> Tensor:
        """Compute the item-side γ-weighted InfoNCE from Eq. 7-9.

        Within each popularity group the same-group similarities enter the
        denominator with weight 1 and the cross-group similarities with
        weight β; the two group losses are mixed as γ·L_pop + (1-γ)·L_unpop.

        Args:
            view1 (Tensor): First-view embeddings of the batch items.
            view2 (Tensor): Second-view embeddings of the batch items.
            pop_mask (Tensor): Boolean mask selecting the popular items.
            unpop_mask (Tensor): Boolean mask selecting the unpopular items.

        Returns:
            Tensor: Scalar weighted contrastive loss.
        """
        # β is floored at 1e-8 so the logarithm stays finite
        log_beta = math.log(max(self.beta, 1e-8))

        # ---- L^pop (Eq. 8) --- popular items as positive samples ---------- #
        if pop_mask.sum() > 0:
            h_pop_v1 = view1[pop_mask]  # [n_pop, D]
            h_pop_v2 = view2[pop_mask]  # [n_pop, D]
            h_unpop_v2 = view2[unpop_mask]  # [n_unpop, D]

            # Same item across the two views is the positive pair
            pos_scores_pop = (h_pop_v1 * h_pop_v2).sum(dim=1) / self.temperature

            intra_pop = torch.matmul(h_pop_v1, h_pop_v2.T) / self.temperature
            log_intra_pop = torch.logsumexp(intra_pop, dim=1)

            if h_unpop_v2.numel() > 0:
                cross_pop = torch.matmul(h_pop_v1, h_unpop_v2.T) / self.temperature
                # log(sum exp(intra) + β * sum exp(cross)), computed in log space
                log_cross_pop = log_beta + torch.logsumexp(cross_pop, dim=1)
                denom_pop = torch.logaddexp(log_intra_pop, log_cross_pop)
            else:
                denom_pop = log_intra_pop

            l_pop = -(pos_scores_pop - denom_pop).mean()
        else:
            l_pop = torch.tensor(0.0, device=view1.device)

        # ---- L^unpop (Eq. 9) — unpopular items as positive samples ------- #
        if unpop_mask.sum() > 0:
            h_unpop_v1 = view1[unpop_mask]  # [n_unpop, D]
            h_unpop_v2 = view2[unpop_mask]  # [n_unpop, D]
            h_pop_v2_for_unpop = view2[pop_mask]  # [n_pop, D]

            pos_scores_unpop = (h_unpop_v1 * h_unpop_v2).sum(dim=1) / self.temperature

            intra_unpop = torch.matmul(h_unpop_v1, h_unpop_v2.T) / self.temperature
            log_intra_unpop = torch.logsumexp(intra_unpop, dim=1)

            if h_pop_v2_for_unpop.numel() > 0:
                cross_unpop = (
                    torch.matmul(h_unpop_v1, h_pop_v2_for_unpop.T) / self.temperature
                )
                # Same β-weighted denominator as above, roles swapped
                log_cross_unpop = log_beta + torch.logsumexp(cross_unpop, dim=1)
                denom_unpop = torch.logaddexp(log_intra_unpop, log_cross_unpop)
            else:
                denom_unpop = log_intra_unpop

            l_unpop = -(pos_scores_unpop - denom_unpop).mean()
        else:
            l_unpop = torch.tensor(0.0, device=view1.device)

        return self.gamma * l_pop + (1.0 - self.gamma) * l_unpop

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Single training iteration following Algorithm 1 of the paper.

        Computes the total loss (Eq. 11), i.e. the three objectives plus
        L2 regularization:
            L = L_rec  +  λ₁ * L_sa  +  λ₂ * L_cl  +  λ₃ * ||Θ||²

        Args:
            batch (Any): Tuple of (user, pos_item, neg_item) from POS_NEG_LOADER.
            batch_idx (int): Batch index (unused).

        Returns:
            Tensor: Scalar total loss.
        """
        user, pos_item, neg_item = batch

        # One clean pass for ranking/alignment and two independently
        # perturbed passes that act as the contrastive views
        user_emb, item_emb = self.forward(perturbed=False)
        user_emb_v1, item_emb_v1 = self.forward(perturbed=True)
        user_emb_v2, item_emb_v2 = self.forward(perturbed=True)

        u_emb = user_emb[user]
        pos_emb = item_emb[pos_item]
        neg_emb = item_emb[neg_item]

        # L_rec: BPR on dot-product scores
        pos_scores = torch.mul(u_emb, pos_emb).sum(dim=1)
        neg_scores = torch.mul(u_emb, neg_emb).sum(dim=1)
        rec_loss = self.bpr_loss(pos_scores, neg_scores)

        # L_sa: supervised alignment on the clean item embeddings
        sa_loss = self._supervised_alignment_loss(user, pos_item, item_emb)

        # L_cl: average of the item-side re-weighted loss and a plain
        # user-side InfoNCE, both over the unique ids of the batch
        batch_items = pos_item.unique()
        batch_users = user.unique()
        item_cl_loss = self._reweighting_contrast_loss(
            item_emb_v1, item_emb_v2, batch_items
        )
        user_cl_loss = self.info_nce_loss(
            user_emb_v1[batch_users], user_emb_v2[batch_users]
        )
        cl_loss = 0.5 * (item_cl_loss + user_cl_loss)

        # Regularization acts on the raw (non-propagated) embeddings
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        loss = rec_loss + self.lambda1 * sa_loss + self.lambda2 * cl_loss + reg_loss

        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log("rec_loss", rec_loss, prog_bar=False, on_step=False, on_epoch=True)
        self.log("sa_loss", sa_loss, prog_bar=False, on_step=False, on_epoch=True)
        self.log("cl_loss", cl_loss, prog_bar=False, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score users against items using the learnt propagated embeddings.

        Prediction score: s(u, i) = z_u^T h_i  (dot product, Section 2.1)

        Args:
            user_indices (Tensor): Batch of user indices [B].
            *args (Any): Ignored.
            item_indices (Optional[Tensor]): If None, scores all items [B, N].
                Otherwise scores the provided candidates [B, K].
            **kwargs (Any): Ignored.

        Returns:
            Tensor: Score matrix of shape [B, N] (full) or [B, K] (sampled).
        """
        # NOTE(review): propagate_embeddings is not defined in this class;
        # presumably inherited and built on forward() — confirm.
        user_all_emb, item_all_emb = self.propagate_embeddings()
        user_emb = user_all_emb[user_indices]  # [B, D]

        if item_indices is None:
            # Full prediction over all items (exclude padding index)
            item_emb = item_all_emb[:-1, :]  # [N, D]
            return torch.einsum("be,ie->bi", user_emb, item_emb)
        else:
            # Sampled prediction
            item_emb = item_all_emb[item_indices]  # [B, K, D]
            return torch.einsum("be,bke->bk", user_emb, item_emb)

forward(perturbed=False)

LightGCN graph propagation with an optional perturbed view.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/paac.py
def forward(self, perturbed: bool = False) -> Tuple[Tensor, Tensor]:
    """Propagate embeddings LightGCN-style, optionally as a perturbed view.

    Args:
        perturbed (bool): If True, every propagated layer is passed through
            ``_perturb_embedding`` before being accumulated.

    Returns:
        Tuple[Tensor, Tensor]: User embeddings (``n_users`` rows) and item
            embeddings (``n_items + 1`` rows), averaged over the ego layer
            and all propagated layers.
    """
    layer_input = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Keep the adjacency matrix on the same device as the embeddings
    target_device = layer_input.device
    if self.adj.device() != target_device:
        self.adj = self.adj.to(target_device)

    depth = self.n_layers
    # Accumulator starts as a copy of layer 0 so the ego tensor is untouched
    running_sum = layer_input.clone()
    for _layer in range(depth):
        layer_input = self.adj.matmul(layer_input)
        if perturbed:
            layer_input = self._perturb_embedding(layer_input)
        running_sum.add_(layer_input)

    # Mean over the depth + 1 accumulated layers
    averaged = running_sum / (depth + 1)

    sizes = [self.n_users, self.n_items + 1]
    users_out, items_out = torch.split(averaged, sizes)
    return users_out, items_out

predict(user_indices, *args, item_indices=None, **kwargs)

Score users against items using the learnt propagated embeddings.

Prediction score: s(u, i) = z_u^T h_i (dot product, Section 2.1)

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user indices [B].

required
*args Any

Ignored.

()
item_indices Optional[Tensor]

If None, scores all items [B, N]. Otherwise scores the provided candidates [B, K].

None
**kwargs Any

Ignored.

{}

Returns:

Name Type Description
Tensor Tensor

Score matrix of shape [B, N] (full) or [B, K] (sampled).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/paac.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Compute dot-product scores between users and items.

    Prediction score: s(u, i) = z_u^T h_i  (dot product, Section 2.1)

    Args:
        user_indices (Tensor): Batch of user indices [B].
        *args (Any): Ignored.
        item_indices (Optional[Tensor]): Candidate items per user [B, K].
            If None, all items except the trailing padding row are scored.
        **kwargs (Any): Ignored.

    Returns:
        Tensor: Score matrix of shape [B, N] (full) or [B, K] (sampled).
    """
    users_propagated, items_propagated = self.propagate_embeddings()
    queries = users_propagated[user_indices]  # [B, D]

    if item_indices is not None:
        # Sampled prediction: per-user candidate lists
        candidates = items_propagated[item_indices]  # [B, K, D]
        equation = "be,bke->bk"
    else:
        # Full prediction: every item but the padding row
        candidates = items_propagated[:-1, :]  # [N, D]
        equation = "be,ie->bi"

    return torch.einsum(equation, queries, candidates)

training_step(batch, batch_idx)

Single training iteration following Algorithm 1 of the paper.

Computes the total loss (Eq. 11), i.e. the three objectives plus L2 regularization: L = L_rec + λ₁ * L_sa + λ₂ * L_cl + λ₃ * ||Θ||²

Parameters:

Name Type Description Default
batch Any

Tuple of (user, pos_item, neg_item) from POS_NEG_LOADER.

required
batch_idx int

Batch index (unused).

required

Returns:

Name Type Description
Tensor Tensor

Scalar total loss.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/paac.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Run one PAAC optimisation step and return the total loss (Eq. 11).

    The total combines the three objectives with L2 regularization:
        L = L_rec  +  λ₁ * L_sa  +  λ₂ * L_cl  +  λ₃ * ||Θ||²

    Args:
        batch (Any): Tuple of (user, pos_item, neg_item) from POS_NEG_LOADER.
        batch_idx (int): Batch index (unused).

    Returns:
        Tensor: Scalar total loss.
    """
    users, positives, negatives = batch

    # One clean pass, then two perturbed passes used as contrastive views.
    # The call order is kept fixed because the perturbation is random.
    clean_users, clean_items = self.forward(perturbed=False)
    view1_users, view1_items = self.forward(perturbed=True)
    view2_users, view2_items = self.forward(perturbed=True)

    # L_rec: BPR over dot-product scores of the clean embeddings
    anchors = clean_users[users]
    score_pos = (anchors * clean_items[positives]).sum(dim=1)
    score_neg = (anchors * clean_items[negatives]).sum(dim=1)
    ranking = self.bpr_loss(score_pos, score_neg)

    # L_sa: supervised alignment on the clean item embeddings
    alignment = self._supervised_alignment_loss(users, positives, clean_items)

    # L_cl: mean of the item-side and user-side contrastive terms,
    # each evaluated on the unique ids of the batch
    unique_items = positives.unique()
    unique_users = users.unique()
    contrast_items = self._reweighting_contrast_loss(
        view1_items, view2_items, unique_items
    )
    contrast_users = self.info_nce_loss(
        view1_users[unique_users], view2_users[unique_users]
    )
    contrast = 0.5 * (contrast_items + contrast_users)

    # Regularization on the raw (non-propagated) embeddings of the batch
    raw_embeddings = (
        self.user_embedding(users),
        self.item_embedding(positives),
        self.item_embedding(negatives),
    )
    penalty = self.reg_weight * self.reg_loss(*raw_embeddings)

    total = ranking + self.lambda1 * alignment + self.lambda2 * contrast + penalty

    # Only the total is shown on the progress bar
    for metric_name, metric_value, on_bar in (
        ("loss", total, True),
        ("rec_loss", ranking, False),
        ("sa_loss", alignment, False),
        ("cl_loss", contrast, False),
    ):
        self.log(
            metric_name, metric_value, prog_bar=on_bar, on_step=False, on_epoch=True
        )
    return total

warprec.recommenders.collaborative_filtering_recommender.graph_based.popdcl.PopDCL

Bases: GraphRecommenderUtils, IterativeRecommender

Implementation of PopDCL model from Popularity-aware Debiased Contrastive Loss for Collaborative Filtering (CIKM 2023).

Implements the full PopDCL model from Liu et al., CIKM 2023. The encoder is LightGCN (He et al., SIGIR 2020); the novelty is the loss function that simultaneously corrects:

- Positive scores via M+(u,i): reduces the score of positive pairs that are likely false-positives due to popularity bias (Section 3.3, Eq. 3–6).
- Negative scores via M-(u,j): personalizes the debiased contrastive loss using a per-user false-negative probability omega+(u) (Section 3.4, Eq. 8–10).

Both corrections rely solely on item/user popularity (degree in the interaction graph), which is pre-computed from the training set and stored as a fixed buffer.

Parameters:

Name Type Description Default
params dict

Model parameters (see annotated attributes below).

required
info dict

Dataset information dict containing 'n_users' and 'n_items'.

required
interactions Interactions

Training interactions used to build the graph adjacency matrix and precompute popularity statistics.

required
*args Any

Variable length argument list (forwarded to LightningModule).

()
seed int

Random seed for reproducibility. Default: 42.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

POS_NEG_LOADER – yields (user, pos_item, neg_item) triplets. The neg_item column is not used by the loss; in-batch negatives are derived from the pos_item column of each mini-batch (see Section 3.2).

embedding_size int

Dimensionality of user/item embedding vectors.

n_layers int

Number of LightGCN propagation layers.

temperature float

Contrastive temperature parameter tau (Section 3.6).

reg_weight float

L2 regularization coefficient lambda (Eq. 16).

batch_size int

Training mini-batch size.

epochs int

Number of training epochs.

learning_rate float

Adam learning rate.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/popdcl.py
@model_registry.register(name="PopDCL")
class PopDCL(GraphRecommenderUtils, IterativeRecommender):
    """Implementation of PopDCL model from
        Popularity-aware Debiased Contrastive Loss for Collaborative Filtering (CIKM 2023).

    Implements the full PopDCL model from Liu et al., CIKM 2023.  The encoder is
    LightGCN (He et al., SIGIR 2020); the novelty is the loss function that
    simultaneously corrects:
      - **Positive scores** via M+(u,i): reduces the score of positive pairs that
        are likely false-positives due to popularity bias (Section 3.3, Eqs. 3–6).
      - **Negative scores** via M-(u,j): personalizes the debiased contrastive loss
        using a per-user false-negative probability omega+(u) (Section 3.4, Eqs. 8–10).

    Both corrections rely solely on item/user popularity (degree in the interaction
    graph), which is pre-computed from the training set and stored as a fixed buffer.

    Args:
        params (dict): Model parameters (see annotated attributes below).
        info (dict): Dataset information dict containing 'n_users' and 'n_items'.
        interactions (Interactions): Training interactions used to build the graph
            adjacency matrix and precompute popularity statistics.
        *args (Any): Variable length argument list (forwarded to LightningModule).
        seed (int): Random seed for reproducibility. Default: 42.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: POS_NEG_LOADER – yields (user, pos_item, neg_item) triplets.
            The neg_item column is not used by the loss; in-batch negatives are derived
            from the pos_item column of each mini-batch (see Section 3.2).
        embedding_size (int): Dimensionality of user/item embedding vectors.
        n_layers (int): Number of LightGCN propagation layers.
        temperature (float): Contrastive temperature parameter tau (Section 3.6).
        reg_weight (float): L2 regularization coefficient lambda (Eq. 16).
        batch_size (int): Training mini-batch size.
        epochs (int): Number of training epochs.
        learning_rate (float): Adam learning rate.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    temperature: float  # tau in the paper (Section 3.6)
    reg_weight: float  # lambda in Eq. 16
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # ---- Embedding tables (same structure as LightGCN) ----
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # The interaction matrix is fetched once and shared by the adjacency
        # construction and the popularity statistics below.
        sparse_matrix = interactions.get_sparse()  # shape: [n_users, n_items]

        # ---- Graph adjacency matrix (un-normalized, same as LightGCN) ----
        # ASSUMPTION: LightGCN is used as the GNN encoder; the adjacency matrix is
        # built identically to the canonical LightGCN implementation in WarpRec.
        self.adj = self.get_adj_mat(
            sparse_matrix.tocoo(),
            self.n_users,
            self.n_items + 1,  # +1 for padding index
        )

        # ---- LGConv propagation network (K layers) ----
        propagation_network_list = []
        for _ in range(self.n_layers):
            propagation_network_list.append((LGConv(), "x, edge_index -> x"))
        self.propagation_network = torch_geometric.nn.Sequential(
            "x, edge_index", propagation_network_list
        )

        # ---- Precompute popularity statistics from training interactions ----
        # pop(u) = degree of user u  (Section 3.1)
        # pop(i) = degree of item i  (Section 3.1)
        # Both defined as the number of observed interactions (row / column sums
        # of the binary interaction matrix).

        # User popularity: row sums → [n_users].  Stored as a buffer; the loss in
        # this class does not read it (Eq. 3 uses `user_pop_sum` instead).
        user_pop_np = np.asarray(sparse_matrix.sum(axis=1)).flatten()
        # Item popularity: column sums.  Kept as a [1, n_items] row so it can be
        # broadcast against the interaction matrix, and flattened to [n_items]
        # for the buffer (Eq. 3 numerator).
        item_pop_row = np.asarray(sparse_matrix.sum(axis=0))
        item_pop_np = item_pop_row.flatten()

        # Total number of interactions N (Eq. 8 denominator)
        # ASSUMPTION: N is the total training set size (nnz of the sparse matrix).
        # Floored at 1 so an empty interaction matrix produces omega+ = eps
        # instead of NaN buffers (0 / 0).
        total_interactions = float(max(sparse_matrix.nnz, 1))

        # Register as non-trainable buffers so they move to the correct device
        # automatically when model.to(device) is called.
        self.register_buffer(
            "user_pop",
            torch.tensor(user_pop_np, dtype=torch.float32),
        )  # shape: [n_users]
        self.register_buffer(
            "item_pop",
            torch.tensor(
                np.concatenate([item_pop_np, [0.0]]),  # pad slot → pop 0
                dtype=torch.float32,
            ),
        )  # shape: [n_items + 1]

        # ---- omega+(u) = sum_{i in N_u} pop(i) / N  (Eq. 8) ----
        # Precomputed per user; shape: [n_users].
        # This is the personalized false-negative probability.
        # We clamp to [eps, 1 - eps] to keep omega-(u) > 0.
        #
        # Element-wise product of R with the broadcast item-popularity row,
        # summed per row: equivalent to dot(R_u, pop_item) for each user u,
        # which equals sum_{i in N_u} pop(i).  Result: [n_users].
        user_interacted_item_pop_sum = np.asarray(
            sparse_matrix.multiply(item_pop_row).sum(axis=1)
        ).flatten()
        omega_plus_np = user_interacted_item_pop_sum / total_interactions
        omega_plus_np = np.clip(omega_plus_np, 1e-7, 1.0 - 1e-7)
        self.register_buffer(
            "omega_plus",
            torch.tensor(omega_plus_np, dtype=torch.float32),
        )  # shape: [n_users]

        # ---- sum_{i' in N_u} pop(i') per user  (Eq. 3 denominator) ----
        # Used inside the loss to compute P(i ∉ N_u).
        # Same quantity as user_interacted_item_pop_sum above.
        self.register_buffer(
            "user_pop_sum",
            torch.tensor(user_interacted_item_pop_sum, dtype=torch.float32),
        )  # shape: [n_users]

        # ---- Weight initialization (Xavier, as stated in Section 4.1.4) ----
        self.apply(self._init_weights)

        # ---- Regularization loss ----
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        Args:
            interactions (Interactions): Training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to `get_contrastive_dataloader`.

        Returns:
            The dataloader returned by `interactions.get_contrastive_dataloader`,
            configured with this model's `batch_size`.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def forward(self) -> Tuple[Tensor, Tensor]:
        """LightGCN propagation: layer-wise mean pooling of embeddings.

        Returns:
            Tuple[Tensor, Tensor]: (user_all_embeddings, item_all_embeddings).
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Move adjacency to the same device as embeddings (lazy migration)
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        embeddings_list = [ego_embeddings]
        current_embeddings = ego_embeddings
        for layer_module in self.propagation_network.children():
            current_embeddings = layer_module(current_embeddings, self.adj)
            embeddings_list.append(current_embeddings)

        # Mean pooling across layers 0..K  (LightGCN Eq. 11 / Section 3.2.1)
        lightgcn_all_embeddings = torch.mean(torch.stack(embeddings_list, dim=0), dim=0)

        # Split into user and item sub-matrices
        user_all_embeddings, item_all_embeddings = torch.split(
            lightgcn_all_embeddings,
            [self.n_users, self.n_items + 1],
        )
        return user_all_embeddings, item_all_embeddings

    def _compute_popdcl_loss(
        self,
        user_emb: Tensor,
        item_emb_batch: Tensor,
        user_idx: Tensor,
        pos_item_idx: Tensor,
    ) -> Tensor:
        """Compute the PopDCL contrastive loss for one mini-batch.

        Args:
            user_emb (Tensor): L2-normalized user embeddings for the batch users. shape: [B, d].
            item_emb_batch (Tensor): L2-normalized positive item embeddings; items
                are also used as in-batch negatives for every other user in the batch. shape: [B, d].
            user_idx (Tensor): Integer user IDs (used to fetch popularity stats). shape: [B]
            pos_item_idx (Tensor):  Integer positive-item IDs. shape: [B]

        Returns:
            Tensor: Scalar loss averaged over the batch.
        """
        # B = batch size
        B = user_emb.size(0)

        # -- Score matrix: f(u_b, i_k) for all (b, k) pairs in batch  (Eq. 1) --
        # shape: [B, B]
        # ASSUMPTION: cosine similarity is used as f(u, i) (Section 3.2, just before Eq. 2).
        # Since both embeddings are L2-normalized, cosine = dot-product.
        scores = torch.mm(user_emb, item_emb_batch.t())  # [B, B]

        # Positive scores are on the diagonal: f(u_b, i_b)
        pos_scores = torch.diag(scores)  # [B]

        # ----------------------------------------------------------------
        # Positive score correction: M+(u, i)   (Eqs. 3, 5, 6)
        # ----------------------------------------------------------------

        # f^-(u, i) = mean of scores to all in-batch negatives j ≠ i  (Eq. 5)
        # For efficiency, subtract the positive score and divide by (B-1).
        # SIMPLIFICATION: The paper says the sum is over j ∈ B\{i} but does not
        # explicitly handle B=1 edge cases; the divisor is floored at 1 via
        # max(B - 1, 1) so a single-sample batch does not divide by zero.
        row_sum = scores.sum(dim=1)  # [B] – sum over all j including i
        f_neg_u = (row_sum - pos_scores) / max(B - 1, 1)  # [B] – Eq. 5

        # P(i ∉ N_u) = pop(i) / sum_{i' ∈ N_u} pop(i')   (Eq. 3)
        pop_pos = self.item_pop[pos_item_idx]  # type: ignore[index]
        user_pop_sum_b = self.user_pop_sum[user_idx]  # type: ignore[index]
        # Guard against zero denominator (new users with no interactions)
        # ASSUMPTION: user_pop_sum is always > 0 for users in training set.
        p_false_pos = pop_pos / user_pop_sum_b.clamp(min=1.0)  # [B] – Eq. 3

        # M+(u, i) = sigma(P(i ∉ N_u) * f^-(u, i))   (Eq. 6)
        m_plus = torch.sigmoid(p_false_pos * f_neg_u)  # [B] – Eq. 6

        # ----------------------------------------------------------------
        # Negative score correction: M-(u, j)   (Eqs. 8, 10)
        # ----------------------------------------------------------------

        # omega+(u) per user in batch  (Eq. 8, precomputed)
        omega_p = self.omega_plus[user_idx]  # type: ignore[index]
        omega_m = 1.0 - omega_p  # [B] – omega-(u)

        # M-(u, j) = omega+(u) / omega-(u) * exp(1/tau * [f(u,i) - f(u,j)])  (Eq. 10)
        # shape: [B, B]  – one row per user, one column per in-batch negative j
        #
        # The Maclaurin expansion in the paper yields this closed-form for M-(u,j).
        # pos_scores[:, None]  → [B, 1] broadcasts over the j dimension.
        # NOTE(review): for very small temperatures the exponential below may
        # overflow — confirm the configured temperature range keeps
        # delta_f / temperature within the dtype's exp() range.
        ratio = (omega_p / omega_m).unsqueeze(1)  # [B, 1]
        delta_f = pos_scores.unsqueeze(1) - scores  # [B, B] – f(u,i) - f(u,j)
        m_minus = ratio * torch.exp(delta_f / self.temperature)  # [B, B] – Eq. 10

        # ----------------------------------------------------------------
        # Full PopDCL loss  (Eq. 1, rewritten in log form for stability)
        # ----------------------------------------------------------------
        # L(u, i) = log(1 + sum_{j ≠ i} exp(1/tau * [f(u,j) - M-(u,j)
        #                                              - (f(u,i) - M+(u,i))]))
        #         = log(1 + sum_{j ≠ i} exp(1/tau * [Δf - ΔM]))   (Eq. 14)
        #
        # where:
        #   corrected_pos_score[b]   = (pos_scores[b] - m_plus[b]) / tau
        #   corrected_neg_score[b,k] = (scores[b,k]   - m_minus[b,k]) / tau  for k ≠ b

        # Numerator in Eq. 1 (in log-space):
        #   log_num[b] = (f(u_b, i_b) - M+(u_b, i_b)) / tau
        log_num = (pos_scores - m_plus) / self.temperature  # [B]

        # Build corrected negative logits: (f(u,j) - M-(u,j)) / tau
        corrected_neg = (scores - m_minus) / self.temperature  # [B, B]

        # Mask out the positive pair (diagonal) so it doesn't appear in the sum
        # over negatives.
        diag_mask = torch.eye(B, dtype=torch.bool, device=scores.device)
        corrected_neg = corrected_neg.masked_fill(diag_mask, float("-inf"))

        # Denominator in Eq. 1 (numerator + sum of negatives), computed via
        # logsumexp for numerical stability:
        #   log_denom[b] = logsumexp(
        #       [log_num[b], corrected_neg[b,0], ..., corrected_neg[b,B-1]]
        #   )
        all_logits = torch.cat([log_num.unsqueeze(1), corrected_neg], dim=1)  # [B, B+1]
        log_denom = torch.logsumexp(all_logits, dim=1)  # [B]

        # Per-sample loss: - log( exp(log_num) / exp(log_denom) )
        #                = log_denom - log_num
        loss_per_sample = log_denom - log_num  # [B] – Eq. 1

        return loss_per_sample.mean()

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """One training iteration.

        The standard WarpRec contrastive dataloader provides
        (user, pos_item, neg_item) triplets.  neg_item is ignored here because
        PopDCL constructs negatives in-batch from the pos_item column.

        Args:
            batch (Any): Triplet (user [B], pos_item [B], neg_item [B]).
            batch_idx (int): Current batch index.

        Returns:
            Tensor: Scalar training loss.
        """
        user, pos_item, _ = batch  # neg_item unused — in-batch strategy

        # Get full propagated embeddings from LightGCN encoder
        user_all_embeddings, item_all_embeddings = self.forward()

        # Gather per-batch embeddings
        u_emb = user_all_embeddings[user]  # [B, d]
        i_emb = item_all_embeddings[pos_item]  # [B, d]

        # L2-normalize: required by Eq. 5 and stated explicitly in Section 3.3
        # ("we use normalization to stabilize contrastive learning, meaning that
        # the user embedding e_u and item embedding e_i are both l2-normalized")
        u_emb = F.normalize(u_emb, p=2, dim=-1)  # Sec. 3.3
        i_emb = F.normalize(i_emb, p=2, dim=-1)  # Sec. 3.3

        # --- PopDCL contrastive loss (Eqs. 1–10, 14) ---
        pop_loss = self._compute_popdcl_loss(
            user_emb=u_emb,
            item_emb_batch=i_emb,
            user_idx=user,
            pos_item_idx=pos_item,
        )

        # --- L2 regularization on initial (un-propagated) embeddings (Eq. 16) ---
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
        )  # Eq. 16

        loss = pop_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Compute recommendation scores for the given users.

        At inference time, scores are plain cosine similarities between the
        propagated user and item embeddings (no correction applied).
        ASSUMPTION: Corrections are applied only during training to debias the
        loss; the final embeddings are used directly for ranking (consistent with
        how BC_loss, DCL, HCL are evaluated — all use plain inner-product/cosine
        at test time).

        Args:
            user_indices (Tensor): Batch of user IDs.
            *args (Any): Unused positional arguments.
            item_indices (Optional[Tensor]): If None, scores against all items are
                returned.  Otherwise, scores for the sampled item sub-set.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            Tensor: Score matrix [batch_size, n_items] or [batch_size, n_samples].
        """
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # L2-normalize embeddings for cosine similarity (consistent with training)
        user_embeddings = F.normalize(
            user_all_embeddings[user_indices], p=2, dim=-1
        )  # [B, d]

        if item_indices is None:
            # Full ranking – score against all n_items (drop padding slot)
            item_embeddings = F.normalize(
                item_all_embeddings[:-1, :], p=2, dim=-1
            )  # [n_items, d]
            return torch.einsum("be,ie->bi", user_embeddings, item_embeddings)

        # Sampled ranking
        item_embeddings = F.normalize(
            item_all_embeddings[item_indices], p=2, dim=-1
        )  # [B, S, d]
        return torch.einsum("be,bse->bs", user_embeddings, item_embeddings)

forward()

LightGCN propagation: layer-wise mean pooling of embeddings.

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: (user_all_embeddings, item_all_embeddings).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/popdcl.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run the propagation layers and average the embeddings of every layer.

    Returns:
        Tuple[Tensor, Tensor]: (user_all_embeddings, item_all_embeddings).
    """
    layer_output = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Lazily migrate the adjacency structure to wherever the embeddings live.
    target_device = layer_output.device
    if self.adj.device() != target_device:
        self.adj = self.adj.to(target_device)

    # Entry 0 holds the ego embeddings; each propagation layer appends one more.
    per_layer_outputs = [layer_output]
    for conv in self.propagation_network.children():
        layer_output = conv(layer_output, self.adj)
        per_layer_outputs.append(layer_output)

    # Mean over layers 0..K (LightGCN Eq. 11 / Section 3.2.1).
    pooled = torch.stack(per_layer_outputs, dim=0).mean(dim=0)

    # The first n_users rows are users; the remaining n_items + 1 rows are items.
    user_all_embeddings, item_all_embeddings = torch.split(
        pooled, [self.n_users, self.n_items + 1]
    )
    return user_all_embeddings, item_all_embeddings

predict(user_indices, *args, item_indices=None, **kwargs)

Compute recommendation scores for the given users.

At inference time, scores are plain cosine similarities between the propagated user and item embeddings (no correction applied). ASSUMPTION: Corrections are applied only during training to debias the loss; the final embeddings are used directly for ranking (consistent with how BC_loss, DCL, HCL are evaluated — all use plain inner-product/cosine at test time).

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user IDs.

required
*args Any

Unused positional arguments.

()
item_indices Optional[Tensor]

If None, scores against all items are returned. Otherwise, scores for the sampled item sub-set.

None
**kwargs Any

Unused keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Score matrix [batch_size, n_items] or [batch_size, n_samples].

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/popdcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with cosine similarity.

    Scores are plain cosine similarities between the propagated user and item
    embeddings; none of the training-time corrections are applied here.
    ASSUMPTION: Corrections are applied only during training to debias the
    loss; the final embeddings are used directly for ranking (consistent with
    how BC_loss, DCL, HCL are evaluated — all use plain inner-product/cosine
    at test time).

    Args:
        user_indices (Tensor): Batch of user IDs.
        *args (Any): Unused positional arguments.
        item_indices (Optional[Tensor]): If None, scores against all items are
            returned.  Otherwise, scores for the sampled item sub-set.
        **kwargs (Any): Unused keyword arguments.

    Returns:
        Tensor: Score matrix [batch_size, n_items] or [batch_size, n_samples].
    """
    all_users, all_items = self.propagate_embeddings()

    # Unit-length user vectors, so the dot products below are cosines. [B, d]
    query = F.normalize(all_users[user_indices], p=2, dim=-1)

    if item_indices is not None:
        # Sampled ranking: one candidate list per user. [B, S, d]
        candidates = F.normalize(all_items[item_indices], p=2, dim=-1)
        return torch.einsum("be,bse->bs", query, candidates)

    # Full ranking: every item except the trailing padding slot. [n_items, d]
    catalogue = F.normalize(all_items[:-1, :], p=2, dim=-1)
    return torch.einsum("be,ie->bi", query, catalogue)

training_step(batch, batch_idx)

One training iteration.

The standard WarpRec contrastive dataloader provides (user, pos_item, neg_item) triplets. neg_item is ignored here because PopDCL constructs negatives in-batch from the pos_item column.

Parameters:

Name Type Description Default
batch Any

Triplet (user [B], pos_item [B], neg_item [B]).

required
batch_idx int

Current batch index.

required

Returns:

Name Type Description
Tensor Tensor

Scalar training loss.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/popdcl.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Compute the training loss for one mini-batch.

    The batch is unpacked as a (user, pos_item, neg_item) triplet.  The
    neg_item entry is discarded because PopDCL builds its negatives in-batch
    from the pos_item column.

    Args:
        batch (Any): Triplet (user [B], pos_item [B], neg_item [B]).
        batch_idx (int): Current batch index.

    Returns:
        Tensor: Scalar training loss.
    """
    users, positives, _unused_negatives = batch

    # Propagated embeddings for every user and item from the encoder.
    propagated_users, propagated_items = self.forward()

    # Select the batch rows and L2-normalize them (Section 3.3: "we use
    # normalization to stabilize contrastive learning, meaning that the user
    # embedding e_u and item embedding e_i are both l2-normalized").
    unit_users = F.normalize(propagated_users[users], p=2, dim=-1)  # [B, d]
    unit_items = F.normalize(propagated_items[positives], p=2, dim=-1)  # [B, d]

    # PopDCL contrastive term (Eqs. 1–10, 14).
    contrastive_term = self._compute_popdcl_loss(
        user_emb=unit_users,
        item_emb_batch=unit_items,
        user_idx=users,
        pos_item_idx=positives,
    )

    # L2 penalty on the initial (un-propagated) embeddings (Eq. 16).
    ego_users = self.user_embedding(users)
    ego_items = self.item_embedding(positives)
    penalty_term = self.reg_weight * self.reg_loss(ego_users, ego_items)

    loss = contrastive_term + penalty_term
    self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
    return loss

warprec.recommenders.collaborative_filtering_recommender.graph_based.recdcl.RecDCL

Bases: GraphRecommenderUtils, IterativeRecommender

Implementation of RecDCL model from "RecDCL: Dual Contrastive Learning for Recommendation" (WWW 2024).

Implements the full RecDCL framework (Zhang et al., WWW 2024), which combines two objectives. The FCL objective (feature-wise CL) consists of UIBT, a Barlow-Twins-style cross-correlation loss between user and item embeddings that eliminates inter-user/item redundancy (Eq. 5), and UUII, a polynomial-kernel uniformity loss applied within the user and item embedding matrices along the feature dimension (Eq. 6). The BCL objective (batch-wise CL) is a historical-embedding output augmentation inspired by SimSiam, using online and target networks with a shared graph encoder (Eqs. 8–9).

The total training objective is (Eq. 10): L = L_UIBT + alpha * L_UUII + beta * L_BCL

The encoder is a 2-layer LightGCN (He et al., SIGIR 2020). Embeddings are L2-normalized before all loss computations (Algorithm 1).

Parameters:

Name Type Description Default
params dict

Model parameters (see annotated attributes below).

required
info dict

Dataset information dict containing 'n_users' and 'n_items'.

required
interactions Interactions

Training interactions used to build the graph adjacency matrix.

required
*args Any

Variable length argument list (forwarded to LightningModule).

()
seed int

Random seed for reproducibility. Default: 42.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

POS_LOADER – yields (user, pos_item) pairs. RecDCL does not require negative sampling (Section 4.2 / Table 1).

embedding_size int

Dimensionality of user/item embedding vectors (F). Best results at large F (2048); see Section D.7 / Figure 4.

n_layers int

Number of LightGCN propagation layers. Default: 2.

gamma float

Redundancy-reduction weight in UIBT (Eq. 5). Default: 0.01.

alpha float

Coefficient for UUII loss in total objective (Eq. 10). Default: 0.2 (best for Beauty/Game; see Table 13 and Figure 5).

beta float

Coefficient for BCL loss in total objective (Eq. 10). Default: 5 (best for Beauty/Yelp; see Table 13 and Figure 6).

tau_momentum float

Momentum ratio for historical embedding blending in Eq. 8. Default: 0.1 (best for Beauty; Table 13 / Figure 8).

poly_a float

Polynomial kernel coefficient a for UUII (Eq. 6). Default: 1.

poly_c float

Polynomial kernel offset c for UUII (Eq. 6). Default: 1e-7.

poly_e int

Polynomial kernel exponent e for UUII (Eq. 6). Default: 4.

batch_size int

Training mini-batch size. Default 256 for Beauty, 1024 for Food/Game/Yelp (Appendix D.3).

epochs int

Number of training epochs.

learning_rate float

Adam learning rate. Default: 0.001 (Appendix D.3).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/recdcl.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
@model_registry.register(name="RecDCL")
class RecDCL(GraphRecommenderUtils, IterativeRecommender):
    """Implementation of RecDCL model
    from "RecDCL: Dual Contrastive Learning for Recommendation" (WWW 2024).

    Implements the full RecDCL framework (Zhang et al., WWW 2024) which combines:
      - **FCL objective** (feature-wise CL):
          - UIBT: Barlow-Twins-style cross-correlation loss between user and item
            embeddings to eliminate inter-user/item redundancy (Eq. 5).
          - UUII: Polynomial-kernel uniformity loss applied *within* the user
            and item embedding matrices along the feature dimension (Eq. 6).
      - **BCL objective** (batch-wise CL):
          - Historical-embedding output augmentation inspired by SimSiam, using
            online and target networks with shared graph encoder (Eqs. 8–9).

    The total training objective is (Eq. 10):
        L = L_UIBT + alpha * L_UUII + beta * L_BCL

    The encoder is a 2-layer LightGCN (He et al., SIGIR 2020).  Embeddings are
    L2-normalized before all loss computations (Algorithm 1).

    Args:
        params (dict): Model parameters (see annotated attributes below).
        info (dict): Dataset information dict containing 'n_users' and 'n_items'.
        interactions (Interactions): Training interactions used to build the
            graph adjacency matrix.
        *args (Any): Variable length argument list (forwarded to LightningModule).
        seed (int): Random seed for reproducibility. Default: 42.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: POS_LOADER – yields (user, pos_item) pairs.
            RecDCL does not require negative sampling (Section 4.2 / Table 1).
        embedding_size (int): Dimensionality of user/item embedding vectors (F).
            Best results at large F (2048); see Section D.7 / Figure 4.
        n_layers (int): Number of LightGCN propagation layers. Default: 2.
        gamma (float): Redundancy-reduction weight in UIBT (Eq. 5). Default: 0.01.
        alpha (float): Coefficient for UUII loss in total objective (Eq. 10).
            Default: 0.2 (best for Beauty/Game; see Table 13 and Figure 5).
        beta (float): Coefficient for BCL loss in total objective (Eq. 10).
            Default: 5 (best for Beauty/Yelp; see Table 13 and Figure 6).
        tau_momentum (float): Momentum ratio for historical embedding blending
            in Eq. 8. Default: 0.1 (best for Beauty; Table 13 / Figure 8).
        poly_a (float): Polynomial kernel coefficient a for UUII (Eq. 6). Default: 1.
        poly_c (float): Polynomial kernel offset c for UUII (Eq. 6). Default: 1e-7.
        poly_e (int): Polynomial kernel exponent e for UUII (Eq. 6). Default: 4.
        batch_size (int): Training mini-batch size. Default 256 for Beauty, 1024
            for Food/Game/Yelp (Appendix D.3).
        epochs (int): Number of training epochs.
        learning_rate (float): Adam learning rate. Default: 0.001 (Appendix D.3).
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_DATALOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    gamma: float
    alpha: float
    beta: float
    tau_momentum: float
    poly_a: float
    poly_c: float
    poly_e: int
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # ---- Embedding tables (same structure as LightGCN) ---- Algorithm 1
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # ---- Graph adjacency matrix ----
        # ASSUMPTION: The LightGCN adjacency is built without explicit symmetric
        # normalization here (same as the canonical WarpRec LightGCN); LGConv
        # internally applies the degree normalization during message passing.
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,  # +1 for padding idx
        )

        # ---- LGConv propagation network (L layers) ----
        # Section 4.2: "we adopt [LightGCN] as the graph encoder f_theta"
        propagation_network_list = []
        for _ in range(self.n_layers):
            propagation_network_list.append((LGConv(), "x, edge_index -> x"))
        self.propagation_network = torch_geometric.nn.Sequential(
            "x, edge_index", propagation_network_list
        )

        # ---- BCL projector h(·) — Figure 3 / Eq. 9 ----
        self.projector = Projector(self.embedding_size)

        # ---- Historical embedding buffers for BCL (Eq. 8) ----
        # Initialized to zeros; the rows touched by a batch are overwritten at
        # the end of each training step.
        # ASSUMPTION: "historical embeddings from prior training iterations"
        # (Section 4.2) means the embeddings from the immediately preceding
        # batch/step, stored as a detached copy.  This matches the description
        # in [3, 9, 54] cited in the paper and the GNNAutoScale approach.
        self.register_buffer(
            "hist_user_emb",
            torch.zeros(self.n_users, self.embedding_size),
        )
        self.register_buffer(
            "hist_item_emb",
            torch.zeros(self.n_items + 1, self.embedding_size),
        )
        # Track whether historical buffers have been populated yet
        self._hist_initialized: bool = False

        # ---- Xavier weight initialization ---- (Appendix D.3 / [11])
        self.apply(self._init_weights)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the positive-only dataloader built from `interactions`.

        `sessions` is accepted for interface compatibility and is not used.
        """
        return interactions.get_positive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _uibt_loss(
        self,
        e_u: Tensor,
        e_i: Tensor,
        gamma: float,
    ) -> Tensor:
        """Feature-wise cross-correlation loss between users and items (UIBT).

        Extends Barlow Twins [47] to user-item alignment for recommendations.
        The cross-correlation matrix C is built between the user embedding matrix
        E_U and item embedding matrix E_I along the feature dimension.

        The loss has two terms scaled by 1/F (Eq. 5):
        - Invariance term : (1 - C_mm)^2  — drives diagonal to 1.
        - Redundancy term : gamma * C_mn^2 for m != n — drives off-diagonal to 0.

        Args:
            e_u (Tensor): L2-normalized user embeddings  [B, F].
            e_i (Tensor): L2-normalized item embeddings  [B, F].
            gamma (float): Weight on the redundancy-reduction term (Eq. 5).

        Returns:
            Tensor: Scalar UIBT loss.
        """
        # Eq. 5 — Section 4.1 ("Eliminate redundancy between users and items")
        # Descriptive local names: binding the feature count to `F` would hide
        # the `torch.nn.functional` alias used elsewhere in this class.
        n_samples, n_features = e_u.shape

        # Cross-correlation matrix C [F, F] — Eq. 3 / Eq. 5
        # C_mn = (E_U[:, m])^T E_I[:, n] / B
        # The paper divides by B (batch size) rather than by ||Z^:m|| * ||Z_hat^:n||
        # as in the original Barlow Twins; Algorithm 2 shows: C = mm(e_u.T, e_i).div(B)
        C = torch.mm(e_u.t(), e_i) / n_samples  # [F, F] — Algorithm 2

        # Invariance term: (1 - C_mm)^2 summed, divided by F — Eq. 5
        # Out-of-place ops so C is left intact for the off-diagonal term below.
        on_diag = (torch.diagonal(C) - 1).pow(2).sum().div(n_features)

        # Redundancy reduction term: gamma * sum_{m != n} C_mn^2 / F — Eq. 5
        diag_mask = torch.eye(n_features, dtype=torch.bool, device=C.device)
        off_diag = C.masked_fill(diag_mask, 0.0)
        off_diag_loss = gamma * off_diag.pow(2).sum().div(n_features)

        return on_diag + off_diag_loss  # Eq. 5

    def _uuii_loss(
        self,
        e_u: Tensor,
        e_i: Tensor,
        poly_a: float,
        poly_c: float,
        poly_e: int,
    ) -> Tensor:
        """Feature-wise uniformity loss within users and within items (UUII).

        Uses a polynomial kernel to drive feature-column representations on the
        user and item hyperspheres toward uniformity. Computed separately for users
        and items, then averaged (Eq. 6 / Section 4.1 "Eliminate redundancy within
        users and items").

        Kernel:  k(z_a, z_b) = (a * z_a^T z_b + c)^e

        The loss is the log of the mean kernel value over the full [F, F] Gram
        matrix of feature columns of the user (resp. item) embedding matrix.

        Args:
            e_u (Tensor): L2-normalized user embeddings  [B, F].
            e_i (Tensor): L2-normalized item embeddings  [B, F].
            poly_a (float): Polynomial kernel coefficient a. Default 1.
            poly_c (float): Polynomial kernel offset c. Default 1e-7.
            poly_e (int): Polynomial kernel exponent e. Default 4.

        Returns:
            Tensor: Scalar UUII loss.
        """
        # Eq. 6 — Section 4.1
        # Note: the mean is over FEATURE columns, not batch samples.
        # Algorithm 2: L_uni = mm(e_i.T, e_i).add_(c).pow_(e).mean().log()
        # The 1/2 factor comes from averaging user and item terms.

        def _uni(e: Tensor) -> Tensor:
            # Gram matrix of feature columns: [F, F]
            gram = torch.mm(e.t(), e)  # E^T * E in Algorithm 2
            # Apply polynomial kernel and compute log-mean — Algorithm 2
            return (poly_a * gram + poly_c).pow(poly_e).mean().log()

        return 0.5 * _uni(e_u) + 0.5 * _uni(e_i)  # Eq. 6

    def _bcl_loss(
        self,
        e_u: Tensor,
        e_i: Tensor,
        e_u_hist: Tensor,
        e_i_hist: Tensor,
        tau_momentum: float,
        projector: nn.Module,
    ) -> Tensor:
        """Batch-wise contrastive loss with historical-embedding augmentation (BCL).

        Follows the SimSiam-style asymmetric design (Zhang et al. [48]):
        1. Blend historical and current embeddings to form perturbed views (Eq. 8).
        2. Apply stop-gradient to the perturbed (target-network) side.
        3. Compute cosine distance between the projected current embedding and the
            stop-gradient perturbed embedding (Eq. 9).
        4. Average user→item and item→user directions (Algorithm 2 / Eq. 9).

        Args:
            e_u (Tensor): Current user embeddings  [B, F].
            e_i (Tensor): Current item embeddings  [B, F].
            e_u_hist (Tensor): Historical user embeddings from the target network [B, F].
            e_i_hist (Tensor): Historical item embeddings from the target network [B, F].
            tau_momentum (float): Momentum coefficient tau controlling historical
                embedding weight in Eq. 8.
            projector (nn.Module): The shared MLP projector h(·) (Figure 3).

        Returns:
            Tensor: Scalar BCL loss.
        """
        # Eq. 8 — perturbed augmented representations using historical embeddings
        # e_hat_u = tau * e_u_hist + (1 - tau) * e_u
        e_hat_u = tau_momentum * e_u_hist + (1.0 - tau_momentum) * e_u  # Eq. 8
        e_hat_i = tau_momentum * e_i_hist + (1.0 - tau_momentum) * e_i  # Eq. 8

        # Eq. 9 — asymmetric cosine-distance with stop-gradient on the target side
        # L_BCL = 1/2 * S(h(E_U), sg(E_hat_I)) + 1/2 * S(sg(E_hat_U), h(E_I))
        # ASSUMPTION: cosine distance S = 1 - cos_sim is the intended metric;
        # the paper writes "S(·, ·) denotes the cosine distance" (Section 4.2).
        def _cos_distance(a: Tensor, b: Tensor) -> Tensor:
            return 1.0 - F.cosine_similarity(a, b, dim=-1).mean()

        # h(E_U): projected current user embeddings — applied to current side only
        p_u = projector(e_u)  # online projection
        p_i = projector(e_i)  # online projection

        # Stop gradient on the augmented (target) side — Eq. 9 / Figure 2 (d)
        # Algorithm 2: L_aug = S(h(e_u), sg(e_hat_i))
        loss_u = _cos_distance(p_u, e_hat_i.detach())  # sg(E_hat_I) — Eq. 9
        loss_i = _cos_distance(p_i, e_hat_u.detach())  # sg(E_hat_U) — Eq. 9

        return 0.5 * loss_u + 0.5 * loss_i  # Algorithm 2 outer loop

    def forward(self) -> Tuple[Tensor, Tensor]:
        """LightGCN propagation: layer-wise mean pooling of embeddings.

        Implements the standard LightGCN forward pass used as the backbone
        encoder f_theta in RecDCL (Section 4.2).

        Returns:
            Tuple[Tensor, Tensor]: (user_all_embeddings [n_users, F],
                                    item_all_embeddings [n_items+1, F]).
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Ensure adjacency matrix is on the same device as embeddings
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        embeddings_list = [ego_embeddings]
        current_embeddings = ego_embeddings

        # L-layer message passing — LightGCN backbone (He et al. SIGIR 2020)
        for layer_module in self.propagation_network.children():
            current_embeddings = layer_module(current_embeddings, self.adj)
            embeddings_list.append(current_embeddings)

        # Mean pooling across layers 0 … L (equivalent to alpha=1/(L+1) weighting)
        all_embeddings = torch.mean(torch.stack(embeddings_list, dim=0), dim=0)

        # Split into user and item sub-matrices
        user_all_embeddings, item_all_embeddings = torch.split(
            all_embeddings, [self.n_users, self.n_items + 1]
        )
        return user_all_embeddings, item_all_embeddings

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """One training iteration executing all three RecDCL loss components.

        Follows Algorithm 1 and Algorithm 2 from Appendix D.4:
          1. Encode via LightGCN to obtain E_U and E_I.
          2. L2-normalize embeddings (Algorithm 1 lines 4-5).
          3. Compute L_UIBT (Eq. 5), L_UUII (Eq. 6), L_BCL (Eq. 9).
          4. Combine with trade-off coefficients (Eq. 10).
          5. Update historical embedding buffers for the next step.

        Args:
            batch (Any): Pair (user [B], pos_item [B]) of index tensors.
                RecDCL is a negative-sampling-free method, so the batch
                carries no negative items.
            batch_idx (int): Current batch index.

        Returns:
            Tensor: Scalar training loss.
        """
        user, pos_item = batch

        # ---- Step 1: Encode ---- Algorithm 1 line 3
        user_all_emb, item_all_emb = self.forward()

        # Gather batch-level embeddings
        e_u = user_all_emb[user]  # [B, F] — E_U in the paper
        e_i = item_all_emb[pos_item]  # [B, F] — E_I in the paper

        # ---- Step 2: L2-normalize ---- Algorithm 1 lines 4-5
        # "Normalize e_u: e_u = e_u / ||e_u||"
        e_u = F.normalize(e_u, p=2, dim=-1)  # Algorithm 1 line 4
        e_i = F.normalize(e_i, p=2, dim=-1)  # Algorithm 1 line 5

        # ---- Step 3a: FCL — UIBT loss (Eq. 5) ---- Algorithm 1 line 6
        loss_uibt = self._uibt_loss(e_u, e_i, self.gamma)  # Eq. 5 / Algorithm 2

        # ---- Step 3b: FCL — UUII loss (Eq. 6) ---- Algorithm 1 line 7
        # Algorithm 2: L_UUII = UUII(e_u) / 2 + UUII(e_i) / 2
        loss_uuii = self._uuii_loss(
            e_u,
            e_i,
            poly_a=self.poly_a,
            poly_c=self.poly_c,
            poly_e=self.poly_e,
        )  # Eq. 6

        # ---- Step 3c: BCL loss (Eq. 9) ---- Algorithm 1 line 8
        # Historical embeddings form the perturbed augmented view (Eq. 8).
        # Before any step has run the buffers are all zeros, so the current
        # normalized embeddings stand in as history and Eq. 8 reduces to
        # e_hat = current embedding.  Rows not yet written after that stay
        # zero; a zero history only rescales e_hat, and the cosine distance
        # of Eq. 9 is scale-invariant.
        if not self._hist_initialized:
            e_u_hist = e_u.detach()
            e_i_hist = e_i.detach()
        else:
            e_u_hist = self.hist_user_emb[user]  # type: ignore[index]
            e_i_hist = self.hist_item_emb[pos_item]  # type: ignore[index]

        # Both sides of the Eq. 8 blend are L2-normalized vectors: the current
        # view is e_u / e_i and the history holds the normalized vectors
        # written in Step 5.
        loss_bcl = self._bcl_loss(
            e_u,
            e_i,
            e_u_hist,
            e_i_hist,
            tau_momentum=self.tau_momentum,
            projector=self.projector,
        )  # Eq. 9 / Algorithm 2

        # ---- Step 4: Total loss (Eq. 10) ---- Algorithm 1 line 9
        # L = L_UIBT + alpha * L_UUII + beta * L_BCL
        loss = loss_uibt + self.alpha * loss_uuii + self.beta * loss_bcl  # Eq. 10

        # ---- Step 5: Update historical embedding buffers ----
        # Store the same L2-normalized vectors that were used as the current
        # view, so the history blended in Eq. 8 at a later step lives on the
        # same scale as the current embeddings (and as the first-step
        # fallback).  Storing raw propagated vectors here would make the
        # effective momentum depend on the embedding norm.
        with torch.no_grad():
            # SIMPLIFICATION: only the rows of users/items seen in this batch
            # are refreshed; other rows keep their previous content.
            self.hist_user_emb[user] = e_u.detach()  # type: ignore[operator]
            self.hist_item_emb[pos_item] = e_i.detach()  # type: ignore[operator]
            self._hist_initialized = True

        # ---- Logging ----
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log("loss_uibt", loss_uibt, prog_bar=False, on_step=False, on_epoch=True)
        self.log("loss_uuii", loss_uuii, prog_bar=False, on_step=False, on_epoch=True)
        self.log("loss_bcl", loss_bcl, prog_bar=False, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Compute recommendation scores for the given users.

        At inference time, scores are computed as inner products between the
        propagated user and item embeddings.  No contrastive corrections are
        applied — the FCL/BCL objectives are purely training-time regularizers.

        Section 4.3: "we calculate the ranking score function by using the inner
        product between user and item representations."

        Args:
            user_indices (Tensor): Batch of user IDs [B].
            *args (Any): Unused positional arguments.
            item_indices (Optional[Tensor]): If None, scores against all items
                are returned [B, n_items].  Otherwise, scores for the sampled
                item sub-set [B, S].
            **kwargs (Any): Unused keyword arguments.

        Returns:
            Tensor: Score matrix [B, n_items] or [B, S].
        """
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        user_embeddings = user_all_embeddings[user_indices]  # [B, F]

        if item_indices is None:
            # Full ranking — score against all n_items (drop padding slot)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, F]
            return torch.einsum("be,ie->bi", user_embeddings, item_embeddings)

        # Sampled ranking
        item_embeddings = item_all_embeddings[item_indices]  # [B, S, F]
        return torch.einsum("be,bse->bs", user_embeddings, item_embeddings)

forward()

LightGCN propagation: layer-wise mean pooling of embeddings.

Implements the standard LightGCN forward pass used as the backbone encoder f_theta in RecDCL (Section 4.2).

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple[Tensor, Tensor]: (user_all_embeddings [n_users, F], item_all_embeddings [n_items+1, F]).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/recdcl.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Run the LightGCN encoder and return layer-averaged embeddings.

    The ego embeddings are pushed through every layer of
    ``self.propagation_network``; the input and all layer outputs are then
    averaged with equal weight and split back into a user and an item part.

    Returns:
        Tuple[Tensor, Tensor]: (user_all_embeddings [n_users, F],
                                item_all_embeddings [n_items+1, F]).
    """
    layer_zero = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Move the adjacency next to the embeddings if they live on different devices.
    target_device = layer_zero.device
    if self.adj.device() != target_device:
        self.adj = self.adj.to(target_device)

    # Each layer consumes the output of the previous one; keep every stage.
    per_layer = [layer_zero]
    for conv in self.propagation_network.children():
        per_layer.append(conv(per_layer[-1], self.adj))

    # Uniform average over layers 0 … L.
    pooled = torch.stack(per_layer, dim=0).mean(dim=0)

    # Users come first, followed by the items plus the padding slot.
    user_part, item_part = torch.split(pooled, [self.n_users, self.n_items + 1])
    return user_part, item_part

predict(user_indices, *args, item_indices=None, **kwargs)

Compute recommendation scores for the given users.

At inference time, scores are computed as inner products between the propagated user and item embeddings. No contrastive corrections are applied — the FCL/BCL objectives are purely training-time regularizers.

Section 4.3: "we calculate the ranking score function by using the inner product between user and item representations."

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user IDs [B].

required
*args Any

Unused positional arguments.

()
item_indices Optional[Tensor]

If None, scores against all items are returned [B, n_items]. Otherwise, scores for the sampled item sub-set [B, S].

None
**kwargs Any

Unused keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Score matrix [B, n_items] or [B, S].

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/recdcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users via embedding inner products.

    The embeddings come from ``self.propagate_embeddings()``; each score is
    the dot product of a user row with an item row.

    Args:
        user_indices (Tensor): Batch of user IDs [B].
        *args (Any): Unused positional arguments.
        item_indices (Optional[Tensor]): When given, the candidate item IDs
            per user [B, S]; when None, every item except the last
            (padding) row is scored.
        **kwargs (Any): Unused keyword arguments.

    Returns:
        Tensor: Score matrix [B, n_items] or [B, S].
    """
    all_users, all_items = self.propagate_embeddings()
    batch_users = all_users[user_indices]  # [B, F]

    if item_indices is not None:
        # Sampled ranking: one candidate list per user.
        candidates = all_items[item_indices]  # [B, S, F]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Full ranking: leave out the trailing padding row.
    catalogue = all_items[:-1, :]  # [n_items, F]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

training_step(batch, batch_idx)

One training iteration executing all three RecDCL loss components.

Follows Algorithm 1 and Algorithm 2 from Appendix D.4: 1. Encode via LightGCN to obtain E_U and E_I. 2. L2-normalize embeddings (Algorithm 1 lines 4-5). 3. Compute L_UIBT (Eq. 5), L_UUII (Eq. 6), L_BCL (Eq. 9). 4. Combine with trade-off coefficients (Eq. 10). 5. Update historical embedding buffers for the next step.

Parameters:

Name Type Description Default
batch Any

Pair (user [B], pos_item [B]) of index tensors. RecDCL is a negative-sampling-free method, so the batch carries no negative items.

required
batch_idx int

Current batch index.

required

Returns:

Name Type Description
Tensor Tensor

Scalar training loss.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/recdcl.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """One training iteration executing all three RecDCL loss components.

    Follows Algorithm 1 and Algorithm 2 from Appendix D.4:
      1. Encode via LightGCN to obtain E_U and E_I.
      2. L2-normalize embeddings (Algorithm 1 lines 4-5).
      3. Compute L_UIBT (Eq. 5), L_UUII (Eq. 6), L_BCL (Eq. 9).
      4. Combine with trade-off coefficients (Eq. 10).
      5. Update historical embedding buffers for the next step.

    Args:
        batch (Any): Pair (user [B], pos_item [B]) of index tensors.
            RecDCL is a negative-sampling-free method, so the batch
            carries no negative items.
        batch_idx (int): Current batch index.

    Returns:
        Tensor: Scalar training loss.
    """
    user, pos_item = batch

    # ---- Step 1: Encode ---- Algorithm 1 line 3
    user_all_emb, item_all_emb = self.forward()

    # Gather batch-level embeddings
    e_u = user_all_emb[user]  # [B, F] — E_U in the paper
    e_i = item_all_emb[pos_item]  # [B, F] — E_I in the paper

    # ---- Step 2: L2-normalize ---- Algorithm 1 lines 4-5
    # "Normalize e_u: e_u = e_u / ||e_u||"
    e_u = F.normalize(e_u, p=2, dim=-1)  # Algorithm 1 line 4
    e_i = F.normalize(e_i, p=2, dim=-1)  # Algorithm 1 line 5

    # ---- Step 3a: FCL — UIBT loss (Eq. 5) ---- Algorithm 1 line 6
    loss_uibt = self._uibt_loss(e_u, e_i, self.gamma)  # Eq. 5 / Algorithm 2

    # ---- Step 3b: FCL — UUII loss (Eq. 6) ---- Algorithm 1 line 7
    # Algorithm 2: L_UUII = UUII(e_u) / 2 + UUII(e_i) / 2
    loss_uuii = self._uuii_loss(
        e_u,
        e_i,
        poly_a=self.poly_a,
        poly_c=self.poly_c,
        poly_e=self.poly_e,
    )  # Eq. 6

    # ---- Step 3c: BCL loss (Eq. 9) ---- Algorithm 1 line 8
    # Historical embeddings form the perturbed augmented view (Eq. 8).
    # Before any step has run the buffers are all zeros, so the current
    # normalized embeddings stand in as history and Eq. 8 reduces to
    # e_hat = current embedding.  Rows not yet written after that stay
    # zero; a zero history only rescales e_hat, and the cosine distance
    # of Eq. 9 is scale-invariant.
    if not self._hist_initialized:
        e_u_hist = e_u.detach()
        e_i_hist = e_i.detach()
    else:
        e_u_hist = self.hist_user_emb[user]  # type: ignore[index]
        e_i_hist = self.hist_item_emb[pos_item]  # type: ignore[index]

    # Both sides of the Eq. 8 blend are L2-normalized vectors: the current
    # view is e_u / e_i and the history holds the normalized vectors
    # written in Step 5.
    loss_bcl = self._bcl_loss(
        e_u,
        e_i,
        e_u_hist,
        e_i_hist,
        tau_momentum=self.tau_momentum,
        projector=self.projector,
    )  # Eq. 9 / Algorithm 2

    # ---- Step 4: Total loss (Eq. 10) ---- Algorithm 1 line 9
    # L = L_UIBT + alpha * L_UUII + beta * L_BCL
    loss = loss_uibt + self.alpha * loss_uuii + self.beta * loss_bcl  # Eq. 10

    # ---- Step 5: Update historical embedding buffers ----
    # Store the same L2-normalized vectors that were used as the current
    # view, so the history blended in Eq. 8 at a later step lives on the
    # same scale as the current embeddings (and as the first-step
    # fallback).  Storing raw propagated vectors here would make the
    # effective momentum depend on the embedding norm.
    with torch.no_grad():
        # SIMPLIFICATION: only the rows of users/items seen in this batch
        # are refreshed; other rows keep their previous content.
        self.hist_user_emb[user] = e_u.detach()  # type: ignore[operator]
        self.hist_item_emb[pos_item] = e_i.detach()  # type: ignore[operator]
        self._hist_initialized = True

    # ---- Logging ----
    self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
    self.log("loss_uibt", loss_uibt, prog_bar=False, on_step=False, on_epoch=True)
    self.log("loss_uuii", loss_uuii, prog_bar=False, on_step=False, on_epoch=True)
    self.log("loss_bcl", loss_bcl, prog_bar=False, on_step=False, on_epoch=True)
    return loss

warprec.recommenders.collaborative_filtering_recommender.graph_based.rp3beta.RP3Beta

Bases: ItemSimRecommender

Implementation of RP3Beta algorithm from Updatable, accurate, diverse, and scalable recommendations for interactive applications 2016.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

alpha float

The intensity of the normalization.

beta float

The normalization value for the users connections.

normalize bool

Whether or not to normalize the interactions.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/rp3beta.py
@model_registry.register(name="RP3Beta")
class RP3Beta(ItemSimRecommender):
    """Implementation of RP3Beta algorithm from
        Updatable, accurate, diverse, and scalable recommendations for interactive applications 2016.

    The item-item similarity is built entirely inside ``__init__``: the
    row-normalized item-user and user-item matrices are multiplied block by
    block, each column is re-weighted by ``popularity ** -beta``, the
    diagonal is removed and only the ``k`` largest entries per row are kept.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors kept per item.
        alpha (float): Exponent applied element-wise to both transition
            matrices (skipped when equal to 1.0).
        beta (float): Exponent of the item-popularity penalty
            (``popularity ** -beta``).
        normalize (bool): Whether or not to L1-normalize the rows of the
            filtered similarity matrix.
    """

    k: int
    alpha: float
    beta: float
    normalize: bool

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Analytically estimate the peak training RAM of the model.

        Args:
            params (dict): Model parameters; ``"k"`` is required, ``"alpha"``
                is optional and defaults to 1.0.
            info (dict): Dataset information; ``"n_users"`` and ``"n_items"``
                are read.
            interactions (Optional[Interactions]): The training interactions.
                Passed through ``_require_interactions_for_estimate`` before
                use.
            **kwargs (Any): Unused.

        Returns:
            dict: ``{"train_ram_mb": <estimate>, "notes": <description>}``.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        n_users = info["n_users"]
        n_items = info["n_items"]
        # At most k entries per item row survive the top-k filter.
        filtered_nnz = min(n_items * n_items, n_items * params["k"])

        train_matrix_mb = cls._sparse_size_mb(X)
        pui_mb = cls._compressed_sparse_size_mb(
            nnz=X.nnz, ptr_len=n_users + 1, data_dtype=X.dtype
        )
        x_bool_mb = cls._compressed_sparse_size_mb(
            nnz=X.nnz, ptr_len=n_items + 1, data_dtype=np.float32
        )
        piu_mb = cls._compressed_sparse_size_mb(
            nnz=X.nnz, ptr_len=n_items + 1, data_dtype=X.dtype
        )
        degree_mb = cls._dense_size_mb((n_items,), np.float64)
        # Mirrors the default ``initial_data_block`` of
        # ``_compute_blockwise_similarity``: two int32 index arrays plus one
        # float32 value array of 10M cells each.
        work_arrays_mb = cls._bytes_to_mb(10000000 * (4 + 4 + 4))
        filtered_sparse_mb = cls._compressed_sparse_size_mb(
            nnz=filtered_nnz,
            ptr_len=n_items + 1,
            data_dtype=np.float32,
        )
        # ``__init__`` densifies the filtered matrix with ``toarray()``.
        final_similarity_mb = cls._dense_size_mb((n_items, n_items), np.float32)

        resident_mb = train_matrix_mb + pui_mb + x_bool_mb + degree_mb + piu_mb
        block_peak_mb = resident_mb + work_arrays_mb
        final_peak_mb = resident_mb + filtered_sparse_mb + final_similarity_mb
        # ``power(alpha)`` creates new copies of Pui and Piu while the old
        # ones are still alive; only relevant when alpha != 1.
        alpha_peak_mb = (
            resident_mb + pui_mb + piu_mb if params.get("alpha", 1.0) != 1.0 else 0.0
        )

        train_ram_mb = cls._peak_size_mb(
            resident_mb,
            block_peak_mb,
            final_peak_mb,
            alpha_peak_mb,
        )

        return {
            "train_ram_mb": train_ram_mb,
            "notes": "RP3Beta analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the dense item-item similarity matrix from the interactions.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            interactions (Interactions): The training interactions.
            *args (Any): Variable length argument list.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Arbitrary keyword arguments.
        """
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        X = interactions.get_sparse()

        # Step 1: Normalize user-item matrix
        Pui = normalize(X, norm="l1", axis=1)

        # Step 2: Create boolean item-user matrix
        X_bool = X.transpose(copy=True)
        X_bool.data = np.ones(X_bool.data.size, np.float32)

        # Step 3: Calculate item popularity degrees
        X_bool_sum = np.array(X_bool.sum(axis=1)).ravel()
        degree = np.zeros(X.shape[1])
        # Items without interactions keep a weight of 0 (avoids 0 ** -beta).
        non_zero_mask = X_bool_sum != 0.0
        degree[non_zero_mask] = np.power(X_bool_sum[non_zero_mask], -self.beta)

        # Step 4: Normalize item-user matrix
        Piu = normalize(X_bool, norm="l1", axis=1)

        # Apply alpha exponent
        if self.alpha != 1.0:
            Pui = Pui.power(self.alpha)
            Piu = Piu.power(self.alpha)

        # Step 5: Compute similarity in blocks
        similarity_matrix = self._compute_blockwise_similarity(Piu, Pui, degree)

        # Step 6: Apply top-k filtering
        filtered_matrix = self._apply_sparse_topk(similarity_matrix, self.k)

        # Step 7: Normalize if required
        if self.normalize:
            filtered_matrix = normalize(filtered_matrix, norm="l1", axis=1)

        # Update item_similarity
        self.item_similarity = filtered_matrix.toarray()

    def _compute_blockwise_similarity(
        self,
        Piu: csr_matrix,
        Pui: csr_matrix,
        degree: np.ndarray,
        initial_block_dim: int = 200,
        initial_data_block: int = 10000000,
    ) -> csr_matrix:
        """
        Computes similarity matrix in blocks to handle large matrices efficiently.

        Args:
            Piu (csr_matrix): Row-normalized item-user matrix (items x users).
            Pui (csr_matrix): Row-normalized user-item matrix (users x items).
            degree (np.ndarray): 1-D array with one weight per item; it is
                broadcast over every block so that column ``j`` is scaled
                by ``degree[j]``.
            initial_block_dim (int): Number of item rows processed per block.
            initial_data_block (int): Number of cells initially allocated for
                the non-zero entries; also the increment used when the
                storage has to grow.

        Returns:
            csr_matrix: Computed similarity matrix between items (items x items)
        """
        block_dim = initial_block_dim
        data_block = initial_data_block

        # Initialize storage arrays with initial allocation
        rows = np.zeros(data_block, dtype=np.int32)
        cols = np.zeros(data_block, dtype=np.int32)
        values = np.zeros(data_block, dtype=np.float32)
        num_cells = 0

        # Process matrix in blocks along rows
        for current_block_start_row in range(0, Pui.shape[1], block_dim):
            # Adjust block dimension for last block
            block_dim = min(block_dim, Pui.shape[1] - current_block_start_row)

            # Compute similarity block matrix product
            similarity_block = (
                Piu[current_block_start_row : current_block_start_row + block_dim] @ Pui
            )
            similarity_block = similarity_block.multiply(degree).tocoo()

            # Remove self-similarity entries (diagonal elements); block-local
            # row indices are shifted back to global item indices.
            mask = (
                similarity_block.row + current_block_start_row
            ) != similarity_block.col
            similarity_block.row = similarity_block.row[mask] + current_block_start_row
            similarity_block.col = similarity_block.col[mask]
            similarity_block.data = similarity_block.data[mask]

            # Check if we need to expand storage
            new_entries = len(similarity_block.data)
            while num_cells + new_entries > len(rows):
                # Grow the storage arrays by one more ``data_block`` chunk
                rows = np.concatenate((rows, np.zeros(data_block, dtype=np.int32)))  # type: ignore
                cols = np.concatenate((cols, np.zeros(data_block, dtype=np.int32)))  # type: ignore
                values = np.concatenate(
                    (values, np.zeros(data_block, dtype=np.float32))
                )  # type: ignore

            # Store computed values
            rows[num_cells : num_cells + new_entries] = similarity_block.row
            cols[num_cells : num_cells + new_entries] = similarity_block.col
            values[num_cells : num_cells + new_entries] = similarity_block.data
            num_cells += new_entries

        # Create final sparse matrix from accumulated values
        return csr_matrix(
            (values[:num_cells], (rows[:num_cells], cols[:num_cells])),
            shape=(Pui.shape[1], Pui.shape[1]),
        )

    def _apply_sparse_topk(
        self, matrix: Union[csr_matrix, coo_matrix], k: int
    ) -> csr_matrix:
        """
        Applies top-k filtering to each row of a sparse matrix efficiently.

        Args:
            matrix (Union[csr_matrix, coo_matrix]): Input sparse matrix.
            k (int): Number of maximum values to preserve per row. A value
                less than or equal to zero preserves nothing.

        Returns:
            csr_matrix: Sparse matrix with only the top-k elements per row preserved
        """
        # COO matrices do not support row indexing, so work on a CSR view
        # (a no-op when the input is already CSR).
        csr = matrix.tocsr()
        filtered_matrix: lil_matrix = lil_matrix(csr.shape, dtype=np.float32)

        # ``data[-0:]`` would select the whole row, so k <= 0 has to be
        # handled explicitly: nothing is kept.
        if k <= 0:
            return filtered_matrix.tocsr()

        # Process each row individually
        for i in range(csr.shape[0]):
            row: coo_matrix = csr[i].tocoo()
            if row.nnz == 0:  # Skip empty rows
                continue

            # Determine actual number of elements to keep
            top_k: int = min(k, row.nnz)

            # Find indices of top-k largest values using argpartition
            idx: np.ndarray = np.argpartition(row.data, -top_k)[-top_k:]

            # Store top-k values in their original column positions
            filtered_matrix[i, row.col[idx]] = row.data[idx]

        # Convert to CSR format for efficient subsequent operations
        return filtered_matrix.tocsr()

warprec.recommenders.collaborative_filtering_recommender.graph_based.sgcl.SGCL

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of SGCL from "SGCL: Unifying Self-Supervised and Supervised Learning for Graph Recommendation" (RecSys '25).

SGCL unifies the recommendation and contrastive learning tasks into a single supervised contrastive loss function. It eliminates the need for data augmentation, negative sampling, and multi-task optimization.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

temperature float

The temperature parameter for the contrastive loss.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgcl.py
@model_registry.register(name="SGCL")
class SGCL(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of SGCL from
        "SGCL: Unifying Self-Supervised and Supervised Learning for Graph Recommendation" (RecSys '25).

    A single supervised contrastive objective replaces the usual pair of
    recommendation and contrastive losses, so the model needs no data
    augmentation, no negative sampling and no multi-task weighting.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        temperature (float): The temperature parameter for the contrastive loss.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    temperature: float
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # One extra item slot (index n_items) serves as the padding entry.
        n_item_slots = self.n_items + 1

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            n_item_slots, self.embedding_size, padding_idx=self.n_items
        )

        # Normalized user-item adjacency, built once (LightGCN style)
        interaction_graph = interactions.get_sparse().tocoo()
        self.adj = self.get_adj_mat(
            interaction_graph,
            self.n_users,
            n_item_slots,
            normalize=True,
        )

        self.apply(self._init_weights)
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        # Only observed interactions are needed: no negatives are sampled.
        loader_options = dict(neg_samples=0, batch_size=self.batch_size)
        return interactions.get_pointwise_dataloader(**loader_options, **kwargs)

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass of SGCL (Standard LightGCN propagation)."""
        # Layer 0 is the concatenation of the raw user and item tables.
        layer_outputs = [
            torch.cat(
                [self.user_embedding.weight, self.item_embedding.weight], dim=0
            )
        ]

        # Lazily move the adjacency to the module's device.
        if self.adj.device() != self.device:
            self.adj = self.adj.to(self.device)

        for _ in range(self.n_layers):
            layer_outputs.append(self.adj.matmul(layer_outputs[-1]))

        # Mean over layer 0 .. n_layers, then undo the concatenation.
        pooled = torch.stack(layer_outputs, dim=0).mean(dim=0)
        user_final, item_final = torch.split(
            pooled, [self.n_users, self.n_items + 1]
        )

        return user_final, item_final

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        # The rating column of the batch is not used by SGCL.
        user, pos_item, _ = batch

        propagated_users, propagated_items = self.forward()

        # Unit-length embeddings of the batch, each [Batch, Dim]
        users_unit = F.normalize(propagated_users[user], dim=1)
        items_unit = F.normalize(propagated_items[pos_item], dim=1)

        # SGCL loss:
        #   L = - log ( exp(pos) / (sum(exp(uu)) + sum(exp(vv))) )
        #     = - pos + log(sum(exp(uu)) + sum(exp(vv)))
        tau = self.temperature

        # Alignment term: (u * i) / tau
        alignment = (users_unit * items_unit).sum(dim=1) / tau

        # Uniformity term: in-batch user-user and item-item similarities,
        # each [Batch, Batch], joined along the column axis.
        user_user = torch.mm(users_unit, users_unit.t()) / tau
        item_item = torch.mm(items_unit, items_unit.t()) / tau
        uniformity = torch.logsumexp(
            torch.cat([user_user, item_item], dim=1), dim=1
        )

        # Loss = - (Alignment - Uniformity)
        contrastive = -(alignment - uniformity).mean()

        # L2 penalty on the raw (non-propagated) embeddings of the batch
        penalty = self.reg_weight * self.reg_loss(
            self.user_embedding(user), self.item_embedding(pos_item)
        )

        loss = contrastive + penalty
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Propagated embeddings for every user and every item
        all_users, all_items = self.propagate_embeddings()

        batch_users = all_users[user_indices]  # [batch_size, embedding_size]

        if item_indices is not None:
            # Case 'sampled': one candidate list per user
            candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
            return torch.einsum("be,bse->bs", batch_users, candidates)

        # Case 'full': every item except the trailing padding row
        catalogue = all_items[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item -> [batch_size, n_items]
        return torch.einsum("be,ie->bi", batch_users, catalogue)

forward()

Forward pass of SGCL (Standard LightGCN propagation).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgcl.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Forward pass of SGCL (Standard LightGCN propagation)."""
    # Layer 0 is the concatenation of the raw user and item tables.
    layer_outputs = [
        torch.cat([self.user_embedding.weight, self.item_embedding.weight], dim=0)
    ]

    # Lazily move the adjacency to the module's device.
    if self.adj.device() != self.device:
        self.adj = self.adj.to(self.device)

    for _ in range(self.n_layers):
        layer_outputs.append(self.adj.matmul(layer_outputs[-1]))

    # Mean over layer 0 .. n_layers, then undo the concatenation.
    pooled = torch.stack(layer_outputs, dim=0).mean(dim=0)
    user_final, item_final = torch.split(pooled, [self.n_users, self.n_items + 1])

    return user_final, item_final

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Propagated embeddings for every user and every item
    all_users, all_items = self.propagate_embeddings()

    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Case 'sampled': one candidate list per user
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample -> [batch_size, pad_seq]
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Case 'full': every item except the trailing padding row
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item -> [batch_size, n_items]
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.simgcl.SimGCL

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of SimGCL from "Are Graph Augmentations Necessary? Simple Graph Contrastive Learning for Recommendation" (SIGIR 2022).

SimGCL discards graph augmentations entirely and instead adds uniform random noise to node embeddings at each GCN layer to create contrastive views. Two independently perturbed views are generated per forward pass; an InfoNCE loss maximizes agreement between same-node representations across the two views while a BPR loss drives the recommendation task.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

lambda_ float

Coefficient for the contrastive loss.

eps float

L2 norm of the perturbation noise vectors.

temperature float

Temperature for InfoNCE loss.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simgcl.py
@model_registry.register(name="SimGCL")
class SimGCL(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of SimGCL from
        "Are Graph Augmentations Necessary? Simple Graph Contrastive Learning
        for Recommendation" (SIGIR 2022).

    SimGCL discards graph augmentations entirely and instead adds uniform
    random noise to node embeddings at each GCN layer to create contrastive
    views.  Two independently perturbed views are generated per forward pass;
    an InfoNCE loss maximizes agreement between same-node representations
    across the two views while a BPR loss drives the recommendation task.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        lambda_ (float): Coefficient for the contrastive loss.
        eps (float): L2 norm of the perturbation noise vectors.
        temperature (float): Temperature for InfoNCE loss.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_layers: int
    lambda_: float  # Eq. 1 — weight of the CL loss in the joint objective
    eps: float  # Eq. 7 — perturbation magnitude (||Δ||_2 = ε)
    temperature: float  # Eq. 2 — InfoNCE temperature τ
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Initialize Embeddings
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        self.apply(self._init_weights)

        # Symmetric-normalized adjacency matrix  — Eq. 3
        # Only built once; SimGCL never reconstructs/perturbs the graph.
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # Losses
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.nce_loss = InfoNCELoss(self.temperature)

        # Hand-over slots for the two contrastive views produced by
        # ``forward(perturbed=True)``; each holds a (users, items) tuple.
        # Defined here so the attributes always exist, and annotated inline
        # rather than at class level, where the hyperparameters live.
        self._cl_view1: Optional[Tuple[Tensor, Tensor]] = None
        self._cl_view2: Optional[Tuple[Tensor, Tensor]] = None

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _perturb_embedding(self, embedding: Tensor) -> Tensor:
        """Add noise to embeddings following Eq. 7 of the paper.

        Noise generation:
            Δ_bar ~ U(0, 1)  (same shape as embedding)
            Δ = Δ_bar ⊙ sign(e_i)        — same hyperoctant constraint
            Δ = eps * Δ / ||Δ||_2         — normalize to L2 = eps

        Args:
            embedding (Tensor): Node embeddings [N, d].

        Returns:
            Tensor: Perturbed embeddings [N, d].
        """
        # Eq. 7 — sample uniform noise and constrain to same hyperoctant
        noise = torch.rand_like(embedding)  # Δ_bar ~ U(0,1)
        noise = noise * embedding.sign()  # Δ = Δ_bar ⊙ sign(e_i)
        # Normalize each row to unit L2 norm, then scale by eps
        noise = F.normalize(noise, p=2, dim=1) * self.eps  # ||Δ||_2 = eps
        return embedding + noise

    def _simgcl_encode(self, ego_embeddings: Tensor, perturbed: bool) -> Tensor:
        """Run the layer-wise propagation and mean-pool layers 1 .. L (Eq. 8).

        Args:
            ego_embeddings (Tensor): Stacked user and item embeddings E^(0).
            perturbed (bool): If True, fresh noise (Eq. 7) is added to the
                output of every layer before it is accumulated.

        Returns:
            Tensor: The aggregated embeddings, same shape as the input.
        """
        # E^(0) itself is deliberately left out of the aggregation.
        layer_sum = torch.zeros_like(ego_embeddings)
        current = ego_embeddings
        for _ in range(self.n_layers):
            current = self.adj.matmul(current)  # E^(l) = A * E^(l-1)
            if perturbed:
                current = self._perturb_embedding(current)  # Eq. 7
            layer_sum.add_(current)
        # Mean pooling: (1/L) * sum
        return layer_sum / self.n_layers

    def forward(
        self, perturbed: bool = False
    ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
        """Propagate embeddings through the LightGCN encoder.

        When ``perturbed=True`` two independent noise-augmented views are
        produced alongside the clean final embeddings.

        The raw ego embeddings E(0) are skipped in the final aggregation
        (Eq. 8): E = (1/L) * sum_{l=1}^{L} E^(l).

        Args:
            perturbed (bool): If True, generates two noisy contrastive views.

        Returns:
            Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
                ``(user_final, item_final, None, None)``. The first two
                entries are the clean aggregated embeddings; the last two
                are placeholders that are always None. The contrastive
                views are not returned: when ``perturbed=True`` they are
                stored on ``_cl_view1`` and ``_cl_view2``, each as a
                ``(users, items)`` tuple.
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Ensure adj is on the same device
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        split_sizes = [self.n_users, self.n_items + 1]

        # --- Clean encoder (used for the BPR recommendation loss) ---
        clean_final = self._simgcl_encode(ego_embeddings, perturbed=False)
        user_final, item_final = torch.split(clean_final, split_sizes)

        # --- Perturbed encoders (two independent views for CL) ---
        if perturbed:
            # View 1 is fully computed before view 2 so the noise is drawn
            # in the same order as with two separate loops.
            v1_final = self._simgcl_encode(ego_embeddings, perturbed=True)
            v2_final = self._simgcl_encode(ego_embeddings, perturbed=True)

            user_v1, item_v1 = torch.split(v1_final, split_sizes)
            user_v2, item_v2 = torch.split(v2_final, split_sizes)
            # Store views for training_step to consume
            self._cl_view1 = (user_v1, item_v1)
            self._cl_view2 = (user_v2, item_v2)

        return user_final, item_final, None, None

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the joint BPR + CL + L2 loss.

        Loss = L_rec + lambda * L_cl + reg_weight * L_reg    (Eq. 1)

        Args:
            batch (Any): Tuple of (user, pos_item, neg_item).
            batch_idx (int): The current batch index.

        Returns:
            Tensor: The computed loss for the batch.

        Raises:
            RuntimeError: If the perturbed forward pass did not provide the
                two contrastive views.
        """
        user, pos_item, neg_item = batch

        # Forward pass: clean embeddings + two perturbed views
        users_final, items_final, _, _ = self.forward(perturbed=True)

        # Retrieve the two CL views generated during forward
        view1, view2 = self._cl_view1, self._cl_view2
        if view1 is None or view2 is None:
            raise RuntimeError(
                "forward(perturbed=True) did not produce the contrastive views"
            )
        # Release the instance references right away: the views are only
        # needed for this step and would otherwise stay alive (two full
        # [N, d] tensors) until the next training step overwrites them.
        self._cl_view1 = None
        self._cl_view2 = None
        user_v1, item_v1 = view1
        user_v2, item_v2 = view2

        # --- BPR recommendation loss — Eq. 5 ---
        batch_users = users_final[user]
        batch_pos = items_final[pos_item]
        batch_neg = items_final[neg_item]

        pos_scores = (batch_users * batch_pos).sum(dim=1)
        neg_scores = (batch_users * batch_neg).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # --- Contrastive loss — Eq. 2 (InfoNCE between view1 and view2) ---
        # NOTE(review): batch indices are used as-is, so a user/item that
        # appears several times in the batch is fed to the loss several
        # times — confirm this is intended for InfoNCELoss.
        cl_loss_user = self.nce_loss(user_v1[user], user_v2[user])
        cl_loss_item = self.nce_loss(item_v1[pos_item], item_v2[pos_item])
        cl_loss = self.lambda_ * (cl_loss_user + cl_loss_item)  # Eq. 1

        # --- L2 regularization on ego embeddings ---
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Joint loss — Eq. 1
        loss = bpr_loss + cl_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # ASSUMPTION: At inference time we use the clean encoder (no noise).
        # This is consistent with the paper which only uses noise during training.
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (padding row dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward(perturbed=False)

Propagate embeddings through the LightGCN encoder.

When perturbed=True two independent noise-augmented views are produced alongside the clean final embeddings.

SIMPLIFICATION: The paper states that E(0) (the raw ego embeddings)
is skipped in the final aggregation (Eq. 8). We follow this exactly:
E = (1/L) * sum_{l=1}^{L} E^(l).

Parameters:

Name Type Description Default
perturbed bool

If True, generates two noisy contrastive views.

False

Returns:

Type Description
Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]

Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: (user_final, item_final, None, None). The first two entries are the clean aggregated embeddings; the last two are placeholders that are always None. The contrastive views are not returned: when perturbed=True they are stored on the instance attributes _cl_view1 and _cl_view2, each as a (users, items) tuple.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simgcl.py
def forward(
    self, perturbed: bool = False
) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
    """Run the LightGCN-style propagation and return the clean embeddings.

    The final representation is the mean of the propagated layers 1..L; the
    raw ego embeddings E^(0) are left out of the average (Eq. 8):
    E = (1/L) * sum_{l=1}^{L} E^(l).

    With ``perturbed=True`` two further passes are executed in which
    ``self._perturb_embedding`` is applied after every propagation step
    (Eq. 7). Their results are not returned; they are stored on the instance
    as ``_cl_view1`` and ``_cl_view2`` so the return arity stays fixed.

    Args:
        perturbed (bool): If True, also builds the two noisy contrastive
            views and stores them on the instance.

    Returns:
        Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
            ``(user_final, item_final, None, None)``. The first two entries
            are the clean aggregated user and item embeddings; the last two
            are always None. When ``perturbed=True`` the views are available
            as ``self._cl_view1`` / ``self._cl_view2``, each a
            ``(users, items)`` tuple. When ``perturbed=False`` those
            attributes are left untouched.
    """
    base = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Keep the adjacency on the same device as the embeddings.
    if self.adj.device() != base.device:
        self.adj = self.adj.to(base.device)

    # Items carry one extra row (n_items + 1) in the stacked embedding matrix.
    split_sizes = [self.n_users, self.n_items + 1]

    def _encode(noisy: bool) -> Tuple[Tensor, Tensor]:
        """Propagate n_layers times and mean-pool layers 1..L (Eq. 8)."""
        layer_sum = torch.zeros_like(base)
        hidden = base
        for _ in range(self.n_layers):
            hidden = self.adj.matmul(hidden)  # E^(l) = A * E^(l-1)
            if noisy:
                hidden = self._perturb_embedding(hidden)  # Eq. 7
            layer_sum += hidden
        users, items = torch.split(layer_sum / self.n_layers, split_sizes)
        return users, items

    # Clean encoder: feeds the BPR recommendation loss and inference.
    user_final, item_final = _encode(noisy=False)

    if perturbed:
        # Two passes with independently applied noise; view 1 is completed
        # before view 2 starts.
        first_view = _encode(noisy=True)
        second_view = _encode(noisy=True)
        # Stored for training_step to consume.
        self._cl_view1 = first_view
        self._cl_view2 = second_view

    return user_final, item_final, None, None

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simgcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the learned embeddings.

    Embeddings come from ``self.propagate_embeddings()``; no perturbed
    forward pass is triggered from here.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): Unused positional arguments.
        item_indices (Optional[Tensor]): Candidate item indices per user.
            If None, every item except the last embedding row is scored.
        **kwargs (Any): Unused keyword arguments.

    Returns:
        Tensor: Dot-product scores, shaped [batch_size, n_items] for full
            prediction or [batch_size, pad_seq] for sampled prediction.
    """
    all_users, all_items = self.propagate_embeddings()
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled prediction: each user gets its own candidate list.
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, s: sample, e: embedding
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Full prediction: the last item row is dropped before scoring.
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, i: item, e: embedding
    return torch.einsum("be,ie->bi", batch_users, catalogue)

training_step(batch, batch_idx)

Compute the joint BPR + CL + L2 loss.

Loss = L_rec + lambda * L_cl + reg_weight * L_reg (Eq. 1)

Parameters:

Name Type Description Default
batch Any

Tuple of (user, pos_item, neg_item).

required
batch_idx int

The current batch index.

required

Returns:

Name Type Description
Tensor Tensor

The computed loss for the batch.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simgcl.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Compute the joint training objective for one batch.

    Loss = L_rec + lambda * L_cl + reg_weight * L_reg    (Eq. 1)

    The forward pass is run with ``perturbed=True`` so that the two
    contrastive views are refreshed on ``_cl_view1`` / ``_cl_view2`` before
    they are read here.

    Args:
        batch (Any): Tuple of (user, pos_item, neg_item).
        batch_idx (int): The current batch index (unused).

    Returns:
        Tensor: The computed loss for the batch.
    """
    users, positives, negatives = batch

    # Clean embeddings for ranking; the call also stores both noisy views.
    clean_users, clean_items, _, _ = self.forward(perturbed=True)
    view1_users, view1_items = self._cl_view1
    view2_users, view2_items = self._cl_view2

    # Ranking term (Eq. 5): dot-product scores of positives vs. negatives.
    anchor = clean_users[users]
    pos_scores = (anchor * clean_items[positives]).sum(dim=1)
    neg_scores = (anchor * clean_items[negatives]).sum(dim=1)
    ranking_term = self.bpr_loss(pos_scores, neg_scores)

    # Contrastive term (Eq. 2): InfoNCE between the two views, computed for
    # the batch users and the positive items, then scaled by lambda (Eq. 1).
    user_contrast = self.nce_loss(view1_users[users], view2_users[users])
    item_contrast = self.nce_loss(view1_items[positives], view2_items[positives])
    contrastive_term = self.lambda_ * (user_contrast + item_contrast)

    # L2 regularization on the raw (ego) embeddings used in this batch.
    regularization_term = self.reg_weight * self.reg_loss(
        self.user_embedding(users),
        self.item_embedding(positives),
        self.item_embedding(negatives),
    )

    # Joint objective (Eq. 1).
    total = ranking_term + contrastive_term + regularization_term
    self.log("loss", total, prog_bar=True, on_step=False, on_epoch=True)
    return total

warprec.recommenders.collaborative_filtering_recommender.graph_based.simrec.SimRec

Bases: IterativeRecommender

Implementation of SimRec from Graph-less Collaborative Filtering via Contrastive Knowledge Distillation (WWW 2023)

SimRec distills knowledge from a GCN teacher into a lightweight MLP student using prediction-level KD (L1), embedding-level contrastive KD (L2), and adaptive contrastive regularization (L3) to address over-smoothing.

Parameters:

Name Type Description Default
params dict

Model hyperparameters.

required
info dict

Dataset metadata (n_users, n_items).

required
interactions Interactions

Training user-item interactions.

required
*args Any

Variable length argument list.

()
seed int

Random seed.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

Uses POS_NEG_LOADER for BPR-style contrastive sampling of T2.

embedding_size int

Dimensionality of embedding vectors.

n_teacher_layers int

GCN propagation layers.

n_student_layers int

MLP FC layers.

teacher_reg_weight float

L2 weight for teacher pre-training.

lambda1 float

Weight for prediction-level distillation loss L1.

lambda2 float

Weight for embedding-level distillation loss L2.

lambda3 float

Weight for adaptive contrastive regularization L3.

lambda4 float

Weight for MLP weight-decay L4.

tau1 float

Temperature for prediction-level distillation.

tau2 float

Temperature for embedding-level distillation.

tau3 float

Temperature for adaptive contrastive regularization.

eps float

Epsilon for adaptive weight adjustment.

batch_size_kd int

Number of KD samples |T1| per step.

teacher_epochs int

Epochs to pre-train the GCN teacher.

batch_size int

Mini-batch size |T2|.

epochs int

Number of student training epochs.

learning_rate float

Adam LR for student.

teacher_learning_rate float

Adam LR for teacher.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simrec.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
@model_registry.register(name="SimRec")
class SimRec(IterativeRecommender):
    """Implementation of SimRec from
    Graph-less Collaborative Filtering via Contrastive Knowledge Distillation (WWW 2023)

    SimRec distills knowledge from a GCN teacher into a lightweight MLP student
    using prediction-level KD (L1), embedding-level contrastive KD (L2), and
    adaptive contrastive regularization (L3) to address over-smoothing.

    Args:
        params (dict): Model hyperparameters.
        info (dict): Dataset metadata (n_users, n_items).
        interactions (Interactions): Training user-item interactions.
        *args (Any): Variable length argument list.
        seed (int): Random seed.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: Uses POS_NEG_LOADER for BPR-style contrastive sampling of T2.
        embedding_size (int): Dimensionality of embedding vectors.
        n_teacher_layers (int): GCN propagation layers.
        n_student_layers (int): MLP FC layers.
        teacher_reg_weight (float): L2 weight for teacher pre-training.
        lambda1 (float): Weight for prediction-level distillation loss L1.
        lambda2 (float): Weight for embedding-level distillation loss L2.
        lambda3 (float): Weight for adaptive contrastive regularization L3.
        lambda4 (float): Weight for MLP weight-decay L4.
        tau1 (float): Temperature for prediction-level distillation.
        tau2 (float): Temperature for embedding-level distillation.
        tau3 (float): Temperature for adaptive contrastive regularization.
        eps (float): Epsilon for adaptive weight adjustment.
        batch_size_kd (int): Number of KD samples |T1| per step.
        teacher_epochs (int): Epochs to pre-train the GCN teacher.
        batch_size (int): Mini-batch size |T2|.
        epochs (int): Number of student training epochs.
        learning_rate (float): Adam LR for student.
        teacher_learning_rate (float): Adam LR for teacher.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    n_teacher_layers: int
    n_student_layers: int
    teacher_reg_weight: float
    lambda1: float
    lambda2: float
    lambda3: float
    lambda4: float
    tau1: float
    tau2: float
    tau3: float
    eps: float
    batch_size_kd: int
    teacher_epochs: int
    batch_size: int
    epochs: int
    learning_rate: float
    teacher_learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the GCN teacher, the MLP student and the training state.

        Args:
            params (dict): Model hyperparameters, forwarded to the base class.
            info (dict): Dataset metadata, forwarded to the base class.
            interactions (Interactions): Training interactions; only used here
                to build the normalized adjacency handed to the teacher.
            *args (Any): Extra positional arguments for the base class.
            seed (int): Random seed, forwarded to the base class.
            **kwargs (Any): Extra keyword arguments for the base class.
        """
        # NOTE(review): attributes such as n_users, n_items, embedding_size and the
        # layer counts are read below but never assigned in this method —
        # presumably populated by the base-class constructor; confirm.
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # ---- Build normalized adjacency matrix (D^{-1/2} (A_bar + I) D^{-1/2}) ----
        adj_norm = self._build_norm_adj(
            interactions.get_sparse().tocoo(), self.n_users, self.n_items
        )

        # ---- Teacher (frozen after pre-training) ----
        self.teacher = GCNTeacher(
            n_users=self.n_users,
            n_items=self.n_items,
            embedding_size=self.embedding_size,
            n_layers=self.n_teacher_layers,
            adj_norm=adj_norm,
        )

        # ---- Student (trained with KD) ----
        # Eq 5 — initial embeddings h_bar^(s)
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row at index n_items serves as the padding item.
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )
        # Eq 5 — shared MLP network (the same module encodes users and items)
        self.mlp = MLPStudent(self.embedding_size, self.n_student_layers)

        # ---- Losses ----
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

        # ---- Pre-training state ----
        # ASSUMPTION: teacher pre-training is done in the first `teacher_epochs` epochs
        # using the same training_step method with a phase flag.
        self._teacher_pretrained: bool = False

        # ---- Teacher optimizer (separate from Lightning-managed student optimizer) ----
        # Created lazily on the first teacher pre-training step.
        self._teacher_optimizer: Optional[torch.optim.Adam] = None

        # ---- Weight initialization ----
        # Applied last so it visits every sub-module registered above
        # (teacher included).
        self.apply(self._init_weights)

    @staticmethod
    def _build_norm_adj(
        interaction_matrix: coo_matrix, n_users: int, n_items: int
    ) -> Tensor:
        """Build symmetric-normalized adjacency matrix D^{-1/2}(A_bar+I)D^{-1/2}.

        # Eq 4 — symmetric normalization used in the GCN teacher propagation

        Args:
            interaction_matrix (coo_matrix): User-item interactions in COO format.
            n_users (int): Number of users.
            n_items (int): Number of items (excluding padding).

        Returns:
            Tensor: Sparse float tensor of shape [n_users + n_items + 1, n_users + n_items + 1].
        """
        # SIMPLIFICATION: We include a padding node (item index n_items) so that the
        # adjacency dimension aligns with the item embedding which uses padding_idx=n_items.
        total = n_users + n_items + 1

        user_nodes = interaction_matrix.row.astype(np.int64)
        item_nodes = (interaction_matrix.col + n_users).astype(np.int64)

        # Build bipartite + identity edges (A_bar + I)
        row = np.concatenate([user_nodes, item_nodes, np.arange(total, dtype=np.int64)])
        col = np.concatenate([item_nodes, user_nodes, np.arange(total, dtype=np.int64)])
        data = np.ones(len(row), dtype=np.float32)

        # Aggregate duplicates via CSR
        adj_csr = csr_matrix((data, (row, col)), shape=(total, total))
        adj_coo = adj_csr.tocoo()

        rows_t = torch.from_numpy(adj_coo.row.astype(np.int64))
        cols_t = torch.from_numpy(adj_coo.col.astype(np.int64))
        vals_t = torch.from_numpy(adj_coo.data.astype(np.float32))

        # Degree vector for symmetric normalization
        deg = torch.zeros(total, dtype=torch.float32)
        deg.scatter_add_(0, rows_t, vals_t)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float("inf")] = 0.0

        # D^{-1/2} * A * D^{-1/2}
        norm_vals = deg_inv_sqrt[rows_t] * vals_t * deg_inv_sqrt[cols_t]

        adj_norm = torch.sparse_coo_tensor(
            torch.stack([rows_t, cols_t], dim=0),
            norm_vals,
            (total, total),
        ).coalesce()
        return adj_norm

    def get_dataloader(
        self, interactions: Interactions, sessions: Sessions, **kwargs: Any
    ) -> DataLoader:
        """Return the contrastive dataloader built from the interactions.

        Args:
            interactions (Interactions): Source of the training batches.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded unchanged to
                ``interactions.get_contrastive_dataloader``.

        Returns:
            DataLoader: Loader created with ``batch_size=self.batch_size``.
        """
        loader = interactions.get_contrastive_dataloader(
            batch_size=self.batch_size, **kwargs
        )
        return loader

    def _pretrain_teacher_step(
        self, user: Tensor, pos_item: Tensor, neg_item: Tensor
    ) -> Tensor:
        """One BPR gradient step on the GCN teacher.

        # Eq 11 — L^(t) = -sum log sigma(y^(t)_{i,j} - y^(t)_{i,k}) + lambda^(t) ||H_bar^(t)||^2_F
        """
        if self._teacher_optimizer is None:
            self._teacher_optimizer = torch.optim.Adam(
                self.teacher.parameters(), lr=self.teacher_learning_rate
            )

        self._teacher_optimizer.zero_grad()

        user_emb, item_emb = self.teacher.get_user_item_embeddings()
        u = user_emb[user]  # [B, d]
        pos = item_emb[pos_item]  # [B, d]
        neg = item_emb[neg_item]  # [B, d]

        pos_scores = (u * pos).sum(dim=-1)
        neg_scores = (u * neg).sum(dim=-1)

        # Eq 11 — BPR loss
        bpr = self.bpr_loss(pos_scores, neg_scores)

        # Eq 11 — weight-decay on initial embeddings
        reg = self.teacher_reg_weight * self.reg_loss(
            self.teacher.user_embedding(user),
            self.teacher.item_embedding(pos_item),
            self.teacher.item_embedding(neg_item),
        )
        loss = bpr + reg
        loss.backward()
        self._teacher_optimizer.step()
        return loss.detach()

    def forward(self, user_indices: Tensor) -> Tensor:
        """Return the student (MLP-refined) embeddings of the given users.

        # Eq 5 — h^(s)_i = FC_{L'}(h_bar^(s)_i)

        This is a thin delegate to ``_encode_user``; it exists so the class
        exposes the ``forward`` entry point expected by the Lightning/WarpRec
        base. Items are encoded through ``_encode_item``; training_step calls
        both encoders directly.

        Args:
            user_indices (Tensor): User indices, shape [B].

        Returns:
            Tensor: Refined user embeddings, shape [B, d].
        """
        return self._encode_user(user_indices)

    def _encode_user(self, user_idx: Tensor) -> Tensor:
        """Eq 5 — student embedding for users."""
        return self.mlp(self.user_embedding(user_idx))

    def _encode_item(self, item_idx: Tensor) -> Tensor:
        """Eq 5 — student embedding for items."""
        return self.mlp(self.item_embedding(item_idx))

    def _loss_l1(
        self, user_s: Tensor, teacher_user_emb: Tensor, teacher_item_emb: Tensor
    ) -> Tensor:
        """Prediction-level knowledge distillation loss.

        Samples |T1| random (u_i, v_j, v_k) triplets where v_j, v_k are drawn
        uniformly from ALL items (not just positive/negative).

        # Eq 6 — z_{i,j,k} = h_i^T h_j - h_i^T h_k
        # Eq 7 — L1 = sum -[z_bar^(t) log z_bar^(s) + (1-z_bar^(t)) log(1-z_bar^(s))]
        #             z_bar = sigmoid(z / tau1)

        # SIMPLIFICATION: T1 is a random sub-batch of size min(batch_size_kd, n_items^2)
        #   drawn by sampling random item indices rather than building all pairs.
        #   This keeps memory bounded without changing the expectation of the gradient.
        """
        B = user_s.size(0)
        device = user_s.device

        # Sample random item pairs (v_j, v_k) from all items
        kd_size = min(self.batch_size_kd, B * 16)
        j_idx = torch.randint(0, self.n_items, (kd_size,), device=device)
        k_idx = torch.randint(0, self.n_items, (kd_size,), device=device)

        # User indices aligned with item samples (cycle over the batch)
        u_idx = torch.arange(kd_size, device=device) % B

        u_s = user_s[u_idx]  # [kd_size, d]
        h_j_t = teacher_item_emb[j_idx]  # [kd_size, d]
        h_k_t = teacher_item_emb[k_idx]

        # Eq 6 — difference scores for teacher
        # ASSUMPTION: teacher embeddings are from the final summed H^(t), not per-layer
        h_j_s = self._encode_item(j_idx)  # student item embeddings
        h_k_s = self._encode_item(k_idx)

        u_t = teacher_user_emb[torch.arange(B, device=device)[u_idx % B]]

        z_s = (u_s * h_j_s).sum(-1) - (u_s * h_k_s).sum(-1)  # Eq 6 — student
        z_t = (u_t * h_j_t).sum(-1) - (u_t * h_k_t).sum(-1)  # Eq 6 — teacher (no grad)
        z_t = z_t.detach()

        # Eq 7 — soft labels via sigmoid with temperature
        z_bar_t = torch.sigmoid(z_t / self.tau1)
        z_bar_s = torch.sigmoid(z_s / self.tau1)

        # Eq 7 — binary cross-entropy between soft teacher and student labels
        loss = F.binary_cross_entropy(z_bar_s.clamp(1e-7, 1 - 1e-7), z_bar_t.detach())
        return loss

    def _loss_l2(
        self,
        user_s: Tensor,
        item_s: Tensor,
        user_idx: Tensor,
        item_idx: Tensor,
        teacher_layer_embeddings: list,
    ) -> Tensor:
        """Embedding-level contrastive knowledge distillation.

        # Eq 8 — InfoNCE between student embeddings and sum of high-order teacher layers (l=2..L)

        # ASSUMPTION: "high-order" is defined as layers l>=2 as per the paper's Eq 8 notation.
        #   If n_teacher_layers < 2, we fall back to using all available teacher layers.
        """
        # Eq 8 — high-order teacher embeddings: sum of layers l=2..L
        n_layers = len(teacher_layer_embeddings) - 1  # 0-indexed; last is layer L
        start = min(2, n_layers)  # fallback if fewer than 2 layers

        # Sum layers [start..L] for users and items separately
        # teacher_layer_embeddings[l] has shape [(n_users + n_items + 1), d]
        high_order_layers = teacher_layer_embeddings[start:]
        if len(high_order_layers) == 0:
            high_order_layers = teacher_layer_embeddings  # fallback
        high_order = (
            torch.stack(high_order_layers, dim=0).sum(dim=0).detach()
        )  # no grad through teacher

        t_user_high = high_order[user_idx]  # [B, d] high-order teacher user embeddings
        t_item_high = high_order[
            self.n_users + item_idx
        ]  # [B, d] high-order teacher item embeddings

        # Eq 8 — InfoNCE loss over users
        # numerator: cos(h^(s)_i, sum_{l>=2} h^(t)_{i,l}) / tau2
        def _infonce(
            anchors: Tensor, positives: Tensor, all_negatives: Tensor
        ) -> Tensor:
            """InfoNCE: -log( exp(cos(a,p)/tau) / sum_j exp(cos(a,n_j)/tau) )."""
            anchors_n = F.normalize(anchors, dim=-1)
            positives_n = F.normalize(positives, dim=-1)
            all_n = F.normalize(all_negatives, dim=-1)

            pos_sim = (anchors_n * positives_n).sum(
                -1, keepdim=True
            ) / self.tau2  # [B, 1]
            all_sim = torch.matmul(anchors_n, all_n.T) / self.tau2  # [B, N]

            # Eq 8 — numerator exp / denominator sum exp
            loss = -pos_sim.squeeze(1) + torch.logsumexp(all_sim, dim=-1)
            return loss.mean()

        l2_user = _infonce(user_s, t_user_high, t_user_high)
        l2_item = _infonce(item_s, t_item_high, t_item_high)
        return l2_user + l2_item

    def _compute_adaptive_weight(
        self,
        user_s: Tensor,
        item_s: Tensor,
        rec_loss: Tensor,
        l1_loss: Tensor,
        l2_loss: Tensor,
    ) -> Tuple[Tensor, Tensor]:
        """Compute per-node adaptive contrastive regularization weights omega_i, omega_j.

        # Eq 10 — omega_i = 1-eps if grad_{1,2}^T grad_{rec} > grad_1^T grad_2, else 1+eps

        # SIMPLIFICATION: Computing per-sample exact gradient vectors is expensive.
        #   We approximate by computing scalar gradient norms via autograd for the
        #   current batch, rather than per-node gradient vectors as in the paper.
        #   The sign of the dot-products is approximated via the sign of
        #   (g_12 . g_rec > g_1 . g_2) evaluated at the batch level.
        """
        # Retain graph so we can differentiate multiple times
        g_12 = torch.autograd.grad(
            l1_loss + l2_loss,
            [user_s, item_s],
            retain_graph=True,
            allow_unused=True,
            create_graph=False,
        )
        g_rec = torch.autograd.grad(
            rec_loss,
            [user_s, item_s],
            retain_graph=True,
            allow_unused=True,
            create_graph=False,
        )
        g_1 = torch.autograd.grad(
            l1_loss,
            [user_s, item_s],
            retain_graph=True,
            allow_unused=True,
            create_graph=False,
        )
        g_2 = torch.autograd.grad(
            l2_loss,
            [user_s, item_s],
            retain_graph=True,
            allow_unused=True,
            create_graph=False,
        )

        # Helper to safely handle None gradients (e.g., l1_loss doesn't depend on item_s)
        def _safe_grad(
            grad_tuple: Tuple[Optional[Tensor], ...], idx: int, ref_tensor: Tensor
        ) -> Tensor:
            g = grad_tuple[idx]
            return g if g is not None else torch.zeros_like(ref_tensor)

        g_12_u = _safe_grad(g_12, 0, user_s)
        g_12_i = _safe_grad(g_12, 1, item_s)

        g_rec_u = _safe_grad(g_rec, 0, user_s)
        g_rec_i = _safe_grad(g_rec, 1, item_s)

        g_1_u = _safe_grad(g_1, 0, user_s)
        g_1_i = _safe_grad(g_1, 1, item_s)

        g_2_u = _safe_grad(g_2, 0, user_s)
        g_2_i = _safe_grad(g_2, 1, item_s)

        # Eq 10 — compare dot products to decide weight direction
        # Users (index 0)
        dot_12_rec_u = (g_12_u * g_rec_u).sum(dim=-1)  # Shape: [B]
        dot_1_2_u = (g_1_u * g_2_u).sum(dim=-1)  # Shape: [B]

        # Items (index 1)
        dot_12_rec_i = (g_12_i * g_rec_i).sum(dim=-1)  # Shape: [B]
        dot_1_2_i = (g_1_i * g_2_i).sum(dim=-1)  # Shape: [B]

        # Per-node conditions (Shape: [B, 1] so it broadcasts with losses later)
        cond_u = (dot_12_rec_u > dot_1_2_u).float().unsqueeze(1)
        omega_u = 1.0 - self.eps * cond_u + self.eps * (1.0 - cond_u)

        cond_i = (dot_12_rec_i > dot_1_2_i).float().unsqueeze(1)
        omega_i = 1.0 - self.eps * cond_i + self.eps * (1.0 - cond_i)

        return omega_u, omega_i

    def _loss_l3(
        self,
        user_s: Tensor,
        item_s: Tensor,
        omega_u: Tensor,
        omega_i: Tensor,
    ) -> Tensor:
        """Adaptive contrastive regularization pushing all node embeddings apart.

        # Eq 9 — L3 = sum_u [phi(u,U,w_u) + phi(u,V,w_u)] + sum_v phi(v,V,w_v)
        #   phi(u_i, U, w_i) = w_i * log sum_{u'} exp(h^(s)_i^T h^(s)_{i'} / tau3)

        # SIMPLIFICATION: All-pairs similarity within the batch is used as an
        #   approximation of the full-population sum in Eq 9. The paper sums over
        #   the full U and V; using batch-level negatives is standard practice
        #   and avoids O(N^2) complexity.
        """
        # Ensure omega is broadcastable to [B]
        omega_u = omega_u.squeeze(-1)
        omega_i = omega_i.squeeze(-1)

        # Eq 9 — user-user push-away
        uu_sim = torch.matmul(user_s, user_s.T) / self.tau3  # [B, B]
        uu_sim.fill_diagonal_(-float("inf"))  # exclude self-similarity
        l3_uu = (omega_u * torch.logsumexp(uu_sim, dim=-1)).mean()

        # Eq 9 — user-item push-away
        ui_sim = torch.matmul(user_s, item_s.T) / self.tau3  # [B, B]
        l3_ui = (omega_u * torch.logsumexp(ui_sim, dim=-1)).mean()

        # Eq 9 — item-item push-away
        ii_sim = torch.matmul(item_s, item_s.T) / self.tau3  # [B, B]
        ii_sim.fill_diagonal_(-float("inf"))  # exclude self-similarity
        l3_ii = (omega_i * torch.logsumexp(ii_sim, dim=-1)).mean()

        return l3_uu + l3_ui + l3_ii

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Combined teacher pre-training and student KD training.

        Phase 1 (epochs 0..teacher_epochs-1): Only update the GCN teacher via BPR.
        Phase 2 (epochs teacher_epochs..total): Freeze teacher, update student with
            the full SimRec objective L^(s) = Lrec + lambda1*L1 + lambda2*L2 + lambda3*L3 + lambda4*L4.

        # Algorithm 1 — SimRec learning procedure
        # Eq 12 — L^(s) = Lrec + lambda1*L1 + lambda2*L2 + lambda3*L3 + lambda4*L4
        """
        user, pos_item, neg_item = batch

        current_epoch = self.current_epoch  # provided by Lightning

        # ---- Phase 1: Teacher pre-training ----
        if current_epoch < self.teacher_epochs:
            # Algorithm 1, line 2 — Train GCN teacher until convergence
            teacher_loss = self._pretrain_teacher_step(user, pos_item, neg_item)
            self.log(
                "teacher_loss",
                teacher_loss,
                prog_bar=True,
                on_step=False,
                on_epoch=True,
            )
            # Return a zero-grad placeholder loss to satisfy Lightning (student params untouched)
            # SIMPLIFICATION: Return a zero tensor with grad to keep Lightning happy.
            dummy: Tensor = torch.tensor(0.0, device=user.device, requires_grad=True)
            for p in self.mlp.parameters():
                dummy = dummy + p.sum() * 0
            for p in self.user_embedding.parameters():
                dummy = dummy + p.sum() * 0
            return dummy

        # ---- Phase 2: Student KD training (teacher frozen) ----
        # Mark teacher as pre-trained and freeze its parameters
        if not self._teacher_pretrained:
            for param in self.teacher.parameters():
                param.requires_grad_(False)
            self._teacher_pretrained = True

        # Algorithm 1, line 4 — mini-batch T2 drawn from E
        # T2 = {(u_i, v_j)} from the batch (we have user + pos_item)
        user_idx = user
        item_idx = pos_item

        # Student embeddings (requires_grad=True for adaptive weight computation)
        user_s = self._encode_user(user_idx)  # h^(s)_i, Eq 5
        item_s = self._encode_item(item_idx)  # h^(s)_j, Eq 5

        # Retain grad for adaptive weight computation (Eq 10)
        user_s.retain_grad()
        item_s.retain_grad()

        # Teacher forward (no grad)
        with torch.no_grad():
            teacher_sum, teacher_layers = self.teacher()

        teacher_user_emb = teacher_sum[: self.n_users]
        teacher_item_emb = teacher_sum[self.n_users : self.n_users + self.n_items]

        # Eq 12 — Lrec: recommendation objective (positive pair similarity)
        # Lrec = -sum_{(u_i,v_j) in T2} y_{i,j}  (maximize positive scores)
        rec_scores = (user_s * item_s).sum(dim=-1)
        l_rec = -rec_scores.mean()

        # Algorithm 1, line 5 — sample T1 for prediction-level distillation
        # Eq 7 — L1 prediction-level KD
        l1 = self._loss_l1(user_s, teacher_user_emb, teacher_item_emb)

        # Algorithm 1, line 8 — Eq 8 embedding-level KD
        l2 = self._loss_l2(user_s, item_s, user_idx, item_idx, teacher_layers)

        # Algorithm 1, line 9 — Eq 10 adaptive weight computation
        # We compute omega using retained gradients from l_rec, l1, l2
        try:
            omega_u, omega_i = self._compute_adaptive_weight(
                user_s, item_s, l_rec, l1, l2
            )
        except (RuntimeError, ValueError):
            # ASSUMPTION: If gradient graph is broken (e.g., first step), fall back to omega=1
            omega_u = torch.ones(user_s.size(0), 1, device=user_s.device)
            omega_i = torch.ones(item_s.size(0), 1, device=item_s.device)

        # Algorithm 1, line 10 — Eq 9 adaptive contrastive regularization
        l3 = self._loss_l3(user_s, item_s, omega_u, omega_i)

        # Eq 12 — L4 weight-decay on student embedding tables
        l4 = self.reg_loss(
            self.user_embedding(user_idx),
            self.item_embedding(item_idx),
        )

        # Eq 12 — full student objective
        loss = (
            l_rec
            + self.lambda1 * l1
            + self.lambda2 * l2
            + self.lambda3 * l3
            + self.lambda4 * l4
        )

        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log("l_rec", l_rec.detach(), on_step=False, on_epoch=True)
        self.log("l1", l1.detach(), on_step=False, on_epoch=True)
        self.log("l2", l2.detach(), on_step=False, on_epoch=True)
        self.log("l3", l3.detach(), on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Compute student-model scores for a batch of users.

        # Eq 2 — y_{i,j} = h_i^T h_j,  h_i = M-Embed(h_bar_i)

        Users are encoded through ``_encode_user``; items are looked up in the
        item embedding table and passed through the same MLP before the inner
        product is taken.

        Args:
            user_indices (Tensor): User indices, shape [batch_size].
            *args (Any): Variable length argument list.
            item_indices (Optional[Tensor]): Optional item indices, shape [batch_size, k].
                If None, scores all items.
            **kwargs (Any): Arbitrary keyword arguments.

        Returns:
            Tensor: Score tensor, shape [batch_size, n_items] or [batch_size, k].
        """
        student_users = self._encode_user(user_indices)  # [B, d]

        if item_indices is not None:
            # Sampled prediction: one candidate list per user.
            candidate_raw = self.item_embedding(item_indices)
            candidate_items = self.mlp(candidate_raw)  # [B, k, d]
            return torch.einsum("be,bse->bs", student_users, candidate_items)  # [B, k]

        # Full prediction: only the first n_items rows of the table are scored.
        catalogue_raw = self.item_embedding.weight[: self.n_items]
        catalogue_items = self.mlp(catalogue_raw)  # [n_items, d]
        return torch.matmul(student_users, catalogue_items.T)  # [B, n_items]

forward(user_indices)

Apply MLP student to user embeddings.

Eq 5 — h^(s)_i = FC_{L'}(h_bar^(s)_i)

This signature satisfies the Lightning/WarpRec abstract requirement. For item encoding use _encode_item; training_step calls both directly.

Parameters:

Name Type Description Default
user_indices Tensor

User indices, shape [B].

required

Returns:

Name Type Description
Tensor Tensor

Refined user embeddings, shape [B, d].

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simrec.py
def forward(self, user_indices: Tensor) -> Tensor:
    """Return the student (MLP-refined) embeddings of the given users.

    # Eq 5 — h^(s)_i = FC_{L'}(h_bar^(s)_i)

    Only the user side is exposed here so that the single-argument signature
    required by the Lightning/WarpRec abstract interface is respected. Item
    embeddings are obtained through _encode_item, and training_step invokes
    both encoders directly.

    Args:
        user_indices (Tensor): User indices, shape [B].

    Returns:
        Tensor: Refined user embeddings, shape [B, d].
    """
    refined_users = self._encode_user(user_indices)
    return refined_users

predict(user_indices, *args, item_indices=None, **kwargs)

Score users against items using the trained MLP student.

Eq 2 — y_{i,j} = h_i^T h_j, h_i = M-Embed(h_bar_i)

Parameters:

Name Type Description Default
user_indices Tensor

User indices, shape [batch_size].

required
*args Any

Variable length argument list.

()
item_indices Optional[Tensor]

Optional item indices, shape [batch_size, k]. If None, scores all items.

None
**kwargs Any

Arbitrary keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Score tensor, shape [batch_size, n_items] or [batch_size, k].

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simrec.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Compute student-model scores for a batch of users.

    # Eq 2 — y_{i,j} = h_i^T h_j,  h_i = M-Embed(h_bar_i)

    Users are encoded through ``_encode_user``; items are looked up in the
    item embedding table and passed through the same MLP before the inner
    product is taken.

    Args:
        user_indices (Tensor): User indices, shape [batch_size].
        *args (Any): Variable length argument list.
        item_indices (Optional[Tensor]): Optional item indices, shape [batch_size, k].
            If None, scores all items.
        **kwargs (Any): Arbitrary keyword arguments.

    Returns:
        Tensor: Score tensor, shape [batch_size, n_items] or [batch_size, k].
    """
    student_users = self._encode_user(user_indices)  # [B, d]

    if item_indices is not None:
        # Sampled prediction: one candidate list per user.
        candidate_raw = self.item_embedding(item_indices)
        candidate_items = self.mlp(candidate_raw)  # [B, k, d]
        return torch.einsum("be,bse->bs", student_users, candidate_items)  # [B, k]

    # Full prediction: only the first n_items rows of the table are scored.
    catalogue_raw = self.item_embedding.weight[: self.n_items]
    catalogue_items = self.mlp(catalogue_raw)  # [n_items, d]
    return torch.matmul(student_users, catalogue_items.T)  # [B, n_items]

training_step(batch, batch_idx)

Combined teacher pre-training and student KD training.

Phase 1 (epochs 0..teacher_epochs-1): Only update the GCN teacher via BPR. Phase 2 (epochs teacher_epochs..total): Freeze teacher, update student with the full SimRec objective L^(s) = Lrec + lambda1 * L1 + lambda2 * L2 + lambda3 * L3 + lambda4 * L4.

Algorithm 1 — SimRec learning procedure
Eq 12 — L^(s) = Lrec + lambda1 * L1 + lambda2 * L2 + lambda3 * L3 + lambda4 * L4
Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/simrec.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Combined teacher pre-training and student KD training.

    Phase 1 (epochs 0..teacher_epochs-1): Only update the GCN teacher via BPR.
    Phase 2 (epochs teacher_epochs..total): Freeze teacher, update student with
        the full SimRec objective L^(s) = Lrec + lambda1*L1 + lambda2*L2 + lambda3*L3 + lambda4*L4.

    # Algorithm 1 — SimRec learning procedure
    # Eq 12 — L^(s) = Lrec + lambda1*L1 + lambda2*L2 + lambda3*L3 + lambda4*L4

    Args:
        batch (Any): Unpacked as a (user, pos_item, neg_item) triple. neg_item is
            only forwarded to the teacher pre-training step; the student phase
            does not read it.
        batch_idx (int): Index of the batch; not read by this method.

    Returns:
        Tensor: During phase 1, a zero-valued placeholder built from the MLP and
            user-embedding parameters; during phase 2, the combined student loss.
    """
    user, pos_item, neg_item = batch

    current_epoch = self.current_epoch  # provided by Lightning

    # ---- Phase 1: Teacher pre-training ----
    if current_epoch < self.teacher_epochs:
        # Algorithm 1, line 2 — Train GCN teacher until convergence
        # NOTE(review): the teacher update itself is presumably performed inside
        # _pretrain_teacher_step (its body is not shown here) — confirm.
        teacher_loss = self._pretrain_teacher_step(user, pos_item, neg_item)
        self.log(
            "teacher_loss",
            teacher_loss,
            prog_bar=True,
            on_step=False,
            on_epoch=True,
        )
        # SIMPLIFICATION: Lightning expects a loss tensor from every step, so a
        # placeholder is returned. Each term is multiplied by 0, keeping the value
        # at exactly zero while still linking the tensor to the student parameters.
        # NOTE(review): item_embedding parameters are not attached to the
        # placeholder — confirm this is intended.
        dummy: Tensor = torch.tensor(0.0, device=user.device, requires_grad=True)
        for p in self.mlp.parameters():
            dummy = dummy + p.sum() * 0
        for p in self.user_embedding.parameters():
            dummy = dummy + p.sum() * 0
        return dummy

    # ---- Phase 2: Student KD training (teacher frozen) ----
    # Mark teacher as pre-trained and freeze its parameters.
    # The flag makes the freeze run only on the first phase-2 step.
    if not self._teacher_pretrained:
        for param in self.teacher.parameters():
            param.requires_grad_(False)
        self._teacher_pretrained = True

    # Algorithm 1, line 4 — mini-batch T2 drawn from E
    # T2 = {(u_i, v_j)} from the batch (we have user + pos_item)
    user_idx = user
    item_idx = pos_item

    # Student embeddings (requires_grad=True for adaptive weight computation)
    user_s = self._encode_user(user_idx)  # h^(s)_i, Eq 5
    item_s = self._encode_item(item_idx)  # h^(s)_j, Eq 5

    # Retain grad for adaptive weight computation (Eq 10): these are non-leaf
    # tensors, so their .grad would otherwise not be kept after a backward pass.
    user_s.retain_grad()
    item_s.retain_grad()

    # Teacher forward (no grad)
    with torch.no_grad():
        teacher_sum, teacher_layers = self.teacher()

    # The teacher output is sliced as users first, then items.
    teacher_user_emb = teacher_sum[: self.n_users]
    teacher_item_emb = teacher_sum[self.n_users : self.n_users + self.n_items]

    # Eq 12 — Lrec: recommendation objective (positive pair similarity)
    # Lrec = -sum_{(u_i,v_j) in T2} y_{i,j}  (maximize positive scores)
    # The code averages over the batch (mean) rather than summing.
    rec_scores = (user_s * item_s).sum(dim=-1)
    l_rec = -rec_scores.mean()

    # Algorithm 1, line 5 — sample T1 for prediction-level distillation
    # Eq 7 — L1 prediction-level KD
    l1 = self._loss_l1(user_s, teacher_user_emb, teacher_item_emb)

    # Algorithm 1, line 8 — Eq 8 embedding-level KD
    l2 = self._loss_l2(user_s, item_s, user_idx, item_idx, teacher_layers)

    # Algorithm 1, line 9 — Eq 10 adaptive weight computation
    # We compute omega using retained gradients from l_rec, l1, l2
    try:
        omega_u, omega_i = self._compute_adaptive_weight(
            user_s, item_s, l_rec, l1, l2
        )
    except (RuntimeError, ValueError):
        # ASSUMPTION: If gradient graph is broken (e.g., first step), fall back to omega=1
        # The fallback is silent: nothing is logged when it is taken.
        omega_u = torch.ones(user_s.size(0), 1, device=user_s.device)
        omega_i = torch.ones(item_s.size(0), 1, device=item_s.device)

    # Algorithm 1, line 10 — Eq 9 adaptive contrastive regularization
    l3 = self._loss_l3(user_s, item_s, omega_u, omega_i)

    # Eq 12 — L4 weight-decay on student embedding tables
    # Applied to the raw table lookups, not to the MLP-refined embeddings.
    l4 = self.reg_loss(
        self.user_embedding(user_idx),
        self.item_embedding(item_idx),
    )

    # Eq 12 — full student objective
    loss = (
        l_rec
        + self.lambda1 * l1
        + self.lambda2 * l2
        + self.lambda3 * l3
        + self.lambda4 * l4
    )

    # Per-epoch logging of the total and of each component except l4.
    self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
    self.log("l_rec", l_rec.detach(), on_step=False, on_epoch=True)
    self.log("l1", l1.detach(), on_step=False, on_epoch=True)
    self.log("l2", l2.detach(), on_step=False, on_epoch=True)
    self.log("l3", l3.detach(), on_step=False, on_epoch=True)
    return loss

warprec.recommenders.collaborative_filtering_recommender.graph_based.sgl.SGL

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of SGL algorithm from Self-supervised Graph Learning for Recommendation (SIGIR 2021)

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

ssl_tau float

The temperature parameter for SSL loss.

ssl_reg float

The weight for SSL loss.

dropout float

The dropout rate for graph augmentation.

aug_type str

The type of graph augmentation ('ED', 'ND', 'RW').

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Raises:

Type Description
ValueError

If aug_type is not one of the supported types.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgl.py
@model_registry.register(name="SGL")
class SGL(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of SGL algorithm from
        Self-supervised Graph Learning for Recommendation (SIGIR 2021)

    The main recommendation task (BPR) is trained on the full normalized
    adjacency; when ``ssl_reg > 0`` two stochastically augmented views are
    propagated as well and aligned with an InfoNCE loss.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        ssl_tau (float): The temperature parameter for SSL loss.
        ssl_reg (float): The weight for SSL loss.
        dropout (float): The dropout rate for graph augmentation.
        aug_type (str): The type of graph augmentation ('ED', 'ND', 'RW').
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.

    Raises:
        ValueError: If aug_type is not one of the supported types.
    """

    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters
    embedding_size: int
    n_layers: int
    ssl_tau: float
    ssl_reg: float
    dropout: float
    aug_type: str
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Validation (case-insensitive: the value is normalized to upper case)
        self.aug_type = self.aug_type.upper()
        if self.aug_type not in ["ED", "ND", "RW"]:
            raise ValueError(
                f"Invalid aug_type: {self.aug_type}. "
                "Supported types: 'ED' (Edge Dropout), 'ND' (Node Dropout), 'RW' (Random Walk)."
            )

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row is reserved as the padding item (index n_items).
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Graph Construction
        # We keep the original adjacency matrix for the main task
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # Pre-fetch COO representation for fast augmentation.
        # These are plain attributes (not registered buffers), so they stay on
        # whatever device the adjacency was built on.
        row, col, _ = self.adj.coo()
        self.adj_row = row
        self.adj_col = col
        self.adj_size = self.adj.sparse_sizes()

        # Initialize weights
        self.apply(self._init_weights)

        # Losses
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.nce_loss = InfoNCELoss(temperature=self.ssl_tau)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to the dataloader factory.

        Returns:
            The dataloader produced by ``get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _graph_augmentation(self) -> SparseTensor:
        """Sample an edge-dropout view of the graph and re-normalize it.

        Every edge is kept independently with probability ``1 - dropout``. The
        surviving edges are then symmetrically normalized with the degrees of
        the *sampled* graph, i.e. value(i, j) = d_i^-0.5 * d_j^-0.5 where d is
        the number of kept edges per row.

        Returns:
            SparseTensor: The augmented adjacency. It lives on the device of the
                cached edge indices; ``self.adj`` itself is returned unchanged
                when ``dropout`` is 0.
        """
        if self.dropout == 0:
            return self.adj

        # Draw the mask on the device of the cached indices: they are plain
        # attributes and are not moved together with the module, so sampling on
        # self.device could index a tensor with a mask from another device.
        edge_device = self.adj_row.device
        num_edges = self.adj_row.size(0)
        keep_mask = torch.rand(num_edges, device=edge_device) > self.dropout

        # Boolean masking preserves the original ordering of the edges.
        new_row = self.adj_row[keep_mask]
        new_col = self.adj_col[keep_mask]

        # self.adj already stores normalized weights, so summing them would not
        # give node degrees. Count the surviving edges with unit weights instead.
        _, _, vals = self.adj.coo()
        unit_vals = torch.ones(new_row.size(0), dtype=vals.dtype, device=edge_device)

        # Create temporary SparseTensor before normalization
        temp_adj = SparseTensor(
            row=new_row,
            col=new_col,
            value=unit_vals,
            sparse_sizes=self.adj_size,
            is_sorted=True,
        )

        # Normalize the new adjacency matrix
        row_sum = temp_adj.sum(dim=1)  # D
        d_inv_sqrt = row_sum.pow(-0.5)  # D^-0.5
        d_inv_sqrt.masked_fill_(
            d_inv_sqrt == float("inf"), 0.0
        )  # Isolated nodes: 0^-0.5 is inf, use 0 instead

        # Get the normalization values
        new_norm_vals = d_inv_sqrt[new_row] * d_inv_sqrt[new_col]

        # Fast construction using is_sorted=True
        return SparseTensor(
            row=new_row,
            col=new_col,
            value=new_norm_vals,
            sparse_sizes=self.adj_size,
            is_sorted=True,
        )

    def _node_dropout_mask(self, num_nodes: int) -> Tensor:
        """Generates a mask for Node Dropout (ND).

        Args:
            num_nodes (int): Number of rows of the mask.

        Returns:
            Tensor: A [num_nodes, 1] float mask of zeros and ones; all ones when
                ``dropout`` is 0.
        """
        if self.dropout == 0:
            return torch.ones(num_nodes, 1, device=self.device)
        return (torch.rand(num_nodes, 1, device=self.device) > self.dropout).float()

    def forward(
        self, adj: SparseTensor, augment: bool = False
    ) -> Tuple[Tensor, Tensor]:
        """Propagate the embeddings over the graph and average the layer outputs.

        Args:
            adj (SparseTensor): Adjacency used for propagation. It is moved to
                the module device when necessary.
            augment (bool): When True, apply the ND or RW augmentation selected
                by ``aug_type``: ND masks the input embeddings, RW re-samples
                the graph at every layer. It has no effect for 'ED', where the
                caller passes an already augmented ``adj``.

        Returns:
            Tuple[Tensor, Tensor]: User embeddings with n_users rows and item
                embeddings with n_items + 1 rows (the last row is the padding item).
        """
        ego_all = torch.cat(
            [self.user_embedding.weight, self.item_embedding.weight], dim=0
        )

        # Node Dropout (ND) applied to features
        if augment and self.aug_type == "ND":
            ego_all = ego_all * self._node_dropout_mask(ego_all.size(0))

        embeddings_list = [ego_all]

        # Ensure adj is on device
        if adj.device() != self.device:
            adj = adj.to(self.device)
        curr_adj = adj

        resample_per_layer = augment and self.aug_type == "RW"
        for _ in range(self.n_layers):
            # Random Walk (RW): New graph structure at EACH layer
            if resample_per_layer:
                curr_adj = self._graph_augmentation()
                # _graph_augmentation returns self.adj unchanged when dropout is
                # 0 and otherwise builds the graph from cached indices, so its
                # result is not guaranteed to be on the module device.
                if curr_adj.device() != self.device:
                    curr_adj = curr_adj.to(self.device)

            # Propagation
            embeddings_list.append(curr_adj.matmul(embeddings_list[-1]))

        # Aggregation (Mean) over the input embeddings and every layer output
        final_embeddings = torch.stack(embeddings_list, dim=0).mean(dim=0)

        user_final, item_final = torch.split(
            final_embeddings, [self.n_users, self.n_items + 1]
        )
        return user_final, item_final

    def _contrastive_view(self) -> Tuple[Tensor, Tensor]:
        """Propagate the embeddings over one freshly sampled augmented view.

        Returns:
            Tuple[Tensor, Tensor]: User and item embeddings of the view.
        """
        if self.aug_type == "ED":
            # ED samples one graph up front and uses it for all layers.
            return self.forward(self._graph_augmentation(), augment=False)
        # ND and RW handle augmentation internally
        return self.forward(self.adj, augment=True)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the SGL loss (BPR + weighted InfoNCE + L2) for one batch.

        Args:
            batch (Any): Unpacked as a (user, pos_item, neg_item) triple.
            batch_idx (int): Index of the batch; not read by this method.

        Returns:
            Tensor: The total loss, also logged per epoch under "loss".
        """
        user, pos_item, neg_item = batch

        # Get propagated embeddings
        user_all_embeddings, item_all_embeddings = self.forward(self.adj, augment=False)

        # Get embeddings for current batch users and items
        u_embeddings = user_all_embeddings[user]
        pos_embeddings = item_all_embeddings[pos_item]
        neg_embeddings = item_all_embeddings[neg_item]

        # Calculate BPR loss
        pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)
        neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Calculate SSL loss (skipped entirely when its weight is not positive)
        ssl_loss = torch.tensor(0.0, device=self.device)

        if self.ssl_reg > 0:
            # Two independently sampled views of the same graph
            user_v1, item_v1 = self._contrastive_view()
            user_v2, item_v2 = self._contrastive_view()

            # Calculate InfoNCE loss
            # Users
            loss_u = self.nce_loss(user_v1[user], user_v2[user])

            # Items (Positive items only)
            loss_i = self.nce_loss(item_v1[pos_item], item_v2[pos_item])

            ssl_loss = self.ssl_reg * (loss_u + loss_i)

        # Calculate L2 loss on the raw (non-propagated) embeddings of the batch
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        loss = bpr_loss + ssl_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items for a batch of users with the propagated embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Propagate over the non-augmented graph, then select the batch users
        all_users, all_items = self.forward(self.adj)
        batch_users = all_users[user_indices]  # [batch_size, embedding_size]

        if item_indices is not None:
            # Case 'sampled': prediction on a sampled set of items
            candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample
            return torch.einsum("be,bse->bs", batch_users, candidates)

        # Case 'full': prediction on all items; the last row is the padding item
        catalogue = all_items[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item
        return torch.einsum("be,ie->bi", batch_users, catalogue)

forward(adj, augment=False)

Forward pass with optional augmentation logic.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgl.py
def forward(
    self, adj: SparseTensor, augment: bool = False
) -> Tuple[Tensor, Tensor]:
    """Propagate the embeddings over the graph and average the layer outputs.

    Args:
        adj (SparseTensor): Adjacency used for propagation. It is moved to
            the module device when necessary.
        augment (bool): When True, apply the ND or RW augmentation selected
            by ``aug_type``: ND masks the input embeddings, RW re-samples
            the graph at every layer. It has no effect for 'ED', where the
            caller passes an already augmented ``adj``.

    Returns:
        Tuple[Tensor, Tensor]: User embeddings with n_users rows and item
            embeddings with n_items + 1 rows (the last row is the padding item).
    """
    ego_all = torch.cat(
        [self.user_embedding.weight, self.item_embedding.weight], dim=0
    )

    # Node Dropout (ND) applied to features
    if augment and self.aug_type == "ND":
        ego_all = ego_all * self._node_dropout_mask(ego_all.size(0))

    embeddings_list = [ego_all]

    # Ensure adj is on device
    if adj.device() != self.device:
        adj = adj.to(self.device)
    curr_adj = adj

    resample_per_layer = augment and self.aug_type == "RW"
    for _ in range(self.n_layers):
        # Random Walk (RW): New graph structure at EACH layer
        if resample_per_layer:
            curr_adj = self._graph_augmentation()
            # _graph_augmentation returns self.adj unchanged when dropout is
            # 0 and otherwise builds the graph from cached indices, so its
            # result is not guaranteed to be on the module device.
            if curr_adj.device() != self.device:
                curr_adj = curr_adj.to(self.device)

        # Propagation
        embeddings_list.append(curr_adj.matmul(embeddings_list[-1]))

    # Aggregation (Mean) over the input embeddings and every layer output
    final_embeddings = torch.stack(embeddings_list, dim=0).mean(dim=0)

    user_final, item_final = torch.split(
        final_embeddings, [self.n_users, self.n_items + 1]
    )
    return user_final, item_final

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/sgl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Propagate over the non-augmented graph, then select the batch users
    all_users, all_items = self.forward(self.adj)
    batch_users = all_users[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Case 'sampled': prediction on a sampled set of items
        candidates = all_items[item_indices]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum("be,bse->bs", batch_users, candidates)

    # Case 'full': prediction on all items; the last row is dropped
    catalogue = all_items[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum("be,ie->bi", batch_users, catalogue)

warprec.recommenders.collaborative_filtering_recommender.graph_based.ultragcn.UltraGCN

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of UltraGCN algorithm from "UltraGCN: Ultra Simplification of Graph Convolutional Networks for Recommendation" (CIKM 2021).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

w_lambda float

Weight for the User-Item Constraint Loss (L_C).

w_gamma float

Weight for the Item-Item Constraint Loss (L_I).

w_neg float

Weight for negative samples in the constraint loss.

ii_k int

Number of neighbors (K) for the Item-Item graph construction.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ultragcn.py
@model_registry.register(name="UltraGCN")
class UltraGCN(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of UltraGCN algorithm from
    "UltraGCN: Ultra Simplification of Graph Convolutional Networks for Recommendation" (CIKM 2021).

    The model keeps one embedding table per side and scores user-item pairs
    with a plain dot product. No message passing is executed: the graph
    enters only through weights that are pre-computed once in
    ``_prepare_constraints`` and used by the constraint losses in
    ``training_step``.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        w_lambda (float): Weight for the User-Item Constraint Loss (L_C).
        w_gamma (float): Weight for the Item-Item Constraint Loss (L_I).
        w_neg (float): Weight for negative samples in the constraint loss.
        ii_k (int): Number of neighbors (K) for the Item-Item graph construction.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters
    embedding_size: int
    w_lambda: float
    w_gamma: float
    w_neg: float
    ii_k: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The item table carries one extra row (index n_items) used as padding
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Initialize weights
        self.apply(self._init_weights)

        # Initialize Regularization Loss
        self.reg_loss = EmbLoss()

        # Pre-compute Constraint Weights (Beta) and Item-Item Graph (Omega)
        self._prepare_constraints(interactions)

    def _prepare_constraints(self, interactions: Interactions) -> None:
        """Pre-computes the weights required for the constraint losses.

        The following buffers are registered on the module:

        - ``user_degree_pow``: 1 / (d_u + 1) for every user.
        - ``user_sqrt_deg``: sqrt(d_u + 1) for every user.
        - ``item_sqrt_deg``: sqrt(d_i + 1) for every item.
        - ``ii_neighbors``: [n_items, ii_k] indices of the strongest
          co-occurring items, padded with the padding index ``n_items``.
        - ``ii_weights``: [n_items, ii_k] weights (omega) of those neighbors,
          padded with zeros.

        Args:
            interactions (Interactions): The training interactions.
        """
        # Get sparse interaction matrix (User x Item)
        R: csr_matrix = interactions.get_sparse()

        # Compute Degrees
        # NOTE: We add +1 to avoid division by zero.
        # ravel() (rather than squeeze()) keeps the vectors 1-D even when
        # there is a single user or a single item.
        user_degree = np.asarray(R.sum(axis=1)).ravel() + 1
        item_degree = np.asarray(R.sum(axis=0)).ravel() + 1

        # Optimization: Pre-compute the degree terms of Beta once so the
        # training loop only has to gather and combine them
        self.register_buffer(
            "user_degree_pow",
            torch.from_numpy(user_degree).pow(-1.0).float().to(self.device),
        )  # 1/d_u
        self.register_buffer(
            "user_sqrt_deg",
            torch.from_numpy(user_degree).sqrt().float().to(self.device),
        )  # sqrt(d_u)
        self.register_buffer(
            "item_sqrt_deg",
            torch.from_numpy(item_degree).sqrt().float().to(self.device),
        )  # sqrt(d_i)

        # Construct Item-Item Graph (L_I)
        # Calculate Co-occurrence: G = R^T * R
        # NOTE: G is symmetric, so slicing through indptr/indices below yields
        # the neighbors of item i whether the product is stored row- or
        # column-compressed.
        G = R.transpose().dot(R)

        # Set diagonal to 0 so that an item is never its own neighbor
        G.setdiag(0)
        G.eliminate_zeros()

        # Calculate Item Degrees in G (+1 to avoid division by zero)
        g_degree = np.asarray(G.sum(axis=1)).ravel() + 1

        # Select Top-K neighbors for each item
        ii_neighbors = []
        ii_weights = []

        for i in range(self.n_items):
            row_start = G.indptr[i]
            row_end = G.indptr[i + 1]

            if row_end == row_start:
                # Item without any co-occurrence: fill the whole row with the
                # padding index (not item 0) and zero weights, exactly like
                # the partial padding applied further down.
                ii_neighbors.append(
                    np.full(self.ii_k, self.n_items, dtype=np.int64)
                )
                ii_weights.append(np.zeros(self.ii_k))
                continue

            cols = G.indices[row_start:row_end]
            data = G.data[row_start:row_end]

            if len(data) > self.ii_k:
                # Keep the ii_k largest co-occurrence counts (unordered)
                top_k_idx = np.argpartition(data, -self.ii_k)[-self.ii_k :]
                cols = cols[top_k_idx]
                data = data[top_k_idx]

            g_i = g_degree[i]
            g_j = g_degree[cols]

            # omega_ij = (G_ij / g_i) * sqrt(g_i / g_j)
            omega = (data / g_i) * np.sqrt(g_i / g_j)

            if len(cols) < self.ii_k:
                # Fewer than ii_k neighbors: pad up to a fixed row width
                pad_len = self.ii_k - len(cols)
                cols = np.pad(
                    cols, (0, pad_len), "constant", constant_values=self.n_items
                )
                omega = np.pad(omega, (0, pad_len), "constant")

            ii_neighbors.append(cols)
            ii_weights.append(omega)

        # Convert lists to arrays before creating the buffers
        ii_neighbors_np = np.array(ii_neighbors)
        ii_weights_np = np.array(ii_weights)

        self.register_buffer(
            "ii_neighbors",
            torch.tensor(ii_neighbors_np, dtype=torch.long).to(self.device),
        )
        self.register_buffer(
            "ii_weights", torch.tensor(ii_weights_np, dtype=torch.float).to(self.device)
        )

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to the dataloader factory.

        Returns:
            The object returned by ``interactions.get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def forward(self) -> Tuple[Tensor, Tensor]:
        """Forward pass just returns the embeddings."""
        return self.user_embedding.weight, self.item_embedding.weight

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the UltraGCN loss for one batch.

        The loss is ``L_O + w_lambda * L_C + w_gamma * L_I + reg_weight * L2``.

        Args:
            batch (Any): A ``(user, pos_item, neg_item)`` triple of index
                tensors.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The scalar loss of the batch.
        """
        user, pos_item, neg_item = batch

        user_emb = self.user_embedding(user)
        pos_item_emb = self.item_embedding(pos_item)
        neg_item_emb = self.item_embedding(neg_item)

        # Calculate scores
        pos_scores = torch.mul(user_emb, pos_item_emb).sum(dim=1)
        neg_scores = torch.mul(user_emb, neg_item_emb).sum(dim=1)

        # Main Objective (L_O)
        # -log(sigmoid(pos)) - log(sigmoid(-neg))
        loss_O = -F.logsigmoid(pos_scores).mean() - F.logsigmoid(-neg_scores).mean()

        # Constraint Loss (L_C)
        # Beta = (1/d_u) * sqrt(d_u) * (1/sqrt(d_i)) = (1/sqrt(d_u)) * (1/sqrt(d_i))
        # (all degrees are the +1 smoothed ones stored in the buffers)

        # Common term for user part of Beta
        beta_user_part = self.user_degree_pow[user] * self.user_sqrt_deg[user]  # type: ignore[index]

        beta_pos = beta_user_part / self.item_sqrt_deg[pos_item]  # type: ignore[index]
        beta_neg = beta_user_part / self.item_sqrt_deg[neg_item]  # type: ignore[index]

        loss_C = (
            -(beta_pos * F.logsigmoid(pos_scores)).mean()
            - (beta_neg * F.logsigmoid(-neg_scores)).mean() * self.w_neg
        )

        # Item-Item Constraint Loss (L_I)
        ii_neighbors = self.ii_neighbors[pos_item]  # type: ignore[index]
        ii_weights = self.ii_weights[pos_item]  # type: ignore[index]
        neighbor_emb = self.item_embedding(ii_neighbors)

        # Dot product [batch_size, 1, embedding_size] * [batch_size, K, embedding_size] -> [batch_size, k]
        ii_scores = torch.mul(pos_item_emb.unsqueeze(1), neighbor_emb).sum(dim=2)
        # Padded neighbor slots carry a zero weight and so add nothing here
        loss_I = -(ii_weights * F.logsigmoid(ii_scores)).sum(dim=1).mean()

        # Calculate L2 loss
        reg_loss = self.reg_weight * self.reg_loss(user_emb, pos_item_emb, neg_item_emb)

        # Loss logging
        loss = loss_O + (self.w_lambda * loss_C) + (self.w_gamma * loss_I) + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve user embeddings
        user_embeddings = self.user_embedding(
            user_indices
        )  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (the padding row is dropped)
            item_embeddings = self.item_embedding.weight[
                :-1, :
            ]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = self.item_embedding(
                item_indices
            )  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        # Common prediction step
        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward()

Forward pass just returns the embeddings.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ultragcn.py
def forward(self) -> Tuple[Tensor, Tensor]:
    """Return the raw embedding tables; no propagation is performed.

    Returns:
        Tuple[Tensor, Tensor]: The user embedding weights and the item
        embedding weights, in that order.
    """
    user_table = self.user_embedding.weight
    item_table = self.item_embedding.weight
    return user_table, item_table

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/ultragcn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with a dot product of embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item is scored.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}, shaped [batch_size, n_items]
        for a full prediction or [batch_size, pad_seq] for a sampled one.
    """
    batch_user_vecs = self.user_embedding(user_indices)  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled prediction: each user is scored against its own candidates
        candidate_vecs = self.item_embedding(
            item_indices
        )  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum("be,bse->bs", batch_user_vecs, candidate_vecs)

    # Full prediction: every user against every item, last (padding) row dropped
    catalogue_vecs = self.item_embedding.weight[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum("be,ie->bi", batch_user_vecs, catalogue_vecs)

warprec.recommenders.collaborative_filtering_recommender.graph_based.xsimgcl.XSimGCL

Bases: IterativeRecommender, GraphRecommenderUtils

Implementation of XSimGCL algorithm from XSimGCL: Towards Extremely Simple Graph Contrastive Learning for Recommendation (TKDE 2023).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

n_layers int

The number of graph convolution layers.

lambda_ float

Coefficient for contrastive loss.

eps float

Scale of the perturbation noise.

temperature float

Temperature for InfoNCE loss.

layer_cl int

Layer to pick for contrastive learning.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/xsimgcl.py
@model_registry.register(name="XSimGCL")
class XSimGCL(IterativeRecommender, GraphRecommenderUtils):
    """Implementation of XSimGCL algorithm from
    "XSimGCL: Towards Extremely Simple Graph Contrastive Learning for Recommendation" (TKDE 2023).

    Embeddings are propagated over the normalized user-item adjacency matrix.
    During training every layer output is perturbed with noise, and the
    output of layer ``layer_cl`` is contrasted against the final (layer
    averaged) embeddings with an InfoNCE loss.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        n_layers (int): The number of graph convolution layers.
        lambda_ (float): Coefficient for contrastive loss.
        eps (float): Scale of the perturbation noise.
        temperature (float): Temperature for InfoNCE loss.
        layer_cl (int): Layer to pick for contrastive learning
            (1-based, must lie in [1, n_layers]).
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Hyperparameters
    embedding_size: int
    n_layers: int
    lambda_: float
    eps: float
    temperature: float
    layer_cl: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Initialize Embeddings
        # The item table carries one extra row (index n_items) used as padding
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        self.apply(self._init_weights)

        # Normalize Adjacency Matrix explicitly (Symmetric Normalization)
        # The item side is sized n_items + 1 to match the padded item table
        self.adj = self.get_adj_mat(
            interactions.get_sparse().tocoo(),
            self.n_users,
            self.n_items + 1,
            normalize=True,
        )

        # Initialize Losses
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        self.nce_loss = InfoNCELoss(self.temperature)

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader from the interactions.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Forwarded to the dataloader factory.

        Returns:
            The object returned by ``interactions.get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def _perturb_embedding(self, embedding: Tensor) -> Tensor:
        """Adds noise to embeddings: E' = E + eps * sign(E) * normalize(noise).

        The noise is drawn uniformly from [0, 1), L2-normalized per row and
        multiplied by the sign of the embedding, so every component is pushed
        away from zero and stays in its original orthant, as in the
        SimGCL/XSimGCL noise definition.

        Args:
            embedding (Tensor): The embeddings to perturb.

        Returns:
            Tensor: The perturbed embeddings (the input is not modified).
        """
        noise = torch.rand_like(embedding)
        noise = F.normalize(noise, p=2, dim=1)
        return embedding + (self.eps * torch.sign(embedding) * noise)

    def forward(
        self, perturbed: bool = False
    ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
        """Propagates embeddings through the graph.

        Args:
            perturbed (bool): If True, adds noise to the output of every
                layer during propagation.

        Returns:
            Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: Tuple containing:
            - user_final_emb, item_final_emb: The embeddings averaged over
              the initial embeddings and all layer outputs, for prediction.
            - user_cl_emb, item_cl_emb: The output of layer ``layer_cl`` for
              CL, or None when ``layer_cl`` is not in [1, n_layers].
        """
        ego_embeddings = self.get_ego_embeddings(
            self.user_embedding, self.item_embedding
        )

        # Ensure adj is on the same device
        if self.adj.device() != ego_embeddings.device:
            self.adj = self.adj.to(ego_embeddings.device)

        # clone() so the in-place accumulation never touches the ego embeddings
        final_embeddings = ego_embeddings.clone()
        cl_embeddings = None
        current_embeddings = ego_embeddings

        for layer_idx in range(self.n_layers):
            # Graph Convolution: E(l) = A * E(l-1)
            current_embeddings = self.adj.matmul(current_embeddings)

            # XSimGCL Perturbation
            if perturbed:
                current_embeddings = self._perturb_embedding(current_embeddings)

            # Accumulate sum for final representation
            final_embeddings.add_(current_embeddings)

            # Capture specific layer for Contrastive Learning
            if layer_idx == (self.layer_cl - 1):
                cl_embeddings = current_embeddings

        # Mean Pooling: 1/(L+1) * sum(E_0 ... E_L)
        final_embeddings.div_(self.n_layers + 1)

        # Split into user and item embeddings
        user_final, item_final = torch.split(
            final_embeddings, [self.n_users, self.n_items + 1]
        )

        user_cl, item_cl = None, None
        if cl_embeddings is not None:
            user_cl, item_cl = torch.split(
                cl_embeddings, [self.n_users, self.n_items + 1]
            )

        return user_final, item_final, user_cl, item_cl

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the XSimGCL loss for one batch.

        The loss is ``BPR + lambda_ * (InfoNCE_user + InfoNCE_item) +
        reg_weight * L2``.

        Args:
            batch (Any): A ``(user, pos_item, neg_item)`` triple of index
                tensors.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The scalar loss of the batch.

        Raises:
            ValueError: If ``layer_cl`` does not select one of the
                ``n_layers`` propagation layers.
        """
        user, pos_item, neg_item = batch

        # Forward Pass with Perturbation
        # We get both final embeddings (for BPR) and CL embeddings (for InfoNCE)
        users_final, items_final, users_cl, items_cl = self.forward(perturbed=True)

        # Without this guard a bad layer_cl surfaces as an opaque
        # "'NoneType' object is not subscriptable" further down
        if users_cl is None or items_cl is None:
            raise ValueError(
                f"layer_cl={self.layer_cl} must be in [1, {self.n_layers}] "
                "to select a layer for contrastive learning."
            )

        # Get embeddings for current batch
        batch_users = users_final[user]
        batch_pos = items_final[pos_item]
        batch_neg = items_final[neg_item]

        # Calculate scores
        pos_scores = (batch_users * batch_pos).sum(dim=1)
        neg_scores = (batch_users * batch_neg).sum(dim=1)

        bpr_loss = self.bpr_loss(pos_scores, neg_scores)

        # Calculate loss between the final view and the specific layer view
        cl_loss_user = self.nce_loss(batch_users, users_cl[user])
        cl_loss_item = self.nce_loss(batch_pos, items_cl[pos_item])
        cl_loss = self.lambda_ * (cl_loss_user + cl_loss_item)

        # Regularize initial (ego) embeddings, not the propagated ones
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        loss = bpr_loss + cl_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Retrieve all user and item embeddings from the propagation network
        user_all_embeddings, item_all_embeddings = self.propagate_embeddings()

        # Get the embeddings for the specific users in the batch
        user_embeddings = user_all_embeddings[
            user_indices
        ]  # [batch_size, embedding_size]

        if item_indices is None:
            # Case 'full': prediction on all items (the padding row is dropped)
            item_embeddings = item_all_embeddings[:-1, :]  # [n_items, embedding_size]
            einsum_string = "be,ie->bi"  # b: batch, e: embedding, i: item
        else:
            # Case 'sampled': prediction on a sampled set of items
            item_embeddings = item_all_embeddings[
                item_indices
            ]  # [batch_size, pad_seq, embedding_size]
            einsum_string = "be,bse->bs"  # b: batch, e: embedding, s: sample

        predictions = torch.einsum(
            einsum_string, user_embeddings, item_embeddings
        )  # [batch_size, n_items] or [batch_size, pad_seq]
        return predictions

forward(perturbed=False)

Propagates embeddings through the graph.

Parameters:

Name Type Description Default
perturbed bool

If True, adds noise during propagation for CL.

False

Returns:

Type Description
Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]

Tuple containing:

  • user_final_emb, item_final_emb: The averaged embeddings for prediction.
  • user_cl_emb, item_cl_emb: The embeddings of layer layer_cl used for CL (None when layer_cl is not between 1 and n_layers).

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/xsimgcl.py
def forward(
    self, perturbed: bool = False
) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
    """Run the graph propagation and pool the layer outputs.

    Args:
        perturbed (bool): If True, adds noise to every layer output.

    Returns:
        Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]: Tuple containing:
        - user_final_emb, item_final_emb: The mean of the initial embeddings
          and all layer outputs, used for prediction.
        - user_cl_emb, item_cl_emb: The output of layer ``layer_cl`` used for
          CL, or None when ``layer_cl`` is not in [1, n_layers].
    """
    layer_output = self.get_ego_embeddings(self.user_embedding, self.item_embedding)

    # Keep the adjacency matrix on the device of the embeddings
    if self.adj.device() != layer_output.device:
        self.adj = self.adj.to(layer_output.device)

    split_sizes = [self.n_users, self.n_items + 1]
    cl_layer_idx = self.layer_cl - 1

    # Running sum starts from a copy of E_0 so in-place updates are safe
    pooled = layer_output.clone()
    contrastive_view = None

    for depth in range(self.n_layers):
        # E(l) = A * E(l-1), optionally followed by the noise perturbation
        layer_output = self.adj.matmul(layer_output)
        if perturbed:
            layer_output = self._perturb_embedding(layer_output)

        pooled.add_(layer_output)

        if depth == cl_layer_idx:
            contrastive_view = layer_output

    # Mean over E_0 ... E_L
    pooled.div_(self.n_layers + 1)
    user_final, item_final = torch.split(pooled, split_sizes)

    if contrastive_view is None:
        return user_final, item_final, None, None

    user_cl, item_cl = torch.split(contrastive_view, split_sizes)
    return user_final, item_final, user_cl, item_cl

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/graph_based/xsimgcl.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the propagated embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item is scored.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}, shaped [batch_size, n_items]
        for a full prediction or [batch_size, pad_seq] for a sampled one.
    """
    # Embeddings of all users and items as produced by the propagation network
    all_user_vecs, all_item_vecs = self.propagate_embeddings()

    batch_user_vecs = all_user_vecs[user_indices]  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled prediction: each user is scored against its own candidates
        candidate_vecs = all_item_vecs[
            item_indices
        ]  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum("be,bse->bs", batch_user_vecs, candidate_vecs)

    # Full prediction: every user against every item, last (padding) row dropped
    catalogue_vecs = all_item_vecs[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum("be,ie->bi", batch_user_vecs, catalogue_vecs)

KNN

warprec.recommenders.collaborative_filtering_recommender.knn.itemknn.ItemKNN

Bases: ItemSimRecommender

Implementation of ItemKNN algorithm from Amazon.com recommendations: item-to-item collaborative filtering 2003.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

Source code in warprec/recommenders/collaborative_filtering_recommender/knn/itemknn.py
@model_registry.register(name="ItemKNN")
class ItemKNN(ItemSimRecommender):
    """Implementation of ItemKNN algorithm from
        Amazon.com recommendations: item-to-item collaborative filtering 2003.

    An item-item similarity matrix is computed from the training matrix and
    then passed through a top-k filter with ``k`` neighbors.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Similarity measure.
    """

    k: int
    similarity: str

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to fit the model.

        The estimate is the size of the sparse training matrix plus the size
        of a dense ``n_items x n_items`` matrix of the same dtype.

        Args:
            params (dict): Model parameters (not used by the estimate).
            info (dict): Dataset information; ``n_items`` is read from it.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Arbitrary keyword arguments (unused).

        Returns:
            dict: ``train_ram_mb`` with the estimate and a ``notes`` string.
        """
        checked = cls._require_interactions_for_estimate(interactions, cls.__name__)
        sparse_train = checked.get_sparse()
        item_count = info["n_items"]

        sparse_mb = cls._sparse_size_mb(sparse_train)
        dense_mb = cls._dense_size_mb((item_count, item_count), sparse_train.dtype)

        estimate = {"train_ram_mb": sparse_mb + dense_mb}
        estimate["notes"] = "ItemKNN analytical train-space estimate"
        return estimate

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        interaction_matrix = self.train_matrix
        measure = similarities_registry.get(self.similarity)

        # Items are the rows of the transposed matrix, so the similarity is
        # computed between items
        dense_sim = torch.from_numpy(measure.compute(interaction_matrix.T))

        # Top-k filtering, stored back as a NumPy array
        self.item_similarity = self._apply_topk_filtering(dense_sim, self.k).numpy()

warprec.recommenders.collaborative_filtering_recommender.knn.itemknntd.ItemKNNTD

Bases: ItemKNN

Implementation of ItemKNN with Temporal Decay (ItemKNN-TD), from "Time Weight Collaborative Filtering" (CIKM 2005).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

beta float

The decay rate parameter. A higher beta means older interactions decay faster.

Raises:

Type Description
ValueError

If the timestamp column is not found in the dataset, as ItemKNNTD requires timestamps to compute temporal decay.

Source code in warprec/recommenders/collaborative_filtering_recommender/knn/itemknntd.py
@model_registry.register(name="ItemKNNTD")
class ItemKNNTD(ItemKNN):
    """Implementation of ItemKNN with Temporal Decay (ItemKNN-TD),
    from "Time Weight Collaborative Filtering" (CIKM 2005).

    The item similarity is built by the parent class. Afterwards the stored
    training matrix is replaced by a copy whose values are scaled by
    ``exp(-beta * age_in_days)``, where the age is measured from the most
    recent timestamp in the training interactions.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Similarity measure.
        beta (float): The decay rate parameter. A higher beta means older
            interactions decay faster.

    Raises:
        ValueError: If the timestamp column is not found in the dataset, as
            ItemKNNTD requires timestamps to compute temporal decay.
    """

    k: int
    similarity: str
    beta: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to fit the model.

        The figure is the one computed by ``ItemKNN.estimate_space``.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Forwarded to ``ItemKNN.estimate_space``.

        Returns:
            dict: ``train_ram_mb`` with the estimate and a ``notes`` string.

        Raises:
            ValueError: If the interactions carry no timestamps.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        timestamps = interactions.get_flat()[3]
        if timestamps is None:
            raise ValueError("ItemKNNTD requires timestamps to estimate space.")

        base_estimate = ItemKNN.estimate_space(
            params=params, info=info, interactions=interactions, **kwargs
        )
        return {
            "train_ram_mb": base_estimate["train_ram_mb"],
            "notes": "ItemKNNTD analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        # Retrieve flat user-item interactions and validate the timestamps
        # BEFORE the parent constructor runs: the parent computes the whole
        # item similarity matrix, which is wasted work if we must fail anyway.
        users, items, values, timestamps = interactions.get_flat()
        if (
            timestamps is None
            or interactions.timestamp_label not in interactions.get_df().columns
        ):
            raise ValueError(
                f"Timestamp column '{interactions.timestamp_label}' not found in the dataset. "
                "ItemKNNTD requires timestamps to compute temporal decay."
            )

        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        # Compute the temporal decay weights relative to the latest interaction
        referring_timestamp = np.max(timestamps)

        # NOTE(review): dividing by 24 * 3600 assumes timestamps are expressed
        # in seconds; confirm against the dataset loader.
        diff_seconds = referring_timestamp - timestamps
        diff_days = diff_seconds / (24 * 3600)

        # Formula: decay = exp(-(beta * ndays))
        decay_weights = np.exp(-(self.beta * diff_days))
        decayed_values = values * decay_weights

        # Save the trained matrix as sparse
        self.train_matrix = sp.csr_matrix(
            (decayed_values, (users, items)), shape=(self.n_users, self.n_items)
        )

warprec.recommenders.collaborative_filtering_recommender.knn.userknn.UserKNN

Bases: Recommender

Implementation of UserKNN algorithm from GroupLens: an open architecture for collaborative filtering of netnews 1994.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

Source code in warprec/recommenders/collaborative_filtering_recommender/knn/userknn.py
@model_registry.register(name="UserKNN")
class UserKNN(Recommender):
    """User-based nearest-neighbour recommender (UserKNN).

    Reference: GroupLens: an open architecture for collaborative
    filtering of netnews (1994).

    A user-user similarity matrix is built once, at construction time, from
    the sparse training interactions. Scores are obtained by multiplying the
    requested rows of that matrix with the training matrix.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Similarity measure.
    """

    k: int
    similarity: str

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        The estimate is the size of the sparse training matrix plus a dense
        ``n_users x n_users`` similarity matrix of the same dtype.

        Args:
            params (dict): Model parameters (not read by this estimate).
            info (dict): Dataset information; ``info["n_users"]`` is read.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Arbitrary keyword arguments (not read here).

        Returns:
            dict: A dictionary with the keys ``"train_ram_mb"`` and ``"notes"``.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        sparse_train = interactions.get_sparse()
        user_count = info["n_users"]

        sparse_mb = cls._sparse_size_mb(sparse_train)
        dense_sim_mb = cls._dense_size_mb(
            (user_count, user_count), sparse_train.dtype
        )

        estimate = {
            "train_ram_mb": sparse_mb + dense_sim_mb,
            "notes": "UserKNN analytical train-space estimate",
        }
        return estimate

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The sparse training matrix is kept because predict() multiplies by it
        self.train_matrix = interactions.get_sparse()

        # Look up the configured similarity measure and evaluate it on the
        # training matrix
        measure = similarities_registry.get(self.similarity)
        dense_similarity = torch.from_numpy(measure.compute(self.train_matrix))

        # Top-k filtering of the similarity matrix (presumably keeps the k
        # strongest neighbours per user -- see _apply_topk_filtering), stored
        # as a NumPy array for use in predict()
        self.user_similarity = self._apply_topk_filtering(
            dense_similarity, self.k
        ).numpy()

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Score items as B@X, with B the {user x user} similarity matrix.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Similarity rows of the requested users times the training matrix
        neighbour_rows = self.user_similarity[user_indices.cpu(), :]
        scores = torch.from_numpy(neighbour_rows @ self.train_matrix)

        if item_indices is not None:
            # Case 'sampled': indices above n_items - 1 are clamped to the
            # last valid column before gathering
            columns = item_indices.to(scores.device).clamp(max=self.n_items - 1)
            return scores.gather(1, columns)  # [batch_size, pad_seq]

        # Case 'full': scores for every item
        return scores  # [batch_size, n_items]

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction in the form of B@X where B is a {user x user} similarity matrix.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/knn/userknn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction in the form of B@X where B is a {user x user} similarity matrix.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Compute predictions and convert to Tensor
    predictions = self.user_similarity[user_indices.cpu(), :] @ self.train_matrix
    predictions = torch.from_numpy(predictions)

    if item_indices is None:
        # Case 'full': prediction on all items
        return predictions  # [batch_size, n_items]

    # Case 'sampled': prediction on a sampled set of items
    return predictions.gather(
        1,
        item_indices.to(predictions.device).clamp(
            max=self.n_items - 1
        ),  # [batch_size, pad_seq]
    )

warprec.recommenders.collaborative_filtering_recommender.knn.userknntd.UserKNNTD

Bases: UserKNN

Implementation of UserKNN with Temporal Decay (UserKNN-TD). Adapted from Time Weight Collaborative Filtering (CIKM 2005).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
k int

Number of nearest neighbors.

similarity str

Similarity measure.

beta float

The decay rate parameter. A higher beta means older interactions decay faster.

Raises:

Type Description
ValueError

If the timestamp column is not found in the dataset, as UserKNNTD requires timestamps to compute temporal decay.

Source code in warprec/recommenders/collaborative_filtering_recommender/knn/userknntd.py
@model_registry.register(name="UserKNNTD")
class UserKNNTD(UserKNN):
    """Implementation of UserKNN with Temporal Decay (UserKNN-TD).
    Adapted from Time Weight Collaborative Filtering (CIKM 2005).

    The parent constructor computes the user-user similarity from the matrix
    returned by ``interactions.get_sparse()``. This class then replaces the
    matrix used at prediction time with one whose values are multiplied by
    ``exp(-beta * age_in_days)``, where the age is measured from the most
    recent timestamp in the training data.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        k (int): Number of nearest neighbors.
        similarity (str): Similarity measure.
        beta (float): The decay rate parameter. A higher beta means older
            interactions decay faster.

    Raises:
        ValueError: If the timestamp column is not found in the dataset or no
            timestamps are available, as UserKNNTD requires timestamps to
            compute temporal decay.
    """

    k: int
    similarity: str
    beta: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the RAM needed to train the model.

        The figure is the one computed by ``UserKNN.estimate_space``; only the
        note is specific to this class.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Arbitrary keyword arguments, forwarded to the
                parent estimate.

        Returns:
            dict: A dictionary with the keys ``"train_ram_mb"`` and ``"notes"``.

        Raises:
            ValueError: If the interactions carry no timestamps.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        timestamps = interactions.get_flat()[3]
        if timestamps is None:
            raise ValueError("UserKNNTD requires timestamps to estimate space.")

        base_estimate = UserKNN.estimate_space(
            params=params, info=info, interactions=interactions, **kwargs
        )
        return {
            "train_ram_mb": base_estimate["train_ram_mb"],
            "notes": "UserKNNTD analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        # Validate the timestamps before calling the parent constructor: the
        # parent builds the full user-user similarity matrix, which is wasted
        # work if the dataset turns out to be unusable for temporal decay.
        df = interactions.get_df()
        if interactions.timestamp_label not in df.columns:
            raise ValueError(
                f"Timestamp column '{interactions.timestamp_label}' not found in the dataset. "
                "UserKNNTD requires timestamps to compute temporal decay."
            )

        # Retrieve flat user-item interactions
        users, items, values, timestamps = interactions.get_flat()
        if timestamps is None:
            # Same condition estimate_space() guards against; without it
            # np.max(None) below would fail with an unrelated TypeError.
            raise ValueError(
                "UserKNNTD requires timestamps to compute temporal decay."
            )

        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        # Age of every interaction relative to the most recent one.
        # The difference is treated as seconds and converted to days.
        referring_timestamp = np.max(timestamps)
        diff_seconds = referring_timestamp - timestamps
        diff_days = diff_seconds / (24 * 3600)

        # Formula: decay = exp(-(beta * ndays))
        decay_weights = np.exp(-(self.beta * diff_days))
        decayed_values = values * decay_weights

        # Replace the matrix used by predict() with the decayed one
        self.train_matrix = sp.csr_matrix(
            (decayed_values, (users, items)), shape=(self.n_users, self.n_items)
        )

Latent Factor

warprec.recommenders.collaborative_filtering_recommender.latent_factor.admmslim.ADMMSlim

Bases: ItemSimRecommender

Implementation of ADMMSlim algorithm from ADMM SLIM: Sparse Recommendations for Many Users 2020.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
lambda_1 float

The first regularization parameter.

lambda_2 float

The second regularization parameter.

alpha float

The alpha parameter for the item means.

rho float

The rho parameter for the ADMM algorithm.

it int

The number of iterations for the ADMM algorithm.

positive_only bool

Whether or not to keep the similarity matrix positive.

center_columns bool

Whether or not to center the columns of the interactions.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/admmslim.py
@model_registry.register(name="ADMMSlim")
class ADMMSlim(ItemSimRecommender):
    """ADMM SLIM item-similarity recommender.

    Reference: ADMM SLIM: Sparse Recommendations for Many Users (2020).

    The item-item similarity matrix is learned in the constructor with a
    fixed number of ADMM iterations over the item Gram matrix.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        lambda_1 (float): The first regularization parameter.
        lambda_2 (float): The second regularization parameter.
        alpha (float): The alpha parameter for the item means.
        rho (float): The rho parameter for the ADMM algorithm.
        it (int): The number of iterations for the ADMM algorithm.
        positive_only (bool): Whether or not to keep the similarity matrix positive.
        center_columns (bool): Whether or not to center the columns of the interactions.
    """

    lambda_1: float
    lambda_2: float
    alpha: float
    rho: float
    it: int
    positive_only: bool
    center_columns: bool

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the peak RAM needed to train the model.

        Four stages of the constructor are sized separately (Gram matrix
        construction, regularization diagonal, matrix inversion, ADMM loop)
        and combined through ``_peak_size_mb``.

        Args:
            params (dict): Model parameters; ``center_columns`` is read.
            info (dict): Dataset information; ``n_users`` and ``n_items``
                are read.
            interactions (Optional[Interactions]): The training interactions.
            **kwargs (Any): Arbitrary keyword arguments (not read here).

        Returns:
            dict: A dictionary with the keys ``"train_ram_mb"`` and ``"notes"``.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        sparse_train = interactions.get_sparse()
        user_count = info["n_users"]
        item_count = info["n_items"]

        train_matrix_mb = cls._sparse_size_mb(sparse_train)
        item_means_mb = cls._dense_size_mb((item_count,), np.float64)

        # The dense centered copy only exists when column centering is enabled
        if params.get("center_columns"):
            centered_mb = cls._dense_size_mb((user_count, item_count), np.float64)
        else:
            centered_mb = 0.0

        square_f64_mb = cls._dense_size_mb((item_count, item_count), np.float64)
        square_f32_mb = cls._dense_size_mb((item_count, item_count), np.float32)
        gamma_mb = cls._dense_size_mb((item_count,), np.float32)

        # Memory that stays allocated through every stage
        resident_mb = train_matrix_mb + item_means_mb

        stage_peaks = (
            # Building the Gram matrix
            resident_mb + centered_mb + square_f64_mb,
            # Building the regularization diagonal
            resident_mb + square_f64_mb + 3 * square_f64_mb,
            # Inverting and casting to float32
            resident_mb + 3 * square_f64_mb + square_f32_mb,
            # ADMM iterations
            resident_mb + 7 * square_f32_mb + gamma_mb,
        )
        return {
            "train_ram_mb": cls._peak_size_mb(*stage_peaks),
            "notes": "ADMMSlim analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        train_csr = self.train_matrix

        # Per-item mean of the interaction values
        self.item_means = train_csr.mean(axis=0).getA1()

        if not self.center_columns:
            gram = (train_csr.T @ train_csr).toarray()
        else:
            # Centering needs a dense copy of the interactions, which is
            # memory expensive: on large datasets leave center_columns off
            centered = train_csr.toarray() - self.item_means
            gram = centered.T @ centered

            del centered  # Free the dense copy as soon as possible

        # Regularization added to the Gram matrix before inversion
        ridge = self.lambda_2 * np.diag(
            np.power(self.item_means, self.alpha)
        ) + self.rho * np.identity(self.n_items)
        inv_term = np.linalg.inv(gram + ridge).astype(np.float32)
        base_solution = (inv_term @ gram).astype(np.float32)

        # ADMM state starts from zero
        dual = np.zeros_like(gram, dtype=np.float32)
        sparse_b = np.zeros_like(gram, dtype=np.float32)

        del ridge, gram  # No longer needed; frees two dense item x item arrays

        # ADMM iterations
        for _ in range(self.it):
            unconstrained = base_solution + inv_term @ (self.rho * sparse_b - dual)
            # Column-wise correction that drives the diagonal towards zero
            gamma = np.diag(unconstrained) / (np.diag(inv_term) + 1e-7)
            corrected = unconstrained - inv_term * gamma
            sparse_b = self._soft_threshold(
                corrected + dual / self.rho, self.lambda_1 / self.rho
            )
            if self.positive_only:
                sparse_b = np.maximum(sparse_b, 0)
            dual += self.rho * (corrected - sparse_b)

        # The thresholded iterate is the learned item similarity
        self.item_similarity = sparse_b

    def _soft_threshold(self, x: np.ndarray, threshold: float) -> np.ndarray:
        """Shrink every entry of ``x`` towards zero by ``threshold``.

        Entries whose magnitude does not exceed ``threshold`` become zero;
        the others keep their sign and lose ``threshold`` in magnitude.
        """
        magnitude = np.abs(x)
        return (magnitude > threshold) * (magnitude - threshold) * np.sign(x)

warprec.recommenders.collaborative_filtering_recommender.latent_factor.bpr.BPR

Bases: IterativeRecommender

Implementation of BPR algorithm from BPR: Bayesian Personalized Ranking from Implicit Feedback 2012.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size of user and item.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/bpr.py
@model_registry.register(name="BPR")
class BPR(IterativeRecommender):
    """Matrix-factorization recommender trained with the BPR pairwise loss.

    Reference: BPR: Bayesian Personalized Ranking from Implicit Feedback (2012).

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size of user and item.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    embedding_size: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # One vector per user
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One vector per item plus a trailing padding row at index n_items
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Weight initialization, then the two loss components
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Return the contrastive dataloader built from the interactions.

        ``sessions`` is accepted but not used by this model.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the loss for one (user, positive item, negative item) batch.

        Returns:
            Tensor: The BPR loss plus the weighted embedding regularization.
        """
        user, pos_item, neg_item = batch

        # Pairwise ranking term
        score_pos = self.forward(user, pos_item)
        score_neg = self.forward(user, neg_item)
        ranking_term = self.bpr_loss(score_pos, score_neg)

        # Regularization over the embeddings that took part in this batch
        penalty = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        total = ranking_term + penalty
        self.log("loss", total, prog_bar=True, on_step=False, on_epoch=True)
        return total

    def forward(self, user: Tensor, item: Tensor) -> Tensor:
        """Score each (user, item) pair with an embedding dot product.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.

        Returns:
            Tensor: One score per (user, item) pair.
        """
        user_vec = self.user_embedding(user)
        item_vec = self.item_embedding(item)
        return (user_vec * item_vec).sum(dim=1)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        user_vecs = self.user_embedding(user_indices)  # [batch_size, embedding_size]

        if item_indices is not None:
            # Case 'sampled': one candidate list per user
            candidate_vecs = self.item_embedding(
                item_indices
            )  # [batch_size, pad_seq, embedding_size]
            # b: batch, e: embedding, s: sample
            return torch.einsum(
                "be,bse->bs", user_vecs, candidate_vecs
            )  # [batch_size, pad_seq]

        # Case 'full': every item row except the trailing padding row
        all_item_vecs = self.item_embedding.weight[:-1, :]  # [n_items, embedding_size]
        # b: batch, e: embedding, i: item
        return torch.einsum(
            "be,ie->bi", user_vecs, all_item_vecs
        )  # [batch_size, n_items]

forward(user, item)

Forward pass of the BPR model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required

Returns:

Name Type Description
Tensor Tensor

The predicted score for each pair of positive and negative items.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/bpr.py
def forward(self, user: Tensor, item: Tensor) -> Tensor:
    """Score each (user, item) pair with an embedding dot product.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.

    Returns:
        Tensor: One score per (user, item) pair.
    """
    user_vec = self.user_embedding(user)
    item_vec = self.item_embedding(item)
    # Element-wise product summed over the embedding axis
    return (user_vec * item_vec).sum(dim=1)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/bpr.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    user_vecs = self.user_embedding(user_indices)  # [batch_size, embedding_size]

    if item_indices is not None:
        # Case 'sampled': one candidate list per user
        candidate_vecs = self.item_embedding(
            item_indices
        )  # [batch_size, pad_seq, embedding_size]
        # b: batch, e: embedding, s: sample
        return torch.einsum(
            "be,bse->bs", user_vecs, candidate_vecs
        )  # [batch_size, pad_seq]

    # Case 'full': every item row except the trailing one
    all_item_vecs = self.item_embedding.weight[:-1, :]  # [n_items, embedding_size]
    # b: batch, e: embedding, i: item
    return torch.einsum(
        "be,ie->bi", user_vecs, all_item_vecs
    )  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.latent_factor.fism.FISM

Bases: IterativeRecommender

Implementation of FISM model from FISM: Factored Item Similarity Models for Top-N Recommender Systems (KDD 2013).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The number of factors for item feature embeddings.

alpha float

The alpha parameter, a value between 0 and 1, used in the similarity calculation.

split_to int

Parameter for splitting items into chunks during prediction (for memory management).

reg_weight float

The L2 regularization weight.

batch_size int

The size of the batches used during training.

epochs int

The number of training epochs.

learning_rate float

The learning rate for the optimizer.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/fism.py
@model_registry.register(name="FISM")
class FISM(IterativeRecommender):
    """Factored Item Similarity Model (FISM).

    Reference: FISM: Factored Item Similarity Models for Top-N Recommender
    Systems (KDD 2013).

    A user is represented by the masked sum of the source embeddings of the
    items in their history, scaled by ``N_u ^ (-alpha)``; the score of a
    target item is the dot product with its destination embedding plus user
    and item biases.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The number of factors for item feature embeddings.
        alpha (float): The alpha parameter, a value between 0 and 1,
            used in the similarity calculation.
        split_to (int): Parameter for splitting items into chunks
            during prediction (for memory management).
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The size of the batches used during training.
        epochs (int): The number of training epochs.
        learning_rate (float): The learning rate for the optimizer.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.HISTORY

    # Model specific parameters
    embedding_size: int
    alpha: float
    split_to: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Source (history side) and destination (target side) item tables,
        # each with a trailing padding row at index n_items
        table_rows = self.n_items + 1
        self.item_src_embedding = nn.Embedding(
            table_rows, self.embedding_size, padding_idx=self.n_items
        )
        self.item_dst_embedding = nn.Embedding(
            table_rows, self.embedding_size, padding_idx=self.n_items
        )

        # Bias terms; the item bias has one extra slot for the padding index
        self.user_bias = nn.Parameter(torch.zeros(self.n_users))
        self.item_bias = nn.Parameter(torch.zeros(table_rows))

        # Per-user history tensors, registered as buffers since they are
        # not trainable
        history_matrix, history_lens, history_mask = interactions.get_history()
        self.register_buffer("history_matrix", history_matrix)
        self.register_buffer("history_lens", history_lens)
        self.register_buffer("history_mask", history_mask)

        # Ids 1..n_items split into split_to chunks
        self.group = torch.chunk(torch.arange(1, table_rows), self.split_to)

        # Weight initialization, then the two loss components
        self.apply(self._init_weights)
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs,
    ):
        """Return the pointwise dataloader, requested with ``neg_samples=0``.

        ``sessions`` is accepted but not used by this model.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=0,
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the loss for one (user, item, rating) batch.

        Returns:
            The BCE-with-logits loss plus the weighted embedding regularization.
        """
        user, item, rating = batch

        # Classification term on the raw scores
        logits = self(user, item)
        classification_term = self.bce_loss(logits, rating)

        # Regularization on both embeddings of the batch items
        penalty = self.reg_weight * self.reg_loss(
            self.item_src_embedding(item),
            self.item_dst_embedding(item),
        )

        # Loss logging
        total = classification_term + penalty
        self.log("loss", total, prog_bar=True, on_step=False, on_epoch=True)
        return total

    def forward(self, user: Tensor, item: Tensor) -> Tensor:
        """Forward pass for calculating scores for specific user-item pairs.

        Args:
            user (Tensor): User indices.
            item (Tensor): Item indices.

        Returns:
            Tensor: Predicted scores.
        """
        history_ids = self.history_matrix[user]  # type: ignore[index]
        history_len = self.history_lens[user]  # type: ignore[index]
        valid_slots = self.history_mask[user]  # type: ignore[index]

        history_vecs = self.item_src_embedding(
            history_ids
        )  # batch_size x max_len x embedding_size
        target_vec = self.item_dst_embedding(item)  # batch_size x embedding_size

        bias_u = self.user_bias[user]  # batch_size
        bias_i = self.item_bias[item]  # batch_size

        # Dot product of every history item with the target item:
        # (batch_size, max_len, embedding_size) @ (batch_size, embedding_size, 1)
        pair_sim = torch.bmm(history_vecs, target_vec.unsqueeze(-1)).squeeze(
            -1
        )  # batch_size x max_len

        # Padded history slots must not contribute
        pair_sim = valid_slots * pair_sim

        # coeff = N_u ^ (-alpha); the epsilon keeps the base strictly
        # positive for users with an empty history
        coeff = torch.pow(history_len.float() + 1e-6, -self.alpha)  # batch_size

        # Scores = coeff * sum(similarity) + user_bias + item_bias
        return coeff * pair_sim.sum(dim=1) + bias_u + bias_i

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # History data of the users in the batch
        history_ids = self.history_matrix[user_indices]  # type: ignore[index]
        history_len = self.history_lens[user_indices]  # type: ignore[index]
        valid_slots = self.history_mask[user_indices]  # type: ignore[index]
        bias_u = self.user_bias[user_indices]

        # Masked sum of the history embeddings
        history_vecs = self.item_src_embedding(
            history_ids
        )  # [batch_size, max_len, embedding_size]
        masked_vecs = history_vecs * valid_slots.unsqueeze(2).float()
        pooled = masked_vecs.sum(dim=1)  # [batch_size, embedding_size]

        # Normalization coefficient (N_u ^ -alpha)
        coeff = torch.pow(history_len.float() + 1e-6, -self.alpha).unsqueeze(1)
        user_repr = pooled * coeff  # [batch_size, embedding_size]

        if item_indices is not None:
            # Case 'sampled': prediction on a sampled set of items
            target_vecs = self.item_dst_embedding(
                item_indices
            )  # [batch_size, pad_seq, embedding_size]
            bias_i = self.item_bias[item_indices]  # [batch_size, pad_seq]
            subscripts = "be,bse->bs"  # b: batch, e: embedding, s: sample
        else:
            # Case 'full': every item row except the trailing padding row
            target_vecs = self.item_dst_embedding.weight[
                :-1, :
            ]  # [n_items, embedding_size]
            bias_i = self.item_bias[:-1]  # [n_items]
            subscripts = "be,ie->bi"  # b: batch, e: embedding, i: item

        scores = torch.einsum(
            subscripts, user_repr, target_vecs
        )  # [batch_size, n_items] or [batch_size, pad_seq]

        # Add the biases
        scores += bias_u.unsqueeze(1)
        scores += bias_i
        return scores

forward(user, item)

Forward pass for calculating scores for specific user-item pairs.

Parameters:

Name Type Description Default
user Tensor

User indices.

required
item Tensor

Item indices.

required

Returns:

Name Type Description
Tensor Tensor

Predicted scores.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/fism.py
def forward(self, user: Tensor, item: Tensor) -> Tensor:
    """Score each (user, item) pair of the batch.

    The score is the normalized sum of dot products between the source
    embeddings of the user's history items and the destination embedding
    of the target item, plus the user and item biases.

    Args:
        user (Tensor): User indices.
        item (Tensor): Item indices.

    Returns:
        Tensor: Predicted scores.
    """
    # Per-user history data gathered from the model buffers
    history_ids = self.history_matrix[user]  # type: ignore[index]
    history_sizes = self.history_lens[user]  # type: ignore[index]
    history_mask = self.history_mask[user]  # type: ignore[index]

    # [batch_size, max_len, embedding_size]
    history_emb = self.item_src_embedding(history_ids)
    # [batch_size, embedding_size]
    target_emb = self.item_dst_embedding(item)

    # Dot product of every history item with the target item:
    # [batch_size, max_len, embedding_size] @ [batch_size, embedding_size, 1]
    pairwise = torch.bmm(history_emb, target_emb.unsqueeze(2)).squeeze(2)

    # Zero out the contribution of padded history slots
    pairwise = history_mask * pairwise  # [batch_size, max_len]

    # N_u ^ (-alpha); the epsilon keeps the base strictly positive for
    # users with an empty history
    norm = torch.pow(history_sizes.float() + 1e-6, -self.alpha)  # [batch_size]

    return (
        norm * torch.sum(pairwise, dim=1)
        + self.user_bias[user]
        + self.item_bias[item]
    )

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/fism.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users from the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # Slice the per-user buffers down to the current batch
    history_ids = self.history_matrix[user_indices]  # type: ignore[index]
    history_sizes = self.history_lens[user_indices]  # type: ignore[index]
    history_mask = self.history_mask[user_indices]  # type: ignore[index]
    user_biases = self.user_bias[user_indices]

    # Source embeddings of the history, with padded slots zeroed out
    history_emb = self.item_src_embedding(
        history_ids
    )  # [batch_size, max_len, embedding_size]
    visible_emb = history_emb * history_mask.unsqueeze(2).float()

    # Sum over the history axis, then scale by N_u ^ -alpha
    pooled_emb = visible_emb.sum(dim=1)  # [batch_size, embedding_size]
    norm = torch.pow(history_sizes.float() + 1e-6, -self.alpha).unsqueeze(1)
    user_repr = pooled_emb * norm  # [batch_size, embedding_size]

    if item_indices is not None:
        # Sampled prediction: one candidate list per user
        candidate_emb = self.item_dst_embedding(
            item_indices
        )  # [batch_size, pad_seq, embedding_size]
        candidate_bias = self.item_bias[item_indices]  # [batch_size, pad_seq]
        contraction = "be,bse->bs"  # b: batch, e: embedding, s: sample
    else:
        # Full prediction: every item except the trailing extra row
        candidate_emb = self.item_dst_embedding.weight[
            :-1, :
        ]  # [n_items, embedding_size]
        candidate_bias = self.item_bias[:-1]  # [n_items]
        contraction = "be,ie->bi"  # b: batch, e: embedding, i: item

    # [batch_size, n_items] or [batch_size, pad_seq]
    scores = torch.einsum(contraction, user_repr, candidate_emb)

    # Biases are added in place on the freshly created score tensor
    scores += user_biases.unsqueeze(1)
    scores += candidate_bias
    return scores

warprec.recommenders.collaborative_filtering_recommender.latent_factor.ials.iALS

Bases: Recommender

Implementation of iALS model from "Revisiting the Performance of iALS on Item Recommendation Benchmarks" in RecSys 2022.

The paper revisits iALS for top-n recommendation using a binary user-item matrix, an all-pairs unobserved loss weighted by alpha0, and a frequency-scaled L2 regularizer controlled by nu.

The original implementation of iALS is described in "Collaborative Filtering for Implicit Feedback Datasets" in ICDM 2008, and optimizes a different confidence-weighted objective. The 2022 paper shows that the original iALS performs poorly on standard benchmarks, and that the modified iALS objective is competitive with state-of-the-art methods.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
factors int

Latent factor dimensionality.

alpha0 float

Weight of the all-pairs unobserved loss term.

reg float

L2 regularization weight (lambda).

n_iterations int

Number of full ALS sweeps.

nu float

Frequency scaling exponent for the regularizer.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/ials.py
@model_registry.register(name="iALS")
class iALS(Recommender):
    """Implementation of iALS model from
    "Revisiting the Performance of iALS on Item Recommendation Benchmarks" in RecSys 2022.

    The paper revisits iALS for top-n recommendation using a binary
    user-item matrix, an all-pairs unobserved loss weighted by ``alpha0``,
    and a frequency-scaled L2 regularizer controlled by ``nu``.

    The original implementation of iALS is described in
    "Collaborative Filtering for Implicit Feedback Datasets" in ICDM 2008, and
    optimizes a different confidence-weighted objective. The 2022 paper shows
    that the original iALS performs poorly on standard benchmarks, and that the
    modified iALS objective is competitive with state-of-the-art methods.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        factors (int): Latent factor dimensionality.
        alpha0 (float): Weight of the all-pairs unobserved loss term.
        reg (float): L2 regularization weight (lambda).
        n_iterations (int): Number of full ALS sweeps.
        nu (float): Frequency scaling exponent for the regularizer.
    """

    factors: int
    alpha0: float
    reg: float
    n_iterations: int
    nu: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Analytically estimate the RAM needed to train the model.

        Args:
            params (dict): The model params; ``params["factors"]`` is read.
            info (dict): Dataset information; ``n_users`` and ``n_items``
                are read.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes``
                string describing the estimate.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        factors = params["factors"]

        n_users = info["n_users"]
        n_items = info["n_items"]

        # Sparse buffers: the input matrix, its float64 binary copy and the
        # transposed (item-major) view of that copy.
        train_matrix_mb = cls._csr_size_mb(X)
        observed_matrix_mb = cls._bytes_to_mb(
            X.data.size * np.dtype(np.float64).itemsize
            + X.indices.nbytes
            + X.indptr.nbytes
        )
        observed_transpose_mb = observed_matrix_mb

        # Dense buffers: float64 factors while training, float32 copies at
        # the end, plus a small [f, f] workspace for the linear solves.
        user_factors_mb = cls._dense_size_mb((n_users, factors), np.float64)
        item_factors_mb = cls._dense_size_mb((n_items, factors), np.float64)
        updated_factors_mb = max(user_factors_mb, item_factors_mb)
        final_user_factors_mb = cls._dense_size_mb((n_users, factors), np.float32)
        final_item_factors_mb = cls._dense_size_mb((n_items, factors), np.float32)
        solve_workspace_mb = cls._dense_size_mb((4, factors, factors), np.float64)

        iteration_peak_mb = (
            train_matrix_mb
            + observed_matrix_mb
            + observed_transpose_mb
            + user_factors_mb
            + item_factors_mb
            + updated_factors_mb
            + solve_workspace_mb
        )
        final_buffers_peak_mb = (
            train_matrix_mb
            + observed_matrix_mb
            + user_factors_mb
            + item_factors_mb
            + final_user_factors_mb
            + final_item_factors_mb
        )
        train_ram_mb = cls._peak_size_mb(iteration_peak_mb, final_buffers_peak_mb)
        return {
            "train_ram_mb": train_ram_mb,
            "notes": "iALS analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        X = interactions.get_sparse().tocsr()  # [n_users, n_items]
        n_users, n_items = X.shape

        # The paper defines S as a set of observed user-item pairs, so every
        # stored interaction is treated as a binary positive.
        S = X.copy().astype(np.float64)
        S.data = np.ones_like(S.data, dtype=np.float64)

        # The item-major view never changes between sweeps: build it once
        # instead of re-transposing the matrix at every iteration.
        S_items = S.T.tocsr()  # [n_items, n_users]

        # -- Initialise latent factors ----------------------------------------
        # Appendix A.2, Eq. 6: sigma = sigma_star / sqrt(d), with sigma_star=0.1
        # (Generator.normal already returns float64 arrays.)
        rng = np.random.default_rng(seed)
        sigma = 0.1 / np.sqrt(self.factors)
        user_factors = rng.normal(0.0, sigma, size=(n_users, self.factors))
        item_factors = rng.normal(0.0, sigma, size=(n_items, self.factors))

        # -- ALS iterations ---------------------------------------------------
        # The paper optimizes the binary objective by alternating least squares.
        for _ in range(self.n_iterations):
            user_factors = self._als_step(
                item_factors, user_factors, S, self.reg, self.alpha0, self.nu
            )

            item_factors = self._als_step(
                user_factors,
                item_factors,
                S_items,
                self.reg,
                self.alpha0,
                self.nu,
            )

        # Store learned factors as buffers so checkpoints preserve the model.
        self.register_buffer("user_factors", torch.from_numpy(user_factors).float())
        self.register_buffer("item_factors", torch.from_numpy(item_factors).float())

    @staticmethod
    def _als_step(
        fixed_factors: np.ndarray,
        target_factors: np.ndarray,
        observed: csr_matrix,
        reg: float,
        alpha0: float,
        nu: float,
    ) -> np.ndarray:
        """Compute one half of an ALS sweep for the paper's loss.

        For one entity e with observed opposite-side embeddings F_e, the
        paper's objective yields the normal equations:

            (alpha0 * F^T F + F_e^T F_e + lambda * #e^nu * I) x_e = sum(F_e)

        where #e = |obs(e)| + alpha0 * |opposite side|.

        Args:
            fixed_factors (np.ndarray): Factor matrix held fixed this step [K, f].
            target_factors (np.ndarray): Factor matrix to update [M, f].
            observed (csr_matrix): Sparse binary interaction matrix [M, K].
            reg (float): L2 regularization weight.
            alpha0 (float): All-pairs unobserved weight.
            nu (float): Frequency scaling exponent for the regularizer.
        Returns:
            np.ndarray: Updated target factor matrix [M, f].
        """
        # Row slicing below relies on the CSR layout.
        observed = observed.tocsr()
        row_ptr = observed.indptr
        col_idx = observed.indices

        n_entities = target_factors.shape[0]
        n_opposite_entities, f = fixed_factors.shape

        # Shared all-pairs term from L_I in Eq. 4.
        FtF = alpha0 * (fixed_factors.T @ fixed_factors)  # [f, f]
        reg_I = np.eye(f, dtype=np.float64)

        updated = np.empty_like(target_factors)

        for u in range(n_entities):
            # Read the row's column indices straight from the CSR arrays;
            # getrow() would build a new sparse matrix for every entity.
            start, end = row_ptr[u], row_ptr[u + 1]
            n_interactions = int(end - start)

            if n_interactions == 0:
                # With no observed positives, the RHS is zero and the solution
                # is the all-zero vector.
                updated[u] = 0.0
                continue

            F_u = fixed_factors[col_idx[start:end]]  # [n_u, f]

            freq_weight = n_interactions + alpha0 * n_opposite_entities
            reg_multiplier = freq_weight**nu
            A = FtF + (F_u.T @ F_u) + (reg * reg_multiplier) * reg_I

            # The observed term L_S contributes one copy of each positive
            # embedding to the RHS.
            rhs = F_u.sum(axis=0)  # [f]

            updated[u] = np.linalg.solve(A, rhs)

        return updated

    @no_type_check
    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Compute predicted preference scores.

        The paper uses the standard matrix-factorization score
        p_hat_ui = x_u^T y_i.

        Args:
            user_indices (Tensor): Batch of user indices [batch_size].
            *args (Any): Additional positional arguments.
            item_indices (Optional[Tensor]): Optional item indices [batch_size, k] for sampled
                evaluation.  If ``None``, scores for all items are returned.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            Tensor: Score tensor [batch_size, n_items] or [batch_size, k].
        """
        user_indices = user_indices.to(self.user_factors.device)
        X_u = self.user_factors[user_indices]  # [batch_size, factors]

        if item_indices is None:
            return X_u @ self.item_factors.T  # [batch_size, n_items]

        # Indices above the last valid item are clamped to n_items - 1.
        item_indices = item_indices.to(self.item_factors.device).clamp(
            max=self.n_items - 1
        )
        selected_item_factors = self.item_factors[
            item_indices
        ]  # [batch_size, k, factors]
        return (X_u.unsqueeze(1) * selected_item_factors).sum(dim=-1)  # [batch_size, k]

predict(user_indices, *args, item_indices=None, **kwargs)

Compute predicted preference scores.

The paper uses the standard matrix-factorization score p_hat_ui = x_u^T y_i.

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user indices [batch_size].

required
*args Any

Additional positional arguments.

()
item_indices Optional[Tensor]

Optional item indices [batch_size, k] for sampled evaluation. If None, scores for all items are returned.

None
**kwargs Any

Additional keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Score tensor [batch_size, n_items] or [batch_size, k].

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/ials.py
@no_type_check
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users.

    Scores follow the matrix-factorization form used by the paper,
    p_hat_ui = x_u^T y_i.

    Args:
        user_indices (Tensor): Batch of user indices [batch_size].
        *args (Any): Additional positional arguments.
        item_indices (Optional[Tensor]): Optional item indices [batch_size, k] for sampled
            evaluation.  If ``None``, scores for all items are returned.
        **kwargs (Any): Additional keyword arguments.

    Returns:
        Tensor: Score tensor [batch_size, n_items] or [batch_size, k].
    """
    # Look up the user rows on the device that holds the factors
    batch_users = user_indices.to(self.user_factors.device)
    user_vectors = self.user_factors[batch_users]  # [batch_size, factors]

    if item_indices is not None:
        # Sampled evaluation: indices above the last valid item are clamped
        # to n_items - 1 before the lookup
        candidate_ids = item_indices.to(self.item_factors.device)
        candidate_ids = candidate_ids.clamp(max=self.n_items - 1)
        candidate_vectors = self.item_factors[candidate_ids]  # [batch_size, k, factors]
        weighted = user_vectors.unsqueeze(1) * candidate_vectors
        return weighted.sum(dim=-1)  # [batch_size, k]

    # Full evaluation: score every item at once
    return user_vectors @ self.item_factors.T  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.latent_factor.ials2008.iALS2008

Bases: Recommender

Implementation of iALS model from "Collaborative Filtering for Implicit Feedback Datasets" in ICDM 2008.

Decomposes the user-item implicit feedback matrix into user-factor and item-factor matrices via confidence-weighted alternating least squares.

The model treats raw observations r_ui as indicators of preference (binary) and confidence (monotonic in r_ui), then minimizes a weighted squared-error objective with L2 regularization (Eq. 3).

Closed-form factor updates (Eq. 4 & 5) are computed in `__init__`, exploiting the sparsity trick Y^T C^u Y = Y^T Y + Y^T (C^u - I) Y to achieve O(f^2 N + f^3 m) per sweep.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
factors int

Latent factor dimensionality.

alpha float

Confidence scaling constant.

reg float

L2 regularization weight (lambda).

n_iterations int

Number of full ALS sweeps.

confidence_type str

"linear" or "log".

epsilon float

Epsilon for the log confidence variant.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/ials2008.py
@model_registry.register(name="iALS2008")
class iALS2008(Recommender):
    """Implementation of iALS model from
    "Collaborative Filtering for Implicit Feedback Datasets" in ICDM 2008.

    Decomposes the user-item implicit feedback matrix into user-factor and
    item-factor matrices via confidence-weighted alternating least squares.

    The model treats raw observations r_ui as indicators of *preference*
    (binary) and *confidence* (monotonic in r_ui), then minimizes a
    weighted squared-error objective with L2 regularization (Eq. 3).

    Closed-form factor updates (Eq. 4 & 5) are computed in __init__,
    exploiting the sparsity trick Y^T C^u Y = Y^T Y + Y^T (C^u - I) Y
    to achieve O(f^2 N + f^3 m) per sweep.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        factors (int): Latent factor dimensionality.
        alpha (float): Confidence scaling constant.
        reg (float): L2 regularization weight (lambda).
        n_iterations (int): Number of full ALS sweeps.
        confidence_type (str): "linear" or "log".
        epsilon (float): Epsilon for the log confidence variant.
    """

    factors: int
    alpha: float
    reg: float
    n_iterations: int
    confidence_type: str
    epsilon: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Analytically estimate the RAM needed to train the model.

        Args:
            params (dict): The model params; ``params["factors"]`` is read.
            info (dict): Dataset information; ``n_users`` and ``n_items``
                are read.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            dict: ``train_ram_mb`` with the estimated peak and a ``notes``
                string describing the estimate.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        factors = params["factors"]

        n_users = info["n_users"]
        n_items = info["n_items"]

        # Sparse buffers: the input matrix, the float64 confidence and
        # preference copies, and the transposed view of each copy.
        train_matrix_mb = cls._csr_size_mb(X)
        confidence_matrix_mb = cls._bytes_to_mb(
            X.data.size * np.dtype(np.float64).itemsize
            + X.indices.nbytes
            + X.indptr.nbytes
        )
        preference_matrix_mb = confidence_matrix_mb
        confidence_transpose_mb = confidence_matrix_mb
        preference_transpose_mb = preference_matrix_mb

        # Dense buffers: float64 factors while training, float32 copies at
        # the end, plus a small [f, f] workspace for the linear solves.
        user_factors_mb = cls._dense_size_mb((n_users, factors), np.float64)
        item_factors_mb = cls._dense_size_mb((n_items, factors), np.float64)
        updated_factors_mb = max(user_factors_mb, item_factors_mb)
        final_user_factors_mb = cls._dense_size_mb((n_users, factors), np.float32)
        final_item_factors_mb = cls._dense_size_mb((n_items, factors), np.float32)
        solve_workspace_mb = cls._dense_size_mb((4, factors, factors), np.float64)

        iteration_peak_mb = (
            train_matrix_mb
            + confidence_matrix_mb
            + preference_matrix_mb
            + confidence_transpose_mb
            + preference_transpose_mb
            + user_factors_mb
            + item_factors_mb
            + updated_factors_mb
            + solve_workspace_mb
        )
        final_buffers_peak_mb = (
            train_matrix_mb
            + confidence_matrix_mb
            + preference_matrix_mb
            + user_factors_mb
            + item_factors_mb
            + final_user_factors_mb
            + final_item_factors_mb
        )
        train_ram_mb = cls._peak_size_mb(iteration_peak_mb, final_buffers_peak_mb)
        return {
            "train_ram_mb": train_ram_mb,
            "notes": "iALS2008 analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # The ALS step reads rows, so normalise to CSR (a no-op if the
        # matrix is already in that format).
        X = interactions.get_sparse().tocsr()  # [n_users, n_items]
        n_users, n_items = X.shape

        # -- Compute confidence matrix entries (only non-zero stored) ---------
        # Sec. 4: c_ui = 1 + alpha * r_ui  (linear)
        # Eq. 6:  c_ui = 1 + alpha * log(1 + r_ui / epsilon)  (log)
        C_minus_I = X.copy()
        if self.confidence_type == "log":
            # Eq. 6 — log confidence variant (used in the paper's experiments)
            C_minus_I.data = self.alpha * np.log(1.0 + C_minus_I.data / self.epsilon)
        else:
            # Sec. 4 — linear confidence (paper's primary formulation)
            C_minus_I.data = self.alpha * C_minus_I.data

        # -- Preference matrix (binary) --------------------------------------
        # Sec. 4: p_ui = 1 if r_ui > 0, else 0
        P = X.copy()
        P.data = np.ones_like(P.data, dtype=np.float64)

        # The item-major views never change between sweeps: build them once
        # instead of re-transposing both matrices at every iteration.
        C_minus_I_items = C_minus_I.T.tocsr()  # [n_items, n_users]
        P_items = P.T.tocsr()  # [n_items, n_users]

        # -- Initialise latent factors ----------------------------------------
        # ASSUMPTION: Small random normal initialization — the paper does not
        # specify an initialization strategy.  This is standard practice for
        # ALS-based matrix factorization.
        # (Generator.normal already returns float64 arrays.)
        rng = np.random.default_rng(seed)
        user_factors = rng.normal(0.0, 0.01, size=(n_users, self.factors))
        item_factors = rng.normal(0.0, 0.01, size=(n_items, self.factors))

        # -- ALS iterations ---------------------------------------------------
        # Sec. 4: "We employ a few sweeps of paired recomputation of user-
        # and item-factors, till they stabilize.  A typical number of
        # sweeps is 10."
        for _ in range(self.n_iterations):
            # --- Update user factors (Eq. 4) ---------------------------------
            # x_u = (Y^T C^u Y + lambda I)^{-1} Y^T C^u p(u)
            # Sparsity trick: Y^T C^u Y = Y^T Y + Y^T (C^u - I) Y
            user_factors = self._als_step(
                item_factors, user_factors, C_minus_I, P, self.reg
            )

            # --- Update item factors (Eq. 5) ---------------------------------
            # y_i = (X^T C^i X + lambda I)^{-1} X^T C^i p(i)
            # Uses the transposed views so items are rows
            item_factors = self._als_step(
                user_factors, item_factors, C_minus_I_items, P_items, self.reg
            )

        # Store learned factors as buffers so checkpoints preserve the model.
        self.register_buffer("user_factors", torch.from_numpy(user_factors).float())
        self.register_buffer("item_factors", torch.from_numpy(item_factors).float())

    @staticmethod
    def _als_step(
        fixed_factors: np.ndarray,
        target_factors: np.ndarray,
        C_minus_I: csr_matrix,
        P: csr_matrix,
        reg: float,
    ) -> np.ndarray:
        """Compute one half of an ALS sweep (Eq. 4 / Eq. 5).

        For each row *u* of the target factor matrix, solve:
            target_u = (F^T C^u F + lambda I)^{-1} F^T C^u p(u)

        where F is ``fixed_factors``, exploiting the decomposition:
            F^T C^u F = F^T F + F^T (C^u - I) F

        Running time per target entity: O(f^2 n_u + f^3) where n_u is
        the number of non-zero entries for that entity.

        Args:
            fixed_factors (np.ndarray): Factor matrix held fixed this step [K, f].
            target_factors (np.ndarray): Factor matrix to update [M, f].
            C_minus_I (csr_matrix): Sparse matrix of (c_ui - 1) values [M, K].
            P (csr_matrix): Sparse binary preference matrix [M, K].
            reg (float): L2 regularization weight.
        Returns:
            np.ndarray: Updated target factor matrix [M, f].
        """
        # Row slicing below relies on the CSR layout.
        C_minus_I = C_minus_I.tocsr()
        P = P.tocsr()
        row_ptr = C_minus_I.indptr
        col_idx = C_minus_I.indices
        conf_data = C_minus_I.data

        # When P stores exactly the same entries as C_minus_I (true when both
        # are derived from the same interaction matrix), its data array is
        # already aligned with ``col_idx`` and can be sliced directly instead
        # of densifying a full row of length K for every entity.
        same_pattern = np.array_equal(P.indptr, row_ptr) and np.array_equal(
            P.indices, col_idx
        )

        n_entities = target_factors.shape[0]
        f = fixed_factors.shape[1]

        # Precompute F^T F — O(f^2 K), shared across all entities
        # Sec. 4: "Y^T Y is independent of u and was already precomputed."
        FtF = fixed_factors.T @ fixed_factors  # [f, f]
        reg_I = reg * np.eye(f, dtype=np.float64)

        updated = np.empty_like(target_factors)

        for u in range(n_entities):
            # Stored entries of entity u, read straight from the CSR arrays
            # (getrow() would build a new sparse matrix for every entity).
            start, end = row_ptr[u], row_ptr[u + 1]

            if start == end:
                # No interactions — factor determined purely by regularization
                # ASSUMPTION: For entities with no interactions, factors are
                # set to zero (regularization dominates).
                updated[u] = 0.0
                continue

            indices = col_idx[start:end]
            c_minus_1 = conf_data[start:end]  # c_ui - 1 for stored entries

            # Factor rows of the opposite-side entities observed with u
            F_u = fixed_factors[indices]  # [n_u, f]

            # Sec. 4 sparsity trick:
            # F^T C^u F = F^T F + F^T (C^u - I) F
            # F^T (C^u - I) F = F_u^T diag(c_ui - 1) F_u
            FtCuF = FtF + (F_u.T * c_minus_1) @ F_u  # [f, f]

            # A = F^T C^u F + lambda I
            A = FtCuF + reg_I  # [f, f]

            # Sec. 4: F^T C^u p(u)
            # C^u p(u) has only n_u non-zero entries: c_ui * p_ui, with
            # c_ui = (c_ui - 1) + 1 = c_minus_1 + 1
            if same_pattern:
                p_u = P.data[start:end]
            else:
                # Different sparsity pattern: fall back to a dense lookup.
                p_u = P.getrow(u).toarray().ravel()[indices]
            c_u = c_minus_1 + 1.0  # full c_ui for observed entries
            rhs = F_u.T @ (c_u * p_u)  # [f]

            # Eq. 4: x_u = A^{-1} rhs
            updated[u] = np.linalg.solve(A, rhs)

        return updated

    @no_type_check
    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Compute predicted preference scores.

        Sec. 5: p_hat_ui = x_u^T y_i

        Args:
            user_indices (Tensor): Batch of user indices [batch_size].
            *args (Any): Additional positional arguments.
            item_indices (Optional[Tensor]): Optional item indices [batch_size, k] for sampled
                evaluation.  If ``None``, scores for all items are returned.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            Tensor: Score tensor [batch_size, n_items] or [batch_size, k].
        """
        user_indices = user_indices.to(self.user_factors.device)
        X_u = self.user_factors[user_indices]  # [batch_size, factors]

        if item_indices is None:
            return X_u @ self.item_factors.T  # [batch_size, n_items]

        # Indices above the last valid item are clamped to n_items - 1.
        item_indices = item_indices.to(self.item_factors.device).clamp(
            max=self.n_items - 1
        )
        selected_item_factors = self.item_factors[
            item_indices
        ]  # [batch_size, k, factors]
        return (X_u.unsqueeze(1) * selected_item_factors).sum(dim=-1)  # [batch_size, k]

predict(user_indices, *args, item_indices=None, **kwargs)

Compute predicted preference scores.

Sec. 5: p_hat_ui = x_u^T y_i

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user indices [batch_size].

required
*args Any

Additional positional arguments.

()
item_indices Optional[Tensor]

Optional item indices [batch_size, k] for sampled evaluation. If None, scores for all items are returned.

None
**kwargs Any

Additional keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Score tensor [batch_size, n_items] or [batch_size, k].

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/ials2008.py
@no_type_check
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Return preference scores for a batch of users.

    Sec. 5: p_hat_ui = x_u^T y_i

    Args:
        user_indices (Tensor): Batch of user indices [batch_size].
        *args (Any): Additional positional arguments.
        item_indices (Optional[Tensor]): Optional item indices [batch_size, k] for sampled
            evaluation.  If ``None``, scores for all items are returned.
        **kwargs (Any): Additional keyword arguments.

    Returns:
        Tensor: Score tensor [batch_size, n_items] or [batch_size, k].
    """
    # Gather the user rows on the device that stores the factors
    users_on_device = user_indices.to(self.user_factors.device)
    user_rows = self.user_factors[users_on_device]  # [batch_size, factors]

    if item_indices is not None:
        # Sampled evaluation: indices above the last valid item are clamped
        # to n_items - 1 before gathering the item rows
        sampled_ids = item_indices.to(self.item_factors.device)
        sampled_ids = sampled_ids.clamp(max=self.n_items - 1)
        item_rows = self.item_factors[sampled_ids]  # [batch_size, k, factors]
        products = user_rows.unsqueeze(1) * item_rows
        return products.sum(dim=-1)  # [batch_size, k]

    # Full evaluation against the whole item factor matrix
    return user_rows @ self.item_factors.T  # [batch_size, n_items]

warprec.recommenders.collaborative_filtering_recommender.latent_factor.macrmf.MACRMF

Bases: IterativeRecommender

Implementation of MACR model from Model-Agnostic Counterfactual Reasoning for Eliminating Popularity Bias in Recommender System (KDD 2021).

  • Item module Y_i(I): captures the direct effect of item popularity on the ranking score via path I → Y.
  • User module Y_u(U): captures user conformity (tendency to interact regardless of item-user match) via path U → Y.

During training the three branches (the user–item matching backbone ŷ_k plus the item and user modules above) are fused multiplicatively (Eq. 7) and supervised jointly with a multi-task BCE loss (Eq. 8).

During inference the counterfactual score (Eq. 9) subtracts the natural direct effect of item popularity, leaving only the total indirect effect through user-item matching (TIE = TE - NDE, Section 3.4).

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

Dataset information dictionary (must contain n_users, n_items).

required
*args Any

Variable length argument list.

()
seed int

Random seed for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

Pointwise (user, item, rating) loader with negative sampling.

embedding_size int

Dimension of user and item embeddings.

alpha float

Weight for item-module auxiliary loss L_I (Eq. 8).

beta float

Weight for user-module auxiliary loss L_U (Eq. 8).

c float

Reference matching score for counterfactual inference (Eq. 9).

reg_weight float

L2 regularization coefficient.

batch_size int

Training batch size.

neg_samples int

Number of negative samples per positive interaction.

epochs int

Number of training epochs.

learning_rate float

Adam learning rate.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/macrmf.py
@model_registry.register(name="MACRMF")
class MACRMF(IterativeRecommender):
    """MACR debiasing on top of a matrix-factorization backbone.

    Implementation of the MACR model from Model-Agnostic Counterfactual
    Reasoning for Eliminating Popularity Bias in Recommender System (KDD 2021).

    Three branches are combined:

    - **Matching branch** Y_k(K(U, I)): dot product of the user and item
      embeddings (path U & I → K → Y).
    - **Item module** Y_i(I): captures the direct effect of item popularity
      on the ranking score via path I → Y.
    - **User module** Y_u(U): captures user conformity (tendency to interact
      regardless of item-user match) via path U → Y.

    During **training** the three branches are fused multiplicatively (Eq. 7)
    and supervised jointly with a multi-task BCE loss (Eq. 8).

    During **inference** the counterfactual score (Eq. 9) subtracts the
    natural direct effect of item popularity, leaving only the total indirect
    effect through user-item matching (TIE = TE - NDE, Section 3.4).

    Args:
        params (dict): Model parameters.
        info (dict): Dataset information dictionary (must contain n_users, n_items).
        *args (Any): Variable length argument list.
        seed (int): Random seed for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: Pointwise (user, item, rating) loader with negative sampling.
        embedding_size (int): Dimension of user and item embeddings.
        alpha (float): Weight for item-module auxiliary loss L_I (Eq. 8).
        beta (float): Weight for user-module auxiliary loss L_U (Eq. 8).
        c (float): Reference matching score for counterfactual inference (Eq. 9).
        reg_weight (float): L2 regularization coefficient.
        batch_size (int): Training batch size.
        neg_samples (int): Number of negative samples per positive interaction.
        epochs (int): Number of training epochs.
        learning_rate (float): Adam learning rate.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Model hyperparameters
    embedding_size: int
    alpha: float
    beta: float
    c: float
    reg_weight: float
    batch_size: int
    neg_samples: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ) -> None:
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # NOTE: the creation order of the sub-modules below is kept fixed,
        # since each constructor draws from the random generator.

        # Matching branch (MF backbone, path U & I → K → Y), Section 3.3.
        # ASSUMPTION: the matching function K(U, I) is the plain dot product,
        # as in standard MF (Section 3.2, Appendix B).
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row at index n_items acts as the padding item.
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )

        # Item module, ŷ_i = Y_i(I=i): item popularity direct effect (I → Y).
        # SIMPLIFICATION: a single linear projection from the embedding to a
        # scalar instead of a deeper MLP.
        self.item_module = nn.Linear(self.embedding_size, 1)

        # User module, ŷ_u = Y_u(U=u): user conformity direct effect (U → Y).
        # SIMPLIFICATION: same single-projection form as the item module.
        self.user_module = nn.Linear(self.embedding_size, 1)

        self.apply(self._init_weights)
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the pointwise training dataloader.

        Args:
            interactions (Interactions): Training interactions that provide
                the pointwise dataloader.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Extra options forwarded to
                ``get_pointwise_dataloader``.

        Returns:
            The object returned by ``interactions.get_pointwise_dataloader``.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=self.neg_samples,
            batch_size=self.batch_size,
            **kwargs,
        )

    def forward(self, user: Tensor, item: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        """Compute the raw score of each of the three branches.

        The branches are returned unfused; the multiplicative fusion
            ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u)   — Eq. 7, Section 3.3
        is applied by ``training_step``.

        Args:
            user (Tensor): User index tensor [batch_size].
            item (Tensor): Item index tensor [batch_size].

        Returns:
            tuple[Tensor, Tensor, Tensor]:
                y_hat_k: Raw MF dot-product matching score [batch_size].
                y_hat_i: Raw item-module score [batch_size].
                y_hat_u: Raw user-module score [batch_size].
        """
        user_vec = self.user_embedding(user)  # [B, D]
        item_vec = self.item_embedding(item)  # [B, D]

        # Matching branch: element-wise product summed over the embedding axis.
        matching = (user_vec * item_vec).sum(dim=1)  # [B]
        # Item module: popularity proxy score ŷ_i.
        popularity = self.item_module(item_vec).squeeze(-1)  # [B]
        # User module: conformity proxy score ŷ_u.
        conformity = self.user_module(user_vec).squeeze(-1)  # [B]

        return matching, popularity, conformity

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Single gradient step computing the MACR multi-task loss.

        Loss function (Eq. 8, Section 3.3):
            L = L_O + α * L_I + β * L_U

        where L_O, L_I, L_U are all binary cross-entropy losses.

        L_O supervises the fused prediction ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u).
        L_I supervises the item-module output σ(ŷ_i) alone.
        L_U supervises the user-module output σ(ŷ_u) alone.

        Args:
            batch (Any): Tuple of (user, item, rating) tensors.
            batch_idx (int): Batch index (unused).

        Returns:
            Tensor: The total scalar loss.
        """
        user, item, rating = batch
        target = rating.float()

        y_hat_k, y_hat_i, y_hat_u = self.forward(user, item)

        # Eq. 7: the raw matching score is gated by the two sigmoid-squashed
        # side branches.
        fused = y_hat_k * torch.sigmoid(y_hat_i) * torch.sigmoid(y_hat_u)  # [B]

        # All three terms go through binary_cross_entropy_with_logits, which
        # applies the sigmoid internally. The fused score is therefore treated
        # as a logit, not as a probability.
        loss_o = F.binary_cross_entropy_with_logits(fused, target)  # L_O
        loss_i = F.binary_cross_entropy_with_logits(y_hat_i, target)  # L_I
        loss_u = F.binary_cross_entropy_with_logits(y_hat_u, target)  # L_U

        # Eq. 8: L = L_O + α*L_I + β*L_U
        loss_main = loss_o + self.alpha * loss_i + self.beta * loss_u

        # L2 regularization on the embeddings looked up for this batch.
        loss_reg = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(item),
        )

        loss = loss_main + loss_reg
        # Only the total loss is shown in the progress bar.
        for tag, value, in_bar in (
            ("loss", loss, True),
            ("loss_o", loss_o, False),
            ("loss_i", loss_i, False),
            ("loss_u", loss_u, False),
        ):
            self.log(tag, value, prog_bar=in_bar, on_step=False, on_epoch=True)
        return loss

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Counterfactual debiased prediction (TIE-based ranking).

        Implements Eq. 9 from Section 3.3:
            ŷ_cf = ŷ_k * σ(ŷ_i) * σ(ŷ_u) − c * σ(ŷ_i) * σ(ŷ_u)
                 = (ŷ_k − c) * σ(ŷ_i) * σ(ŷ_u)

        This is the Total Indirect Effect (TIE = TE − NDE, Section 3.4).
        The hyperparameter `c` is the reference value of ŷ_k used when
        user–item matching is blocked.

        Args:
            user_indices (Tensor): Batch of user indices [batch_size].
            *args (Any): Unused positional arguments.
            item_indices (Optional[Tensor]): Item indices for sampled evaluation
                [batch_size, pad_seq]. If None, full ranking over all items.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            Tensor: Debiased ranking scores [batch_size, n_items] or
                [batch_size, pad_seq].
        """
        u_emb = self.user_embedding(user_indices)  # [B, D]

        # σ(ŷ_u), reshaped to [B, 1] so it broadcasts over the item axis.
        sig_u_2d = torch.sigmoid(self.user_module(u_emb).squeeze(-1)).unsqueeze(1)

        if item_indices is not None:
            # Sampled ranking: score only the provided candidates.
            i_emb = self.item_embedding(item_indices)  # [B, S, D]
            y_hat_k = torch.einsum("be,bse->bs", u_emb, i_emb)  # [B, S]
            sig_i = torch.sigmoid(self.item_module(i_emb).squeeze(-1))  # [B, S]
        else:
            # Full ranking: every item row except the trailing padding row.
            i_emb = self.item_embedding.weight[:-1, :]  # [n_items, D]
            y_hat_k = torch.einsum("be,ie->bi", u_emb, i_emb)  # [B, n_items]
            sig_i = torch.sigmoid(self.item_module(i_emb).squeeze(-1)).unsqueeze(
                0
            )  # [1, n_items]

        # Eq. 9 in factored form: (ŷ_k − c) * σ(ŷ_i) * σ(ŷ_u)
        return (y_hat_k - self.c) * sig_i * sig_u_2d  # [B, n_items] or [B, S]

forward(user, item)

Compute the raw per-branch scores that feed the fused training prediction.

forward returns the three branch scores unfused; the three-branch fusion below is applied afterwards in training_step

ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u) — Eq. 7, Section 3.3

Parameters:

Name Type Description Default
user Tensor

User index tensor [batch_size].

required
item Tensor

Item index tensor [batch_size].

required

Returns:

Type Description
tuple[Tensor, Tensor, Tensor]

tuple[Tensor, Tensor, Tensor]: y_hat_k: Raw MF dot-product matching score [batch_size]. y_hat_i: Raw item-module score [batch_size]. y_hat_u: Raw user-module score [batch_size].

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/macrmf.py
def forward(self, user: Tensor, item: Tensor) -> tuple[Tensor, Tensor, Tensor]:
    """Compute the raw score of each of the three branches.

    The branches are returned unfused; they are the ingredients of the
    multiplicative fusion
        ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u)   — Eq. 7, Section 3.3

    Args:
        user (Tensor): User index tensor [batch_size].
        item (Tensor): Item index tensor [batch_size].

    Returns:
        tuple[Tensor, Tensor, Tensor]:
            y_hat_k: Raw MF dot-product matching score [batch_size].
            y_hat_i: Raw item-module score [batch_size].
            y_hat_u: Raw user-module score [batch_size].
    """
    user_vec = self.user_embedding(user)  # [B, D]
    item_vec = self.item_embedding(item)  # [B, D]

    # Matching branch: element-wise product summed over the embedding axis.
    matching = (user_vec * item_vec).sum(dim=1)  # [B]
    # Item module: popularity proxy score ŷ_i = Y_i(I=i).
    popularity = self.item_module(item_vec).squeeze(-1)  # [B]
    # User module: conformity proxy score ŷ_u = Y_u(U=u).
    conformity = self.user_module(user_vec).squeeze(-1)  # [B]

    return matching, popularity, conformity

predict(user_indices, *args, item_indices=None, **kwargs)

Counterfactual debiased prediction (TIE-based ranking).

Implements Eq. 9 from Section 3.3: ŷ_cf = ŷ_k * σ(ŷ_i) * σ(ŷ_u) − c * σ(ŷ_i) * σ(ŷ_u)

This is the Total Indirect Effect (TIE = TE − NDE) which removes the Natural Direct Effect of item popularity from the ranking score, leaving only the user–item matching contribution (Section 3.4).

The hyperparameter c represents the reference status of ŷ_k when user–item matching is blocked (i.e. K = K_{u*,i*}), typically the mean matching score at reference embeddings (Section 3.4, Eq. 10).

Parameters:

Name Type Description Default
user_indices Tensor

Batch of user indices [batch_size].

required
*args Any

Unused positional arguments.

()
item_indices Optional[Tensor]

Item indices for sampled evaluation [batch_size, pad_seq]. If None, full ranking over all items.

None
**kwargs Any

Unused keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

Debiased ranking scores [batch_size, n_items] or [batch_size, pad_seq].

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/macrmf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Counterfactual debiased prediction (TIE-based ranking).

    Implements Eq. 9 from Section 3.3:
        ŷ_cf = ŷ_k * σ(ŷ_i) * σ(ŷ_u) − c * σ(ŷ_i) * σ(ŷ_u)
             = (ŷ_k − c) * σ(ŷ_i) * σ(ŷ_u)

    This is the Total Indirect Effect (TIE = TE − NDE, Section 3.4).
    The hyperparameter `c` is the reference value of ŷ_k used when
    user–item matching is blocked (i.e. K = K_{u*,i*}).

    Args:
        user_indices (Tensor): Batch of user indices [batch_size].
        *args (Any): Unused positional arguments.
        item_indices (Optional[Tensor]): Item indices for sampled evaluation
            [batch_size, pad_seq]. If None, full ranking over all items.
        **kwargs (Any): Unused keyword arguments.

    Returns:
        Tensor: Debiased ranking scores [batch_size, n_items] or
            [batch_size, pad_seq].
    """
    u_emb = self.user_embedding(user_indices)  # [B, D]

    # σ(ŷ_u), reshaped to [B, 1] so it broadcasts over the item axis.
    sig_u_2d = torch.sigmoid(self.user_module(u_emb).squeeze(-1)).unsqueeze(1)

    if item_indices is not None:
        # Sampled ranking: score only the provided candidates.
        i_emb = self.item_embedding(item_indices)  # [B, S, D]
        y_hat_k = torch.einsum("be,bse->bs", u_emb, i_emb)  # [B, S]
        sig_i = torch.sigmoid(self.item_module(i_emb).squeeze(-1))  # [B, S]
    else:
        # Full ranking: every embedding row except the last one.
        i_emb = self.item_embedding.weight[:-1, :]  # [n_items, D]
        y_hat_k = torch.einsum("be,ie->bi", u_emb, i_emb)  # [B, n_items]
        sig_i = torch.sigmoid(self.item_module(i_emb).squeeze(-1)).unsqueeze(
            0
        )  # [1, n_items]

    # Eq. 9 in factored form: (ŷ_k − c) * σ(ŷ_i) * σ(ŷ_u)
    return (y_hat_k - self.c) * sig_i * sig_u_2d  # [B, n_items] or [B, S]

training_step(batch, batch_idx)

Single gradient step computing the MACR multi-task loss.

Loss function (Eq. 8, Section 3.3): L = L_O + α * L_I + β * L_U

where L_O, L_I, L_U are all binary cross-entropy losses.

L_O supervises the fused prediction ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u). L_I supervises the item-module output σ(ŷ_i) alone. L_U supervises the user-module output σ(ŷ_u) alone.

Parameters:

Name Type Description Default
batch Any

Tuple of (user, item, rating) tensors.

required
batch_idx int

Batch index (unused).

required

Returns:

Name Type Description
Tensor Tensor

The total scalar loss.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/macrmf.py
def training_step(self, batch: Any, batch_idx: int) -> Tensor:
    """Single gradient step computing the MACR multi-task loss.

    Loss function (Eq. 8, Section 3.3):
        L = L_O + α * L_I + β * L_U

    where L_O, L_I, L_U are all binary cross-entropy losses.

    L_O supervises the fused prediction ŷ_ui = ŷ_k * σ(ŷ_i) * σ(ŷ_u).
    L_I supervises the item-module output σ(ŷ_i) alone.
    L_U supervises the user-module output σ(ŷ_u) alone.

    Args:
        batch (Any): Tuple of (user, item, rating) tensors.
        batch_idx (int): Batch index (unused).

    Returns:
        Tensor: The total scalar loss.
    """
    user, item, rating = batch
    target = rating.float()

    y_hat_k, y_hat_i, y_hat_u = self.forward(user, item)

    # Eq. 7: the raw matching score is gated by the two sigmoid-squashed
    # side branches.
    fused = y_hat_k * torch.sigmoid(y_hat_i) * torch.sigmoid(y_hat_u)  # [B]

    # All three terms go through binary_cross_entropy_with_logits, which
    # applies the sigmoid internally. The fused score is therefore treated
    # as a logit, not as a probability.
    loss_o = F.binary_cross_entropy_with_logits(fused, target)  # L_O
    loss_i = F.binary_cross_entropy_with_logits(y_hat_i, target)  # L_I
    loss_u = F.binary_cross_entropy_with_logits(y_hat_u, target)  # L_U

    # Eq. 8: L = L_O + α*L_I + β*L_U
    loss_main = loss_o + self.alpha * loss_i + self.beta * loss_u

    # L2 regularization on the embeddings looked up for this batch.
    loss_reg = self.reg_weight * self.reg_loss(
        self.user_embedding(user),
        self.item_embedding(item),
    )

    loss = loss_main + loss_reg
    # Only the total loss is shown in the progress bar.
    for tag, value, in_bar in (
        ("loss", loss, True),
        ("loss_o", loss_o, False),
        ("loss_i", loss_i, False),
        ("loss_u", loss_u, False),
    ):
        self.log(tag, value, prog_bar=in_bar, on_step=False, on_epoch=True)
    return loss

warprec.recommenders.collaborative_filtering_recommender.latent_factor.slim.Slim

Bases: ItemSimRecommender

Implementation of Slim model from Sparse Linear Methods for Top-N Recommender Systems 2011.

Parameters:

Name Type Description Default
params dict

The dictionary with the model params.

required
info dict

The dictionary containing dataset information.

required
interactions Interactions

The training interactions.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
l1 float

The ElasticNet L1 ratio (passed as l1_ratio), i.e. the mix between the L1 and L2 penalties.

alpha float

The ElasticNet regularization strength (passed as alpha), i.e. the constant that multiplies the penalty terms.

Source code in warprec/recommenders/collaborative_filtering_recommender/latent_factor/slim.py
@model_registry.register(name="Slim")
class Slim(ItemSimRecommender):
    """Implementation of Slim model from
        Sparse Linear Methods for Top-N Recommender Systems 2011.

    One ElasticNet regression is fitted per item: the item's interaction
    column is the target and the remaining item columns are the features.
    The target item is hidden from the features during its own fit so that
    the learned similarity matrix has a zero diagonal (the SLIM constraint
    diag(W) = 0) instead of collapsing to the trivial self-similarity.

    Args:
        params (dict): The dictionary with the model params.
        info (dict): The dictionary containing dataset information.
        interactions (Interactions): The training interactions.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        l1 (float): The ElasticNet L1 ratio (passed as ``l1_ratio``).
        alpha (float): The ElasticNet regularization strength (passed as
            ``alpha``).
    """

    l1: float
    alpha: float

    @classmethod
    def estimate_space(
        cls,
        params: dict,
        info: dict,
        interactions: Optional[Interactions] = None,
        **kwargs: Any,
    ) -> dict:
        """Estimate the peak RAM needed to train Slim.

        Args:
            params (dict): The model params (not read by this estimate).
            info (dict): Dataset information; ``n_users`` and ``n_items``
                are read.
            interactions (Optional[Interactions]): The training interactions,
                validated through ``_require_interactions_for_estimate``.
            **kwargs (Any): Unused keyword arguments.

        Returns:
            dict: ``train_ram_mb`` with the estimated peak size and a
                ``notes`` string describing the estimate.
        """
        interactions = cls._require_interactions_for_estimate(
            interactions, cls.__name__
        )
        X = interactions.get_sparse()
        n_users = info["n_users"]
        n_items = info["n_items"]

        train_matrix_mb = cls._sparse_size_mb(X)
        # Size of the second (LIL) copy of the training matrix
        train_matrix_lil_mb = cls._compressed_sparse_size_mb(
            nnz=X.nnz,
            ptr_len=n_users + 1,
            data_dtype=X.dtype,
            index_dtype=np.int64,
        )
        response_vector_mb = cls._dense_size_mb((n_users,), np.float64)
        gram_matrix_mb = cls._dense_size_mb((n_items, n_items), np.float64)
        final_similarity_mb = cls._dense_size_mb((n_items, n_items), np.float64)

        # Two candidate peaks: while fitting (Gram matrix + response vector)
        # and while holding the final dense similarity matrix.
        train_ram_mb = cls._peak_size_mb(
            train_matrix_mb + train_matrix_lil_mb + gram_matrix_mb + response_vector_mb,
            train_matrix_mb + train_matrix_lil_mb + final_similarity_mb,
        )
        return {
            "train_ram_mb": train_ram_mb,
            "notes": "Slim analytical train-space estimate",
        }

    def __init__(
        self,
        params: dict,
        info: dict,
        interactions: Interactions,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, interactions, *args, seed=seed, **kwargs)

        # Work on a LIL copy: single columns are blanked out and restored
        # below, and self.train_matrix itself must stay untouched.
        X = self.train_matrix.tolil()

        num_items = X.shape[1]
        item_coeffs = []
        model = ElasticNet(
            alpha=self.alpha,
            l1_ratio=self.l1,
            positive=True,
            fit_intercept=False,
            copy_X=False,
            precompute=True,
            selection="random",
            max_iter=100,
            tol=1e-4,
        )

        for j in range(num_items):
            # Target: interactions of item j, as a copy and as a dense vector
            column = X[:, j]
            target = column.toarray().ravel()

            # Hide item j from the features so it cannot predict itself;
            # this enforces the zero diagonal of the similarity matrix.
            X[:, j] = 0
            model.fit(X, target)
            # Put the column back for the following regressions
            X[:, j] = column

            # Keep the coefficients in sparse format
            item_coeffs.append(model.sparse_coef_)

        # Stack the coefficients (one row per target item), transpose so that
        # columns index the target item, and make the matrix dense
        self.item_similarity = sp.vstack(item_coeffs).T.todense()

Neural

warprec.recommenders.collaborative_filtering_recommender.neural.convncf.ConvNCF

Bases: IterativeRecommender

Implementation of ConvNCF algorithm from Outer Product-based Neural Collaborative Filtering 2018.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The embedding size for users and items.

cnn_channels List[int]

The list of output channels for each CNN layer.

cnn_kernels List[int]

The list of kernel sizes for each CNN layer.

cnn_strides List[int]

The list of stride sizes for each CNN layer.

dropout_prob float

The dropout probability for the prediction layer.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/convncf.py
@model_registry.register(name="ConvNCF")
class ConvNCF(IterativeRecommender):
    """Implementation of ConvNCF algorithm from
        Outer Product-based Neural Collaborative Filtering 2018.

    Each (user, item) pair is scored by taking the outer product of the two
    embeddings, passing it through a CNN, summing the resulting feature maps
    over their spatial axes and feeding the result to a final prediction
    layer. Training minimises a BPR loss plus a weighted embedding
    regularization term.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. The optional key
            ``block_size`` (default 50) sets how many items are scored per
            forward call during full prediction.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The embedding size for users and items.
        cnn_channels (List[int]): The list of output channels for each CNN layer.
        cnn_kernels (List[int]): The list of kernel sizes for each CNN layer.
        cnn_strides (List[int]): The list of stride sizes for each CNN layer.
        dropout_prob (float): The dropout probability for the prediction layer.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        block_size (int): The number of items scored per block in full prediction.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.POS_NEG_LOADER

    # Model hyperparameters
    # NOTE(review): these are only declared here; presumably the base class
    # populates them from `params` -- confirm against IterativeRecommender.
    embedding_size: int
    cnn_channels: List[int]
    cnn_kernels: List[int]
    cnn_strides: List[int]
    dropout_prob: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Check for optional value of block size
        self.block_size = kwargs.get("block_size", 50)

        # Ray Tune converts lists to tuples
        self.cnn_channels = list(self.cnn_channels)
        self.cnn_kernels = list(self.cnn_kernels)
        self.cnn_strides = list(self.cnn_strides)

        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        # One extra row (index n_items) is reserved as the padding item
        self.item_embedding = nn.Embedding(
            self.n_items + 1, self.embedding_size, padding_idx=self.n_items
        )
        self.cnn_layers = CNN(
            self.cnn_channels,
            self.cnn_kernels,
            self.cnn_strides,
            activation="relu",
        )

        # Prediction layer (MLP)
        # The input of the prediction layer is the output
        # of the CNN, so self.cnn_channels[-1]
        self.predict_layers = MLP(
            [self.cnn_channels[-1], 1], self.dropout_prob, activation=None
        )  # We set no activation for last layer

        # Init embedding weights
        self.apply(self._init_weights)
        self.bpr_loss = BPRLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader for this model.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Extra keyword arguments forwarded to
                ``interactions.get_contrastive_dataloader``.

        Returns:
            The object returned by ``interactions.get_contrastive_dataloader``.
        """
        return interactions.get_contrastive_dataloader(
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the training loss for one batch.

        Args:
            batch (Any): A ``(user, pos_item, neg_item)`` triple.
            batch_idx (int): The index of the batch (unused here).

        Returns:
            The sum of the BPR loss and the weighted regularization loss.
        """
        user, pos_item, neg_item = batch

        # Calculate BPR loss
        pos_item_score = self.forward(user, pos_item)
        neg_item_score = self.forward(user, neg_item)
        bpr_loss = self.bpr_loss(pos_item_score, neg_item_score)

        # Calculate L2 regularization
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_embedding(user),
            self.item_embedding(pos_item),
            self.item_embedding(neg_item),
        )

        # Loss logging
        loss = bpr_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self, user: Tensor, item: Tensor) -> Tensor:
        """Forward pass of the ConvNCF model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.

        Returns:
            Tensor: The predicted score for each pair (user, item).
        """
        user_e = self.user_embedding(user)  # [batch_size, embedding_size]
        item_e = self.item_embedding(item)  # [batch_size, embedding_size]

        # Outer product to create interaction map
        interaction_map = torch.bmm(
            user_e.unsqueeze(2), item_e.unsqueeze(1)
        )  # [batch_size, embedding_size, embedding_size]

        # Add a channel dimension for CNN input: [batch_size, 1, embedding_size, embedding_size]
        interaction_map = interaction_map.unsqueeze(1)

        # CNN layers
        cnn_output = self.cnn_layers(
            interaction_map
        )  # [batch_size, cnn_channels[-1], H', W']

        # Sum across spatial dimensions (H', W')
        # This reduces the feature map to [batch_size, cnn_channels[-1]]
        cnn_output = cnn_output.sum(dim=(2, 3))

        # Prediction layer (MLP)
        prediction = self.predict_layers(cnn_output)  # [batch_size, 1]

        return prediction.squeeze(-1)  # [batch_size]

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments (unused here).
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments (unused here).

        Returns:
            Tensor: The score matrix {user x item}. Its shape is
                [batch_size, n_items] for full prediction and
                [batch_size, item_indices.size(1)] otherwise.
        """
        # Retrieve batch size from user batch
        batch_size = user_indices.size(0)

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            device = user_indices.device
            all_scores = []
            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)
                n_items_in_block = end - start

                # Build the block's item ids directly on the users' device so
                # no expanded tensor has to be copied across devices per block
                items_block_indices = torch.arange(start, end, device=device)

                # Expand user and item indices to create all pairs for the block
                users_expanded = (
                    user_indices.unsqueeze(1).expand(-1, n_items_in_block).reshape(-1)
                )
                items_expanded = (
                    items_block_indices.unsqueeze(0).expand(batch_size, -1).reshape(-1)
                )

                # Call forward on the flattened batch of pairs for the current block
                scores_flat = self.forward(users_expanded, items_expanded)

                # Reshape the result and append
                all_scores.append(scores_flat.view(batch_size, n_items_in_block))

            # Concatenate the results from all blocks
            return torch.cat(all_scores, dim=1)

        # Case 'sampled': process all given item_indices at once
        pad_seq = item_indices.size(1)

        # Expand user and item indices to create all pairs
        users_expanded = user_indices.unsqueeze(1).expand(-1, pad_seq).reshape(-1)
        items_expanded = item_indices.reshape(-1)

        # Call forward on the flattened batch of pairs
        predictions_flat = self.forward(users_expanded, items_expanded)

        # Reshape the flat predictions back to the original batch shape
        return predictions_flat.view(batch_size, pad_seq)

forward(user, item)

Forward pass of the ConvNCF model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required

Returns:

Name Type Description
Tensor Tensor

The predicted score for each pair (user, item).

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/convncf.py
def forward(self, user: Tensor, item: Tensor) -> Tensor:
    """Score every (user, item) pair with the ConvNCF network.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.

    Returns:
        Tensor: The predicted score for each pair (user, item).
    """
    # Latent vectors, each [batch_size, embedding_size]
    user_vec = self.user_embedding(user)
    item_vec = self.item_embedding(item)

    # A column vector times a row vector yields the per-pair outer product
    # [batch_size, embedding_size, embedding_size]; the axis inserted at
    # position 1 is the single input channel expected by the CNN
    user_col = user_vec.unsqueeze(2)
    item_row = item_vec.unsqueeze(1)
    feature_map = torch.bmm(user_col, item_row).unsqueeze(1)

    # Convolve, then collapse the spatial axes (H', W') by summation,
    # leaving [batch_size, cnn_channels[-1]]
    pooled = self.cnn_layers(feature_map).sum(dim=(2, 3))

    # The prediction layer outputs [batch_size, 1]; drop the trailing axis
    scores = self.predict_layers(pooled)
    return scores.squeeze(-1)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/convncf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments (unused here).
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments (unused here).

    Returns:
        Tensor: The score matrix {user x item}. Its shape is
            [batch_size, n_items] for full prediction and
            [batch_size, item_indices.size(1)] otherwise.
    """
    # Retrieve batch size from user batch
    batch_size = user_indices.size(0)

    if item_indices is None:
        # Case 'full': iterate through all items in memory-safe blocks
        device = user_indices.device
        all_scores = []
        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)
            n_items_in_block = end - start

            # Build the block's item ids directly on the users' device so
            # no expanded tensor has to be copied across devices per block
            items_block_indices = torch.arange(start, end, device=device)

            # Expand user and item indices to create all pairs for the block
            users_expanded = (
                user_indices.unsqueeze(1).expand(-1, n_items_in_block).reshape(-1)
            )
            items_expanded = (
                items_block_indices.unsqueeze(0).expand(batch_size, -1).reshape(-1)
            )

            # Call forward on the flattened batch of pairs for the current block
            scores_flat = self.forward(users_expanded, items_expanded)

            # Reshape the result and append
            all_scores.append(scores_flat.view(batch_size, n_items_in_block))

        # Concatenate the results from all blocks
        return torch.cat(all_scores, dim=1)

    # Case 'sampled': process all given item_indices at once
    pad_seq = item_indices.size(1)

    # Expand user and item indices to create all pairs
    users_expanded = user_indices.unsqueeze(1).expand(-1, pad_seq).reshape(-1)
    items_expanded = item_indices.reshape(-1)

    # Call forward on the flattened batch of pairs
    predictions_flat = self.forward(users_expanded, items_expanded)

    # Reshape the flat predictions back to the original batch shape
    return predictions_flat.view(batch_size, pad_seq)

warprec.recommenders.collaborative_filtering_recommender.neural.neumf.NeuMF

Bases: IterativeRecommender

Implementation of NeuMF algorithm from Neural Collaborative Filtering 2017.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

mf_embedding_size int

The MF embedding size.

mlp_embedding_size int

The MLP embedding size.

mlp_hidden_size List[int]

The MLP hidden layer size list.

mf_train bool

Whether or not to train the MF embedding.

mlp_train bool

Whether or not to train the MLP embedding.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

The number of negative samples per positive interaction.

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/neumf.py
@model_registry.register(name="NeuMF")
class NeuMF(IterativeRecommender):
    """Implementation of NeuMF algorithm from
        Neural Collaborative Filtering 2017.

    The model combines an MF branch (element-wise product of user and item
    embeddings) with an MLP branch (MLP over the concatenated embeddings).
    The enabled branches are concatenated and fed to a linear prediction
    layer. Training minimises a BCE-with-logits loss plus a weighted
    embedding regularization term.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. The optional key
            ``block_size`` (default 50) sets how many items are scored per
            forward call during full prediction.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        mf_embedding_size (int): The MF embedding size.
        mlp_embedding_size (int): The MLP embedding size.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        mf_train (bool): Whether or not to train MF embedding.
        mlp_train (bool): Whether or not to train MLP embedding.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): The number of negative samples per positive interaction.
        block_size (int): The number of items scored per block in full prediction.
    """

    # Dataloader definition
    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER

    # Model hyperparameters
    # NOTE(review): these are only declared here; presumably the base class
    # populates them from `params` -- confirm against IterativeRecommender.
    mf_embedding_size: int
    mlp_embedding_size: int
    mlp_hidden_size: List[int]
    mf_train: bool
    mlp_train: bool
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(params, info, *args, seed=seed, **kwargs)

        # Check for optional value of block size
        self.block_size = kwargs.get("block_size", 50)

        # Ray Tune converts lists to tuples
        # so we need to convert them back to lists
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # MF embeddings (the extra item row, index n_items, is the padding item)
        self.user_mf_embedding = nn.Embedding(self.n_users, self.mf_embedding_size)
        self.item_mf_embedding = nn.Embedding(
            self.n_items + 1, self.mf_embedding_size, padding_idx=self.n_items
        )

        # MLP embeddings
        self.user_mlp_embedding = nn.Embedding(self.n_users, self.mlp_embedding_size)
        self.item_mlp_embedding = nn.Embedding(
            self.n_items + 1, self.mlp_embedding_size, padding_idx=self.n_items
        )

        # MLP layers
        self.mlp_layers = MLP(
            [2 * self.mlp_embedding_size] + self.mlp_hidden_size, self.dropout
        )

        # Final prediction layer: its input width depends on the enabled branches
        if self.mf_train and self.mlp_train:
            self.predict_layer = nn.Linear(
                self.mf_embedding_size + self.mlp_hidden_size[-1], 1
            )
        elif self.mf_train:
            self.predict_layer = nn.Linear(self.mf_embedding_size, 1)
        else:
            self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)

        # Init embedding weights
        self.apply(self._init_weights)
        self.sigmoid = nn.Sigmoid()
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

    def get_dataloader(
        self,
        interactions: Interactions,
        sessions: Sessions,
        **kwargs: Any,
    ):
        """Build the training dataloader for this model.

        Args:
            interactions (Interactions): The training interactions.
            sessions (Sessions): Not used by this model.
            **kwargs (Any): Extra keyword arguments forwarded to
                ``interactions.get_pointwise_dataloader``.

        Returns:
            The object returned by ``interactions.get_pointwise_dataloader``.
        """
        return interactions.get_pointwise_dataloader(
            neg_samples=self.neg_samples,
            batch_size=self.batch_size,
            **kwargs,
        )

    def training_step(self, batch: Any, batch_idx: int):
        """Compute the training loss for one batch.

        Args:
            batch (Any): A ``(user, item, rating)`` triple.
            batch_idx (int): The index of the batch (unused here).

        Returns:
            The sum of the BCE loss and the weighted regularization loss.
        """
        user, item, rating = batch

        # Calculate BCE loss
        predictions = self(user, item)
        bce_loss = self.bce_loss(predictions, rating)

        # Calculate L2 regularization
        reg_loss = self.reg_weight * self.reg_loss(
            self.user_mf_embedding(user),
            self.user_mlp_embedding(user),
            self.item_mf_embedding(item),
            self.item_mlp_embedding(item),
        )

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(self, user: Tensor, item: Tensor) -> Tensor:
        """Forward pass of the NeuMF model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.

        Returns:
            Tensor: The predicted score (raw logit) for each pair (user, item).

        Raises:
            ValueError: If both `mf_train` and `mlp_train` are disabled.
        """
        # With no branch enabled there is nothing to feed the prediction layer
        if not (self.mf_train or self.mlp_train):
            raise ValueError(
                "NeuMF requires at least one of 'mf_train' or 'mlp_train' to be True."
            )

        # Only look up the embeddings of the branches that are enabled
        branches = []
        if self.mf_train:
            branches.append(
                torch.mul(self.user_mf_embedding(user), self.item_mf_embedding(item))
            )
        if self.mlp_train:
            mlp_input = torch.cat(
                (self.user_mlp_embedding(user), self.item_mlp_embedding(item)), -1
            )
            branches.append(self.mlp_layers(mlp_input))

        # MF features come first, matching the prediction layer's input layout
        features = branches[0] if len(branches) == 1 else torch.cat(branches, -1)
        return self.predict_layer(features).squeeze(-1)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the learned embeddings.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments (unused here).
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            **kwargs (Any): The dictionary of keyword arguments (unused here).

        Returns:
            Tensor: The score matrix {user x item}, after a sigmoid. Its shape
                is [batch_size, n_items] for full prediction and
                [batch_size, item_indices.size(1)] otherwise.
        """
        # Retrieve batch size from user batch
        batch_size = user_indices.size(0)

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            device = user_indices.device
            preds_logits = []
            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)

                # Build the block's item ids directly on the users' device so
                # no expanded tensor has to be copied across devices per block
                items_block = torch.arange(start, end, device=device)

                # Expand user and item indices to create all pairs for the block
                users_block = (
                    user_indices.unsqueeze(1).expand(-1, end - start).reshape(-1)
                )
                items_block_expanded = (
                    items_block.unsqueeze(0).expand(batch_size, -1).reshape(-1)
                )

                # Get raw logits from the forward pass
                logits_block = self.forward(users_block, items_block_expanded)
                preds_logits.append(logits_block.view(batch_size, -1))

            predictions_logits = torch.cat(preds_logits, dim=1)

        else:
            # Case 'sampled': process all given item_indices at once
            pad_seq = item_indices.size(1)

            # Expand user and item indices to create all pairs
            users_expanded = user_indices.unsqueeze(1).expand(-1, pad_seq).reshape(-1)
            items_expanded = item_indices.reshape(-1)

            # Get raw logits from the forward pass
            predictions_flat_logits = self.forward(users_expanded, items_expanded)
            predictions_logits = predictions_flat_logits.view(batch_size, pad_seq)

        # Apply sigmoid once to the final logits tensor
        return self.sigmoid(predictions_logits)

forward(user, item)

Forward pass of the NeuMF model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required

Returns:

Name Type Description
Tensor Tensor

The predicted score for each pair (user, item).

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/neumf.py
def forward(self, user: Tensor, item: Tensor) -> Tensor:
    """Forward pass of the NeuMF model.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.

    Returns:
        Tensor: The predicted score (raw logit) for each pair (user, item).

    Raises:
        ValueError: If both `mf_train` and `mlp_train` are disabled.
    """
    # With no branch enabled there is nothing to feed the prediction layer
    if not (self.mf_train or self.mlp_train):
        raise ValueError(
            "NeuMF requires at least one of 'mf_train' or 'mlp_train' to be True."
        )

    # Only look up the embeddings of the branches that are enabled
    branches = []
    if self.mf_train:
        branches.append(
            torch.mul(self.user_mf_embedding(user), self.item_mf_embedding(item))
        )
    if self.mlp_train:
        mlp_input = torch.cat(
            (self.user_mlp_embedding(user), self.item_mlp_embedding(item)), -1
        )
        branches.append(self.mlp_layers(mlp_input))

    # MF features come first, matching the prediction layer's input layout
    features = branches[0] if len(branches) == 1 else torch.cat(branches, -1)
    return self.predict_layer(features).squeeze(-1)

predict(user_indices, *args, item_indices=None, **kwargs)

Prediction using the learned embeddings.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/collaborative_filtering_recommender/neural/neumf.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the learned embeddings.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments (unused here).
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        **kwargs (Any): The dictionary of keyword arguments (unused here).

    Returns:
        Tensor: The score matrix {user x item}, after a sigmoid. Its shape
            is [batch_size, n_items] for full prediction and
            [batch_size, item_indices.size(1)] otherwise.
    """
    # Retrieve batch size from user batch
    batch_size = user_indices.size(0)

    if item_indices is None:
        # Case 'full': iterate through all items in memory-safe blocks
        device = user_indices.device
        preds_logits = []
        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)

            # Build the block's item ids directly on the users' device so
            # no expanded tensor has to be copied across devices per block
            items_block = torch.arange(start, end, device=device)

            # Expand user and item indices to create all pairs for the block
            users_block = (
                user_indices.unsqueeze(1).expand(-1, end - start).reshape(-1)
            )
            items_block_expanded = (
                items_block.unsqueeze(0).expand(batch_size, -1).reshape(-1)
            )

            # Get raw logits from the forward pass
            logits_block = self.forward(users_block, items_block_expanded)
            preds_logits.append(logits_block.view(batch_size, -1))

        predictions_logits = torch.cat(preds_logits, dim=1)

    else:
        # Case 'sampled': process all given item_indices at once
        pad_seq = item_indices.size(1)

        # Expand user and item indices to create all pairs
        users_expanded = user_indices.unsqueeze(1).expand(-1, pad_seq).reshape(-1)
        items_expanded = item_indices.reshape(-1)

        # Get raw logits from the forward pass
        predictions_flat_logits = self.forward(users_expanded, items_expanded)
        predictions_logits = predictions_flat_logits.view(batch_size, pad_seq)

    # Apply sigmoid once to the final logits tensor
    return self.sigmoid(predictions_logits)