Skip to content

Context-Aware - API Reference

Auto-generated documentation for context-aware recommender model classes.

warprec.recommenders.context_aware_recommender.afm.AFM

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of the AFM algorithm from "Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks", IJCAI 2017.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

attention_size int

The size of the attention network hidden layer.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight for embeddings.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/afm.py
@model_registry.register(name="AFM")
class AFM(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of AFM algorithm from
        Attentional Factorization Machines: Learning the Weight of Feature Interactions
        via Attention Networks, IJCAI 2017.

    The score of a sample is the sum of a first-order (bias) term and an
    attention-pooled sum of the element-wise products of every pair of field
    embeddings, projected onto the learnable vector ``p``.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. ``block_size`` and
            ``chunk_size`` are read from here (see Attributes).

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        attention_size (int): The size of the attention network hidden layer.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight for embeddings.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
        block_size (int): Number of items scored per iteration when predicting
            over the full catalogue (``kwargs["block_size"]``, default 50).
        chunk_size (int): Number of flattened (user, item) rows sent through
            the interaction layer at once during full prediction
            (``kwargs["chunk_size"]``, default 4096).
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    attention_size: int
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Inference-time memory knobs (not model hyper-parameters)
        self.block_size = kwargs.get("block_size", 50)
        self.chunk_size = kwargs.get("chunk_size", 4096)

        # Attention Network
        self.attention_layer = AttentionLayer(self.embedding_size, self.attention_size)

        # Projection Vector p
        # Weights the final pooled vector to produce the score
        self.p = nn.Parameter(torch.randn(self.embedding_size))

        # Dropout
        self.dropout_layer = nn.Dropout(self.dropout)

        # Pre-compute Pair Indices
        # Total fields = User (1) + Item (1) + Features (N) + Contexts (M)
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)

        # All unique pairs (i, j) with i < j, in row-major order. This is the
        # strict upper triangle of a num_fields x num_fields matrix.
        pair_idx = torch.triu_indices(self.num_fields, self.num_fields, offset=1)

        # Register as buffers so they follow the module across devices.
        # clone() gives each buffer its own storage instead of a view on pair_idx.
        self.register_buffer("p_idx", pair_idx[0].clone())
        self.register_buffer("q_idx", pair_idx[1].clone())

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        self.apply(self._init_weights)

    def _compute_afm_interaction(self, stacked_embeddings: Tensor) -> Tensor:
        """Computes the AFM interaction part.

        Args:
            stacked_embeddings (Tensor): Field embeddings stacked on dim 1,
                i.e. [batch_size, num_fields, embedding_size].

        Returns:
            Tensor: The second-order score for each row [batch_size].
        """
        # Pair-wise Interaction Layer
        # [batch_size, num_pairs, embedding_size]
        p = stacked_embeddings[:, self.p_idx]  # type: ignore[index]
        q = stacked_embeddings[:, self.q_idx]  # type: ignore[index]

        # Element-wise product
        pair_wise_inter = p * q

        # Apply Dropout on the interaction vectors
        pair_wise_inter = self.dropout_layer(pair_wise_inter)

        # Attention-based Pooling
        att_weights = self.attention_layer(
            pair_wise_inter
        )  # [batch_size, num_pairs, 1]

        # Weighted sum
        att_pooling = torch.sum(
            att_weights * pair_wise_inter, dim=1
        )  # [batch_size, embedding_size]

        # Final Projection
        afm_score = torch.sum(att_pooling * self.p, dim=1)  # [batch_size]

        return afm_score

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Computes the training loss (BCE + weighted embedding regularization).

        Args:
            batch (Any): Indexable batch laid out as
                ``(user, item, rating[, features][, contexts])``; the optional
                entries are present only when the model has feature / context
                dimensions.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The scalar loss, also logged under the key ``"loss"``.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on embeddings and biases
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the AFM model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: First-order term plus AFM interaction term, one logit per sample.
        """
        # Linear Part (First Order)
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Interaction Part (Second Order)
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        # NOTE(review): guarded by context_labels here but by context_dims in
        # training_step/predict; presumably equivalent, confirm in the base class.
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        # Concatenate on Field dimension
        stacked_embeddings = torch.cat(components, dim=1)

        # AFM Interaction Part
        afm_part = self._compute_afm_interaction(stacked_embeddings)

        return linear_part + afm_part

    def _compute_network_scores(
        self,
        u_emb: Tensor,
        i_emb: Tensor,
        feat_emb_tensor: Optional[Tensor],
        ctx_emb_tensor: Optional[Tensor],
        batch_size: int,
        num_items: int,
    ) -> Tensor:
        """Compute scores of AFM interaction part efficiently using chunking.

        Every (user, item) pair is flattened to one row; rows are processed
        ``chunk_size`` at a time so the pair-wise interaction tensor never has
        to be materialized for the whole block at once.

        Args:
            u_emb (Tensor): User embeddings [batch_size, embedding_size].
            i_emb (Tensor): Item embeddings [batch_size, num_items, embedding_size].
            feat_emb_tensor (Optional[Tensor]): Per-item feature embeddings,
                broadcast over the batch (leading dim is num_items).
            ctx_emb_tensor (Optional[Tensor]): Per-user context embeddings,
                broadcast over the items (leading dim is batch_size).
            batch_size (int): Number of users in the batch.
            num_items (int): Number of items in the block.

        Returns:
            Tensor: The interaction scores [batch_size, num_items].
        """
        total_rows = batch_size * num_items

        # Create memory efficient views
        u_view = (
            u_emb.unsqueeze(1)
            .unsqueeze(2)
            .expand(-1, num_items, -1, -1)
            .reshape(total_rows, 1, -1)
        )
        i_view = i_emb.unsqueeze(2).reshape(total_rows, 1, -1)
        views = [u_view, i_view]

        # Handle Feature views
        if feat_emb_tensor is not None:
            f_view = (
                feat_emb_tensor.unsqueeze(0)
                .expand(batch_size, -1, -1, -1)
                .reshape(total_rows, -1, self.embedding_size)
            )
            views.append(f_view)

        # Handle Context views
        if ctx_emb_tensor is not None:
            c_view = (
                ctx_emb_tensor.unsqueeze(1)
                .expand(-1, num_items, -1, -1)
                .reshape(total_rows, -1, self.embedding_size)
            )
            views.append(c_view)

        # Pre-allocate the output with the same dtype/device as the embeddings,
        # so a non-float32 model (e.g. double) is not silently down-cast.
        all_scores = u_emb.new_empty(total_rows)

        # Loop on chunk size parameter
        for start in range(0, total_rows, self.chunk_size):
            end = min(start + self.chunk_size, total_rows)

            # Slice the views and concatenate
            # Each view is [Total_Rows, Num_Fields_Subset, Emb]
            chunk_components = [v[start:end] for v in views]

            # Concatenate on Field dimension (dim=1)
            chunk_stack = torch.cat(chunk_components, dim=1)

            # Compute AFM Interaction and save in place
            all_scores[start:end] = self._compute_afm_interaction(chunk_stack)

        return all_scores.view(batch_size, num_items)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the AFM model.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # Linear Fixed: first-order terms that do not depend on the item
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)
        if contexts is not None and self.context_dims:
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear += ctx_bias

        # Embeddings Fixed
        u_emb = self.user_embedding(user_indices)  # [batch_size, embedding_size]
        ctx_emb_tensor = self._get_context_embeddings(contexts)

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            preds_list = []

            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)
                current_block_len = end - start

                items_block = torch.arange(start, end, device=self.device)

                # Item Embeddings and Bias
                item_emb_block = self.item_embedding(items_block)

                # Retrieve block feature embeddings and bias
                feat_emb_block_tensor = self._get_feature_embeddings(items_block)
                feat_bias_block = self._get_feature_bias(items_block)

                # Linear Part: user-side terms broadcast over items,
                # item-side terms broadcast over users
                item_bias_block = self.item_bias(items_block).squeeze(-1)
                linear_pred = (
                    fixed_linear.unsqueeze(1)
                    + item_bias_block.unsqueeze(0)
                    + feat_bias_block.unsqueeze(0)
                )

                # Expand Item to match batch size
                item_emb_expanded = item_emb_block.unsqueeze(0).expand(
                    batch_size, -1, -1
                )

                # Compute AFM scores efficiently
                afm_scores = self._compute_network_scores(
                    u_emb,
                    item_emb_expanded,
                    feat_emb_block_tensor,
                    ctx_emb_tensor,
                    batch_size,
                    current_block_len,
                )

                preds_list.append(linear_pred + afm_scores)

            return torch.cat(preds_list, dim=1)

        # Case 'sampled': process given item_indices
        pad_seq = item_indices.size(1)

        # Item Embeddings: [Batch, Seq, Emb]
        item_emb = self.item_embedding(item_indices)

        # Retrieve item feature embeddings & bias
        # feat_emb_tensor: [Batch, Seq, Num_Feat, Emb]
        feat_emb_tensor = self._get_feature_embeddings(item_indices)
        feat_bias = self._get_feature_bias(item_indices)

        # Linear
        item_bias = self.item_bias(item_indices).squeeze(-1)
        linear_pred = fixed_linear.unsqueeze(1) + item_bias + feat_bias

        # Stack Construction
        # User: [Batch, 1, 1, Emb] -> [Batch, Seq, 1, Emb]
        u_emb_exp = u_emb.unsqueeze(1).unsqueeze(2).expand(-1, pad_seq, -1, -1)

        # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
        i_emb_exp = item_emb.unsqueeze(2)

        stack_list = [u_emb_exp, i_emb_exp]

        if feat_emb_tensor is not None:
            stack_list.append(feat_emb_tensor)

        if ctx_emb_tensor is not None:
            # Context: [Batch, Num_Ctx, Emb] -> [Batch, 1, Num_Ctx, Emb] -> [Batch, Seq, Num_Ctx, Emb]
            c_emb_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, pad_seq, -1, -1)
            stack_list.append(c_emb_exp)

        # Concatenate on Field dimension (dim=2)
        # [Batch, Seq, Total_Fields, Emb]
        stack = torch.cat(stack_list, dim=2)

        # Reshape to [Batch * Seq, Total_Fields, Emb]
        total_rows = batch_size * pad_seq
        stack_flat = stack.view(total_rows, self.num_fields, self.embedding_size)

        # AFM part
        afm_scores_flat = self._compute_afm_interaction(stack_flat)
        afm_scores = afm_scores_flat.view(batch_size, pad_seq)

        return linear_pred + afm_scores

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the AFM model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/afm.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the AFM model.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item is scored, ``block_size`` items at a time.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    n_users = user_indices.size(0)

    # First-order terms that depend only on the user (and its context)
    user_term = self.global_bias + self.user_bias(user_indices).squeeze(-1)
    if contexts is not None and self.context_dims:
        shifted_ctx = contexts + self.context_offsets
        user_term += self.merged_context_bias(shifted_ctx).sum(dim=1).squeeze(-1)

    # Embeddings shared by every candidate item
    user_vecs = self.user_embedding(user_indices)  # [n_users, embedding_size]
    ctx_vecs = self._get_context_embeddings(contexts)

    if item_indices is not None:
        # Sampled prediction: one candidate list per user
        n_candidates = item_indices.size(1)

        item_vecs = self.item_embedding(item_indices)  # [Batch, Seq, Emb]
        feat_vecs = self._get_feature_embeddings(item_indices)
        feat_term = self._get_feature_bias(item_indices)
        item_term = self.item_bias(item_indices).squeeze(-1)

        first_order = user_term.unsqueeze(1) + item_term + feat_term

        # Each entry is [Batch, Seq, n_fields_of_that_kind, Emb]
        fields = [
            user_vecs.unsqueeze(1).unsqueeze(2).expand(-1, n_candidates, -1, -1),
            item_vecs.unsqueeze(2),
        ]
        if feat_vecs is not None:
            fields.append(feat_vecs)
        if ctx_vecs is not None:
            fields.append(ctx_vecs.unsqueeze(1).expand(-1, n_candidates, -1, -1))

        # [Batch, Seq, Total_Fields, Emb] -> [Batch * Seq, Total_Fields, Emb]
        flat_fields = torch.cat(fields, dim=2).view(
            n_users * n_candidates, self.num_fields, self.embedding_size
        )

        second_order = self._compute_afm_interaction(flat_fields).view(
            n_users, n_candidates
        )
        return first_order + second_order

    # Full prediction: walk the catalogue in blocks to bound memory use
    block_scores = []
    block_start = 0
    while block_start < self.n_items:
        block_end = min(block_start + self.block_size, self.n_items)
        block_items = torch.arange(block_start, block_end, device=self.device)

        block_item_vecs = self.item_embedding(block_items)
        block_feat_vecs = self._get_feature_embeddings(block_items)
        block_feat_term = self._get_feature_bias(block_items)
        block_item_term = self.item_bias(block_items).squeeze(-1)

        # User-side terms broadcast over items, item-side terms over users
        first_order = (
            user_term.unsqueeze(1)
            + block_item_term.unsqueeze(0)
            + block_feat_term.unsqueeze(0)
        )

        second_order = self._compute_network_scores(
            user_vecs,
            block_item_vecs.unsqueeze(0).expand(n_users, -1, -1),
            block_feat_vecs,
            ctx_vecs,
            n_users,
            block_end - block_start,
        )

        block_scores.append(first_order + second_order)
        block_start += self.block_size

    return torch.cat(block_scores, dim=1)

warprec.recommenders.context_aware_recommender.dcn.DCN

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of the Deep & Cross Network (DCN) from "Deep & Cross Network for Ad Click Predictions", ADKDD 2017.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

cross_layer_num int

The number of cross layers.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/dcn.py
@model_registry.register(name="DCN")
class DCN(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of Deep & Cross Network (DCN) from
        Deep & Cross Network for Ad Click Predictions, ADKDD 2017.

    The flattened field embeddings feed a cross network and an MLP in
    parallel; their concatenated outputs go through a final linear layer, and
    the resulting logit is added to the first-order (bias) term.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. ``block_size`` is read
            from here (see Attributes).

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        cross_layer_num (int): The number of cross layers.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
        block_size (int): Number of items scored per iteration when predicting
            over the full catalogue (``kwargs["block_size"]``, default 50).
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    cross_layer_num: int
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        self.block_size = kwargs.get("block_size", 50)
        # Normalize to a real list so it can be concatenated with [input_dim]
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # DCN Specific Layers
        # Total fields = User (1) + Item (1) + Features (N) + Contexts (M)
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)
        self.input_dim = self.num_fields * self.embedding_size

        # Cross Network Parameters
        # Weights and Biases for each layer
        self.cross_layer_w = nn.ParameterList(
            [
                nn.Parameter(torch.randn(self.input_dim))
                for _ in range(self.cross_layer_num)
            ]
        )
        self.cross_layer_b = nn.ParameterList(
            [
                nn.Parameter(torch.zeros(self.input_dim))
                for _ in range(self.cross_layer_num)
            ]
        )

        # Deep Network (MLP)
        # Input is the flattened embedding vector
        self.mlp_layers = MLP([self.input_dim] + self.mlp_hidden_size, self.dropout)

        # Final Prediction Layer
        # Input: Output of Cross Network + Output of Deep Network
        # Cross Network output size is same as input_dim
        final_dim = self.input_dim + self.mlp_hidden_size[-1]
        self.predict_layer = nn.Linear(final_dim, 1)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def _cross_network(self, x_0: Tensor) -> Tensor:
        """Computes the output of the Cross Network.

        Formula: x_{l+1} = x_0 * (x_l^T * w_l) + b_l + x_l

        Args:
            x_0 (Tensor): Flattened input embeddings [batch_size, input_dim].

        Returns:
            Tensor: Output of the last cross layer [batch_size, input_dim].
        """
        x_l = x_0
        for weight, bias in zip(self.cross_layer_w, self.cross_layer_b):
            # x_l^T * w is a per-sample dot product -> [batch, 1]
            xl_w = torch.matmul(x_l, weight).unsqueeze(1)

            # x_0 * scalar + bias + x_l
            x_l = x_0 * xl_w + bias + x_l

        return x_l

    def _compute_logits(self, dcn_input: Tensor) -> Tensor:
        """Core logic of DCN: Shared between forward and predict.

        Args:
            dcn_input (Tensor): Flattened input embeddings [batch_size, input_dim]

        Returns:
            Tensor: Logits [batch_size, 1]
        """
        # Deep Part
        deep_output = self.mlp_layers(dcn_input)

        # Cross Part
        cross_output = self._cross_network(dcn_input)

        # Stack and Predict
        stack = torch.cat([cross_output, deep_output], dim=-1)
        output = self.predict_layer(stack)

        return output

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Computes the training loss (BCE + weighted embedding regularization).

        Args:
            batch (Any): Indexable batch laid out as
                ``(user, item, rating[, features][, contexts])``; the optional
                entries are present only when the model has feature / context
                dimensions.
            batch_idx (int): Index of the batch (unused).

        Returns:
            Tensor: The scalar loss, also logged under the key ``"loss"``.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on embeddings and biases
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the DCN model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score for each triplet (user, item, context).
        """
        # Linear Part (First Order)
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Interaction Part (Second Order)
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        # Concatenate on Field dimension
        dcn_input_block = torch.cat(components, dim=1)

        # Flatten the input
        batch_size = dcn_input_block.shape[0]
        dcn_input = dcn_input_block.view(batch_size, -1)

        # Compute Network
        output = self._compute_logits(dcn_input)

        return linear_part + output.squeeze(-1)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the DCN model.

        The returned score is the first-order (bias) term plus the DCN logit,
        i.e. the same quantity ``forward`` produces during training.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # First-order terms that do not depend on the item.
        # NOTE(review): assumes compute_first_order decomposes into global,
        # user, item, feature and context biases; confirm in
        # ContextRecommenderUtils.
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)
        if contexts is not None and self.context_dims:
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear = fixed_linear + ctx_bias

        # Retrieve Fixed Embeddings (User + Contexts)
        # [batch, embedding_size]
        user_emb = self.user_embedding(user_indices)
        ctx_emb_tensor = self._get_context_embeddings(contexts)

        # Helper function to process item block
        def process_block(
            items_emb_block: Tensor, feat_emb_block_tensor: Optional[Tensor]
        ) -> Tensor:
            """Returns the DCN logits [batch_size, n_items] for a block of items."""
            if items_emb_block.dim() == 2:
                # Case: Full prediction, the item block is shared by all users
                n_items = items_emb_block.shape[0]

                # Item: [1, Block, 1, Emb] -> expand over Batch
                i_exp = (
                    items_emb_block.unsqueeze(0)
                    .unsqueeze(2)
                    .expand(batch_size, -1, -1, -1)
                )

                # [Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
                f_exp = None
                if feat_emb_block_tensor is not None:
                    f_exp = feat_emb_block_tensor.unsqueeze(0).expand(
                        batch_size, -1, -1, -1
                    )
            else:
                # Case: Sampled prediction, items already carry a batch dim
                n_items = items_emb_block.shape[1]

                # Item: [Batch, Seq, 1, Emb]
                i_exp = items_emb_block.unsqueeze(2)

                f_exp = feat_emb_block_tensor

            # User: [Batch, 1, 1, Emb] -> expand over Items
            u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, n_items, -1, -1)

            stack_list = [u_exp, i_exp]
            if f_exp is not None:
                stack_list.append(f_exp)
            if ctx_emb_tensor is not None:
                # [Batch, N_Ctx, Emb] -> [Batch, N_Items, N_Ctx, Emb]
                stack_list.append(
                    ctx_emb_tensor.unsqueeze(1).expand(-1, n_items, -1, -1)
                )

            # Concatenate all fields on dim=2
            dcn_input_block = torch.cat(stack_list, dim=2)

            # Flatten: [Batch * N_Items, Total_Fields * Emb]
            dcn_input_flat = dcn_input_block.view(-1, self.input_dim)

            logits = self._compute_logits(dcn_input_flat)
            return logits.view(batch_size, n_items)

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            preds_list = []
            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)

                # Get item embeddings for the block (shared for all users)
                items_block = torch.arange(start, end, device=self.device)
                item_emb_block = self.item_embedding(
                    items_block
                )  # [block_size, embedding_size]

                # Get feature embeddings for the block
                feat_emb_block_tensor = self._get_feature_embeddings(items_block)

                # First-order term: user side broadcast over items,
                # item side broadcast over users
                linear_pred = (
                    fixed_linear.unsqueeze(1)
                    + self.item_bias(items_block).squeeze(-1).unsqueeze(0)
                    + self._get_feature_bias(items_block).unsqueeze(0)
                )

                # Process the block
                preds_list.append(
                    linear_pred + process_block(item_emb_block, feat_emb_block_tensor)
                )

            return torch.cat(preds_list, dim=1)

        # Case 'sampled': process given item_indices
        item_emb = self.item_embedding(
            item_indices
        )  # [batch_size, seq_len, embedding_size]

        # Get feature embeddings for the specific items
        feat_emb_tensor = self._get_feature_embeddings(item_indices)

        # First-order term for the sampled items
        linear_pred = (
            fixed_linear.unsqueeze(1)
            + self.item_bias(item_indices).squeeze(-1)
            + self._get_feature_bias(item_indices)
        )

        return linear_pred + process_block(item_emb, feat_emb_tensor)

forward(user, item, features=None, contexts=None)

Forward pass of the DCN model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required
features Optional[Tensor]

The tensor containing the features of the interactions.

None
contexts Optional[Tensor]

The tensor containing the context of the interactions.

None

Returns:

Name Type Description
Tensor Tensor

The prediction score for each triplet (user, item, context).

Source code in warprec/recommenders/context_aware_recommender/dcn.py
def forward(
    self,
    user: Tensor,
    item: Tensor,
    features: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
) -> Tensor:
    """Forward pass of the DCN model.

    The first-order (linear) score is added to the score produced by the
    network on the concatenated field embeddings.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.
        features (Optional[Tensor]): The tensor containing the features of the interactions.
        contexts (Optional[Tensor]): The tensor containing the context of the interactions.

    Returns:
        Tensor: The prediction score for each triplet (user, item, context).
    """
    # First-order term
    first_order = self.compute_first_order(user, item, features, contexts)

    # One entry per field; user and item each contribute a single field
    fields = [
        self.user_embedding(user).unsqueeze(1),
        self.item_embedding(item).unsqueeze(1),
    ]

    # Optional side-information fields (indices are shifted into the merged tables)
    if features is not None and self.feature_dims:
        fields.append(self.merged_feature_embedding(features + self.feature_offsets))

    if contexts is not None and self.context_labels:
        fields.append(self.merged_context_embedding(contexts + self.context_offsets))

    # Join along the field axis, then collapse fields and embedding into one axis
    stacked = torch.cat(fields, dim=1)
    flat_input = stacked.view(stacked.shape[0], -1)

    # Network score, with its trailing singleton dimension dropped
    network_score = self._compute_logits(flat_input).squeeze(-1)

    return first_order + network_score

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the DCN model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/dcn.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the DCN model.

    Scores every user of the batch either against the whole catalogue
    (``item_indices`` is None, processed in chunks of ``self.block_size``
    items) or against the per-user candidates given in ``item_indices``.

    NOTE(review): only the network logits are returned here, whereas
    ``forward`` also adds the first-order term -- confirm this is intended.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    batch_size = user_indices.size(0)

    # Retrieve the embeddings that do not depend on the candidate item
    # (user + contexts). User embeddings: [batch, embedding_size]
    user_emb = self.user_embedding(user_indices)
    ctx_emb_tensor = self._get_context_embeddings(contexts)

    # Helper function to score one group of candidate items
    def process_block(
        items_emb_block: Tensor, feat_emb_block_tensor: Optional[Tensor]
    ) -> Tensor:
        if items_emb_block.dim() == 2:
            # Case: full prediction, the same item block is shared by all users
            n_items = items_emb_block.shape[0]

            # Item: [Block, Emb] -> [Batch, Block, 1, Emb]
            i_exp = (
                items_emb_block.unsqueeze(0)
                .unsqueeze(2)
                .expand(batch_size, -1, -1, -1)
            )

            # [Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
            f_exp = None
            if feat_emb_block_tensor is not None:
                f_exp = feat_emb_block_tensor.unsqueeze(0).expand(
                    batch_size, -1, -1, -1
                )
        else:
            # Case: sampled prediction, every user has its own candidates
            n_items = items_emb_block.shape[1]

            # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
            i_exp = items_emb_block.unsqueeze(2)

            # Already indexed per candidate, no expansion needed
            f_exp = feat_emb_block_tensor

        # User: [Batch, Emb] -> [Batch, N_Items, 1, Emb]
        u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, n_items, -1, -1)

        # Context: [Batch, N_Ctx, Emb] -> [Batch, N_Items, N_Ctx, Emb]
        c_exp = None
        if ctx_emb_tensor is not None:
            c_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, n_items, -1, -1)

        stack_list = [u_exp, i_exp]
        if f_exp is not None:
            stack_list.append(f_exp)
        if c_exp is not None:
            stack_list.append(c_exp)

        # Concatenate all fields on dim=2
        dcn_input_block = torch.cat(stack_list, dim=2)

        # Flatten: [Batch * N_Items, Total_Fields * Emb]
        dcn_input_flat = dcn_input_block.view(-1, self.input_dim)

        logits = self._compute_logits(dcn_input_flat)
        return logits.view(batch_size, n_items)

    if item_indices is None:
        # Case 'full': iterate through all items in memory-safe blocks
        preds_list = []
        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)

            # Item embeddings for the block (shared for all users)
            items_block = torch.arange(start, end, device=self.device)
            item_emb_block = self.item_embedding(
                items_block
            )  # [block_size, embedding_size]

            # Feature embeddings for the block (None when unavailable)
            feat_emb_block = self._get_feature_embeddings(items_block)

            preds_list.append(process_block(item_emb_block, feat_emb_block))

        return torch.cat(preds_list, dim=1)

    # Case 'sampled': process given item_indices
    item_emb = self.item_embedding(
        item_indices
    )  # [batch_size, seq_len, embedding_size]

    # Feature embeddings for the specific items
    feat_emb_tensor = self._get_feature_embeddings(item_indices)

    return process_block(item_emb, feat_emb_tensor)

warprec.recommenders.context_aware_recommender.dcnv2.DCNv2

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of Deep & Cross Network V2 (DCNv2) from "DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems", WWW 2021.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

cross_layer_num int

The number of cross layers.

dropout float

The dropout probability.

model_structure str

The model structure to use.

use_mixed bool

Whether or not to use the mixture-of-experts (MoE) low-rank cross network.

expert_num int

The number of experts to use in the MoE.

low_rank int

The low rank dimension.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Raises:

Type Description
ValueError

If model_structure parameter is not supported.

Source code in warprec/recommenders/context_aware_recommender/dcnv2.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
@model_registry.register(name="DCNv2")
class DCNv2(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of Deep & Cross Network V2 (DCNv2) from
        Dcn v2: Improved deep & cross network and practical lessons for web-scale, WWW 2021.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        cross_layer_num (int): The number of cross layers.
        dropout (float): The dropout probability.
        model_structure (str): The model structure to use.
        use_mixed (bool): Wether or not use the MoE.
        expert_num (int): The number of expert to use in MoE.
        low_rank (int): The low rank dimension.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.

    Raises:
        ValueError: If model_structure parameter is not supported.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    cross_layer_num: int
    dropout: float
    model_structure: str
    use_mixed: bool
    expert_num: int
    low_rank: int
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the cross network, the deep network and the prediction layer.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            *args (Any): Variable length argument list.
            interactions (Optional[Interactions]): The training interactions.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Arbitrary keyword arguments. ``block_size``
                (default 50) is read from here.

        Raises:
            ValueError: If model_structure is neither 'parallel' nor 'stacked'.
        """
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Number of items scored per chunk by predict() in full-prediction mode
        self.block_size = kwargs.get("block_size", 50)
        # Must be a list: it is concatenated with [input_dim] when building the MLP
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # Input Dimensions: user + item fields plus one field per feature/context label
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)
        self.input_dim = self.num_fields * self.embedding_size

        # NOTE(review): the cross_* tensors below are bare nn.Parameter objects
        # created with torch.randn; whether the final _init_weights pass touches
        # them depends on its implementation -- confirm.
        if self.use_mixed:
            # Mixed Cross Network (MoE + Low Rank)
            # U: [Layers, Experts, Input, Rank]
            self.cross_u = nn.Parameter(
                torch.randn(
                    self.cross_layer_num, self.expert_num, self.input_dim, self.low_rank
                )
            )
            # V: [Layers, Experts, Input, Rank]
            self.cross_v = nn.Parameter(
                torch.randn(
                    self.cross_layer_num, self.expert_num, self.input_dim, self.low_rank
                )
            )
            # C: [Layers, Experts, Rank, Rank]
            self.cross_c = nn.Parameter(
                torch.randn(
                    self.cross_layer_num, self.expert_num, self.low_rank, self.low_rank
                )
            )

            # Gating: one Linear(input_dim -> expert_num) per cross layer
            self.gating = nn.ModuleList(
                [
                    nn.Linear(self.input_dim, self.expert_num)
                    for _ in range(self.cross_layer_num)
                ]
            )
        else:
            # Standard DCNv2 Matrix Cross Network
            # W: [Layers, Input, Input] -> Full Matrix (Expensive for high dim)
            self.cross_w = nn.Parameter(
                torch.randn(self.cross_layer_num, self.input_dim, self.input_dim)
            )

        # Bias: [Layers, Input], shared by both cross network variants
        self.cross_b = nn.Parameter(torch.zeros(self.cross_layer_num, self.input_dim))

        # Deep Network (MLP)
        self.mlp_layers = MLP([self.input_dim] + self.mlp_hidden_size, self.dropout)

        # Prediction Layer: its input width depends on how cross and deep parts combine
        if self.model_structure == "parallel":
            # Cross output and MLP output are concatenated
            final_dim = self.input_dim + self.mlp_hidden_size[-1]
        elif self.model_structure == "stacked":
            # MLP runs on top of the cross output, only its output is used
            final_dim = self.mlp_hidden_size[-1]
        else:
            raise ValueError(
                f"Model structure {self.model_structure} not supported. "
                "Model structure supported are 'parallel' and 'stacked'."
            )

        self.predict_layer = nn.Linear(final_dim, 1)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def _cross_network_matrix(self, x_0: Tensor) -> Tensor:
        """Standard DCNv2 (Matrix).

        Equation: x_{l+1} = x_0 * (W_l * x_l + b_l) + x_l
        """
        x_l = x_0
        for i in range(self.cross_layer_num):
            # W * x_l -> [batch_size, input_dim]
            # Using linear transformation logic: x @ W.T

            # [batch_size, input_dim] @ [input_dim, input_dim] -> [batch_size, input_dim]
            wl_xl = torch.matmul(x_l, self.cross_w[i])

            # Add bias
            wl_xl = wl_xl + self.cross_b[i]

            # Element-wise product with x_0 and add residual
            x_l = x_0 * wl_xl + x_l

        return x_l

    def _cross_network_mixed(self, x_0: Tensor) -> Tensor:
        """Mixed DCNv2 (MoE + Low Rank).

        Vectorized implementation using einsum to avoid loops over experts.
        """
        x_l = x_0  # [batch_size, input_dim]

        for i in range(self.cross_layer_num):
            # Gating
            # [batch_size, expert_num]
            gating_score = self.gating[i](x_l)
            gating_prob = torch.softmax(gating_score, dim=1)

            # Expert Computation (Low Rank)
            # We want to compute: U * tanh(C * tanh(V^T * x))
            xl_v = torch.einsum("bi, eir -> ber", x_l, self.cross_v[i])
            xl_v = torch.tanh(xl_v)  # [batch_size, expert_num, low_rank]

            # Mix in Low Rank (C * result)
            xl_c = torch.einsum("ber, err -> ber", xl_v, self.cross_c[i])
            xl_c = torch.tanh(xl_c)  # [batch_size, expert_num, low_rank]

            # Project back to High Rank (U * result)
            # [batch_size, expert_num, input_dim]
            expert_outputs = torch.einsum("ber, eir -> bei", xl_c, self.cross_u[i])

            # Add bias (broadcast over experts)
            expert_outputs = expert_outputs + self.cross_b[i].unsqueeze(0).unsqueeze(0)

            # Element-wise with x_0: x_0 * (Expert_Out)
            expert_outputs = x_0.unsqueeze(1) * expert_outputs

            # Weighted Sum of Experts (MoE)
            # Gating: [batch_size, expert_num] -> [batch_size, expert_num, 1]
            # Output: [batch_size, input_dim]
            moe_output = torch.sum(expert_outputs * gating_prob.unsqueeze(-1), dim=1)

            # Residual
            x_l = moe_output + x_l

        return x_l

    def _compute_logits(self, dcn_input: Tensor) -> Tensor:
        """Core logic shared between forward and predict."""

        # Cross Network
        if self.use_mixed:
            cross_output = self._cross_network_mixed(dcn_input)
        else:
            cross_output = self._cross_network_matrix(dcn_input)

        # Deep Network
        if self.model_structure == "parallel":
            deep_output = self.mlp_layers(dcn_input)
            stack = torch.cat([cross_output, deep_output], dim=-1)
        else:  # stacked
            # Deep network takes cross output as input
            deep_output = self.mlp_layers(cross_output)
            stack = deep_output

        # 3. Prediction
        output = self.predict_layer(stack)
        return output

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute and log the training loss for one batch.

        The batch holds user, item and rating in its first three positions,
        followed by the features (only if ``feature_dims`` is non-empty) and
        then the contexts (only if ``context_dims`` is non-empty).

        Args:
            batch (Any): The indexable batch described above.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: BCE loss plus the weighted regularization loss.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        # Optional side information follows the three mandatory entries
        cursor = 3
        features: Optional[Tensor] = None
        if self.feature_dims:
            features = batch[cursor]
            cursor += 1

        contexts: Optional[Tensor] = batch[cursor] if self.context_dims else None

        scores = self.forward(user, item, features, contexts)

        # Classification term
        classification_term = self.bce_loss(scores, rating)

        # Regularization term on the parameters selected by get_reg_params
        regularized = self.get_reg_params(user, item, features, contexts)
        penalty_term = self.reg_weight * self.reg_loss(*regularized)

        # Loss logging
        loss = classification_term + penalty_term
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the DCNv2 model.

        The first-order (linear) score is added to the score produced by the
        network on the concatenated field embeddings.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score for each triplet (user, item, context).
        """
        # First-order term
        first_order = self.compute_first_order(user, item, features, contexts)

        # One entry per field; user and item each contribute a single field
        fields = [
            self.user_embedding(user).unsqueeze(1),
            self.item_embedding(item).unsqueeze(1),
        ]

        # Optional side-information fields (indices are shifted into the merged tables)
        if features is not None and self.feature_dims:
            fields.append(
                self.merged_feature_embedding(features + self.feature_offsets)
            )

        if contexts is not None and self.context_labels:
            fields.append(
                self.merged_context_embedding(contexts + self.context_offsets)
            )

        # Join along the field axis, then collapse fields and embedding into one axis
        stacked = torch.cat(fields, dim=1)
        flat_input = stacked.view(stacked.shape[0], -1)

        # Network score, with its trailing singleton dimension dropped
        network_score = self._compute_logits(flat_input).squeeze(-1)

        return first_order + network_score

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the DCNv2 model.

        Scores every user of the batch either against the whole catalogue
        (``item_indices`` is None, processed in chunks of ``self.block_size``
        items) or against the per-user candidates given in ``item_indices``.

        NOTE(review): only the network logits are returned here, whereas
        ``forward`` also adds the first-order term -- confirm this is intended.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # Retrieve the embeddings that do not depend on the candidate item
        # (user + contexts). User embeddings: [batch, embedding_size]
        user_emb = self.user_embedding(user_indices)
        ctx_emb_tensor = self._get_context_embeddings(contexts)

        # Helper function to score one group of candidate items
        def process_block(
            items_emb_block: Tensor, feat_emb_block_tensor: Optional[Tensor]
        ) -> Tensor:
            if items_emb_block.dim() == 2:
                # Case: full prediction, the same item block is shared by all users
                n_items = items_emb_block.shape[0]

                # Item: [Block, Emb] -> [Batch, Block, 1, Emb]
                i_exp = (
                    items_emb_block.unsqueeze(0)
                    .unsqueeze(2)
                    .expand(batch_size, -1, -1, -1)
                )

                # [Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
                f_exp = None
                if feat_emb_block_tensor is not None:
                    f_exp = feat_emb_block_tensor.unsqueeze(0).expand(
                        batch_size, -1, -1, -1
                    )
            else:
                # Case: sampled prediction, every user has its own candidates
                n_items = items_emb_block.shape[1]

                # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
                i_exp = items_emb_block.unsqueeze(2)

                # Already indexed per candidate, no expansion needed
                f_exp = feat_emb_block_tensor

            # User: [Batch, Emb] -> [Batch, N_Items, 1, Emb]
            u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, n_items, -1, -1)

            # Context: [Batch, N_Ctx, Emb] -> [Batch, N_Items, N_Ctx, Emb]
            c_exp = None
            if ctx_emb_tensor is not None:
                c_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, n_items, -1, -1)

            stack_list = [u_exp, i_exp]
            if f_exp is not None:
                stack_list.append(f_exp)
            if c_exp is not None:
                stack_list.append(c_exp)

            # Concatenate all fields on dim=2
            dcn_input_block = torch.cat(stack_list, dim=2)

            # Flatten: [Batch * N_Items, Total_Fields * Emb]
            dcn_input_flat = dcn_input_block.view(-1, self.input_dim)

            logits = self._compute_logits(dcn_input_flat)
            return logits.view(batch_size, n_items)

        if item_indices is None:
            # Case 'full': iterate through all items in memory-safe blocks
            preds_list = []
            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)

                # Item embeddings for the block (shared for all users)
                items_block = torch.arange(start, end, device=self.device)
                item_emb_block = self.item_embedding(items_block)

                # Feature embeddings for the block (None when unavailable)
                feat_emb_block = self._get_feature_embeddings(items_block)

                preds_list.append(process_block(item_emb_block, feat_emb_block))
            return torch.cat(preds_list, dim=1)

        # Case 'sampled': process given item_indices
        item_emb = self.item_embedding(item_indices)

        # Feature embeddings for the specific items
        feat_emb_tensor = self._get_feature_embeddings(item_indices)

        return process_block(item_emb, feat_emb_tensor)

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the DCNv2 model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/dcnv2.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the DCNv2 model.

    Scores every user of the batch either against the whole catalogue
    (``item_indices`` is None, processed in chunks of ``self.block_size``
    items) or against the per-user candidates given in ``item_indices``.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    batch_size = user_indices.size(0)

    # Retrieve the embeddings that do not depend on the candidate item
    # (user + contexts). User embeddings: [batch, embedding_size]
    user_emb = self.user_embedding(user_indices)
    ctx_emb_tensor = self._get_context_embeddings(contexts)

    # Helper function to score one group of candidate items
    def process_block(
        items_emb_block: Tensor, feat_emb_block_tensor: Optional[Tensor]
    ) -> Tensor:
        if items_emb_block.dim() == 2:
            # Case: full prediction, the same item block is shared by all users
            n_items = items_emb_block.shape[0]

            # Item: [Block, Emb] -> [Batch, Block, 1, Emb]
            i_exp = (
                items_emb_block.unsqueeze(0)
                .unsqueeze(2)
                .expand(batch_size, -1, -1, -1)
            )

            # [Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
            f_exp = None
            if feat_emb_block_tensor is not None:
                f_exp = feat_emb_block_tensor.unsqueeze(0).expand(
                    batch_size, -1, -1, -1
                )
        else:
            # Case: sampled prediction, every user has its own candidates
            n_items = items_emb_block.shape[1]

            # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
            i_exp = items_emb_block.unsqueeze(2)

            # Already indexed per candidate, no expansion needed
            f_exp = feat_emb_block_tensor

        # User: [Batch, Emb] -> [Batch, N_Items, 1, Emb]
        u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, n_items, -1, -1)

        # Context: [Batch, N_Ctx, Emb] -> [Batch, N_Items, N_Ctx, Emb]
        c_exp = None
        if ctx_emb_tensor is not None:
            c_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, n_items, -1, -1)

        stack_list = [u_exp, i_exp]
        if f_exp is not None:
            stack_list.append(f_exp)
        if c_exp is not None:
            stack_list.append(c_exp)

        # Concatenate all fields on dim=2
        dcn_input_block = torch.cat(stack_list, dim=2)

        # Flatten: [Batch * N_Items, Total_Fields * Emb]
        dcn_input_flat = dcn_input_block.view(-1, self.input_dim)

        logits = self._compute_logits(dcn_input_flat)
        return logits.view(batch_size, n_items)

    if item_indices is None:
        # Case 'full': iterate through all items in memory-safe blocks
        preds_list = []
        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)

            # Item embeddings for the block (shared for all users)
            items_block = torch.arange(start, end, device=self.device)
            item_emb_block = self.item_embedding(items_block)

            # Feature embeddings for the block (None when unavailable)
            feat_emb_block = self._get_feature_embeddings(items_block)

            preds_list.append(process_block(item_emb_block, feat_emb_block))
        return torch.cat(preds_list, dim=1)

    # Case 'sampled': process given item_indices
    item_emb = self.item_embedding(item_indices)

    # Feature embeddings for the specific items
    feat_emb_tensor = self._get_feature_embeddings(item_indices)

    return process_block(item_emb, feat_emb_tensor)

warprec.recommenders.context_aware_recommender.deepfm.DeepFM

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of DeepFM algorithm from DeepFM: A Factorization-Machine based Neural Network for CTR Prediction, IJCAI 2017.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/deepfm.py
@model_registry.register(name="DeepFM")
class DeepFM(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of DeepFM algorithm from
        DeepFM: A Factorization-Machine based Neural Network for CTR Prediction, IJCAI 2017.

    The score produced by the model is the sum of three terms: a first-order
    (linear) term, a second-order FM interaction term and the output of an MLP
    applied to the concatenation of all field embeddings.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the FM layer, the MLP, the output layer and the losses.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            *args (Any): Variable length argument list.
            interactions (Optional[Interactions]): The training interactions.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Arbitrary keyword arguments. ``block_size``
                (default 50) sets how many items are scored at once by the
                full prediction.
        """
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Check for optional value of block size
        self.block_size = kwargs.get("block_size", 50)

        # Ray Tune converts lists to tuples, convert back to list
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # FM Layer (Interaction Part - Second Order)
        self.fm = FactorizationMachine(reduce_sum=True)

        # Deep Part (DNN)
        # Calculate total number of fields: User + Item + Features + Contexts
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)

        # Input size for MLP is the concatenation of all embeddings
        input_dim = self.num_fields * self.embedding_size

        self.mlp_layers = MLP([input_dim] + self.mlp_hidden_size, self.dropout)

        # Final prediction layer for the Deep part
        self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The batch is read positionally as ``(user, item, rating)``, followed
        by the features if ``self.feature_dims`` is truthy and then by the
        contexts if ``self.context_dims`` is truthy.

        Args:
            batch (Any): The batch of training data.
            batch_idx (int): The index of the batch (unused here).

        Returns:
            Tensor: The BCE loss plus the weighted regularization loss.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on embeddings and biases
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the DeepFM model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score.
        """
        # Linear Part
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Interaction Part (Second Order)
        # unsqueeze(1) turns each lookup into a single "field" along dim 1
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            # Offsets shift every feature column into the merged table
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        # NOTE(review): this guard checks context_labels while training_step
        # and predict check context_dims - presumably equivalent, confirm.
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        stacked_embeddings = torch.cat(components, dim=1)

        # FM Interaction
        fm_part = self.fm(stacked_embeddings).squeeze(-1)

        # Deep Interaction: flatten all fields into one vector per sample
        batch_size = stacked_embeddings.shape[0]
        deep_input = stacked_embeddings.view(batch_size, -1)
        deep_output = self.mlp_layers(deep_input)
        deep_part = self.deep_predict_layer(deep_output).squeeze(-1)

        return linear_part + fm_part + deep_part

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the DeepFM model.

        The user/context contribution is computed once and then combined with
        the item/feature contribution, either for every item (processed in
        chunks of ``self.block_size``) or for the given ``item_indices``.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced. A 1-D tensor is treated as
                one item per user.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # Single user lookup shared by the FM accumulators and the deep part.
        # It is never modified in place below, so sharing it is safe.
        user_emb = self.user_embedding(user_indices)

        # Linear Fixed
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

        # FM Fixed Accumulators
        sum_v_fixed = user_emb
        sum_sq_v_fixed = user_emb.pow(2)

        # Contexts
        ctx_emb_tensor = self._get_context_embeddings(contexts)

        # Process Contexts
        if contexts is not None and self.context_dims:
            # Linear
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear = fixed_linear + ctx_bias

            # FM
            # Out-of-place on purpose: an in-place update of sum_v_fixed would
            # invalidate the tensor autograd saved for pow(2) above.
            sum_v_fixed = sum_v_fixed + ctx_emb_tensor.sum(dim=1)
            sum_sq_v_fixed = sum_sq_v_fixed + ctx_emb_tensor.pow(2).sum(dim=1)

        if item_indices is None:
            # Case 'full'
            preds_list = []

            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)
                current_block_size = end - start

                items_block = torch.arange(start, end, device=user_indices.device)

                # Item Embeddings and Bias
                item_emb = self.item_embedding(items_block)
                item_b = self.item_bias(items_block).squeeze(-1)

                # Feature Embeddings and Bias
                # feat_emb_tensor: [Block, Num_Feat, Emb]
                feat_emb_tensor = self._get_feature_embeddings(items_block)
                feat_b = self._get_feature_bias(items_block)

                # Linear Part
                linear_pred = (
                    fixed_linear.unsqueeze(1)
                    + item_b.unsqueeze(0)
                    + feat_b.unsqueeze(0)
                )

                # FM Part
                # Aggregate Item + Features
                if feat_emb_tensor is not None:
                    item_feat_sum = item_emb + feat_emb_tensor.sum(dim=1)
                    item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(
                        dim=1
                    )
                else:
                    item_feat_sum = item_emb
                    item_feat_sq_sum = item_emb.pow(2)

                sum_v_total = sum_v_fixed.unsqueeze(1) + item_feat_sum.unsqueeze(0)
                sum_v_total_sq = sum_v_total.pow(2)

                sum_sq_total = sum_sq_v_fixed.unsqueeze(1) + item_feat_sq_sum.unsqueeze(
                    0
                )

                # 0.5 * ((sum v)^2 - sum v^2), reduced over the embedding axis
                fm_pred = 0.5 * (sum_v_total_sq - sum_sq_total).sum(dim=2)

                # Deep Part
                # Expand User: [Batch, 1, 1, Emb] -> [Batch, Block, 1, Emb]
                u_exp = (
                    user_emb.unsqueeze(1)
                    .unsqueeze(2)
                    .expand(-1, current_block_size, -1, -1)
                )

                # Expand Item: [1, Block, 1, Emb] -> [Batch, Block, 1, Emb]
                i_exp = (
                    item_emb.unsqueeze(0).unsqueeze(2).expand(batch_size, -1, -1, -1)
                )

                stack_list = [u_exp, i_exp]

                # Expand Features: [1, Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
                if feat_emb_tensor is not None:
                    f_exp = feat_emb_tensor.unsqueeze(0).expand(batch_size, -1, -1, -1)
                    stack_list.append(f_exp)

                # Expand Contexts: [Batch, 1, N_Ctx, Emb] -> [Batch, Block, N_Ctx, Emb]
                if ctx_emb_tensor is not None:
                    c_exp = ctx_emb_tensor.unsqueeze(1).expand(
                        -1, current_block_size, -1, -1
                    )
                    stack_list.append(c_exp)

                # Concatenate: [Batch, Block, Total_Fields, Emb]
                deep_input_block = torch.cat(stack_list, dim=2)

                deep_input_flat = deep_input_block.view(
                    -1, self.num_fields * self.embedding_size
                )

                deep_out = self.mlp_layers(deep_input_flat)
                deep_pred = self.deep_predict_layer(deep_out).view(
                    batch_size, current_block_size
                )

                preds_list.append(linear_pred + fm_pred + deep_pred)

            return torch.cat(preds_list, dim=1)
        # Case 'sampled'
        # Accept one item per user given as a 1-D tensor, like FM.predict does
        if item_indices.dim() == 1:
            item_indices = item_indices.unsqueeze(1)
        pad_seq = item_indices.size(1)

        item_emb = self.item_embedding(item_indices)
        item_b = self.item_bias(item_indices).squeeze(-1)

        feat_emb_tensor = self._get_feature_embeddings(item_indices)
        feat_b = self._get_feature_bias(item_indices)

        # Linear Part
        linear_pred = fixed_linear.unsqueeze(1) + item_b + feat_b

        # FM Part
        if feat_emb_tensor is not None:
            item_feat_sum = item_emb + feat_emb_tensor.sum(dim=2)
            item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(dim=2)
        else:
            item_feat_sum = item_emb
            item_feat_sq_sum = item_emb.pow(2)

        sum_v_fixed_exp = sum_v_fixed.unsqueeze(1)
        sum_sq_v_fixed_exp = sum_sq_v_fixed.unsqueeze(1)

        sum_v_total_sq = (sum_v_fixed_exp + item_feat_sum).pow(2)
        sum_sq_total = sum_sq_v_fixed_exp + item_feat_sq_sum

        fm_pred = 0.5 * (sum_v_total_sq - sum_sq_total).sum(dim=2)

        # Deep Part
        # User: [Batch, Seq, 1, Emb]
        u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, pad_seq, -1, -1)

        # Item: [Batch, Seq, 1, Emb]
        i_exp = item_emb.unsqueeze(2)

        stack_list = [u_exp, i_exp]

        # Features: [Batch, Seq, N_Feat, Emb] (Already correct)
        if feat_emb_tensor is not None:
            stack_list.append(feat_emb_tensor)

        # Contexts: [Batch, Seq, N_Ctx, Emb]
        if ctx_emb_tensor is not None:
            c_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, pad_seq, -1, -1)
            stack_list.append(c_exp)

        deep_input_block = torch.cat(stack_list, dim=2)
        deep_input_flat = deep_input_block.view(
            -1, self.num_fields * self.embedding_size
        )

        deep_out = self.mlp_layers(deep_input_flat)
        deep_pred = self.deep_predict_layer(deep_out).view(batch_size, pad_seq)

        return linear_pred + fm_pred + deep_pred

forward(user, item, features=None, contexts=None)

Forward pass of the DeepFM model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required
features Optional[Tensor]

The tensor containing the features of the interactions.

None
contexts Optional[Tensor]

The tensor containing the context of the interactions.

None

Returns:

Name Type Description
Tensor Tensor

The prediction score.

Source code in warprec/recommenders/context_aware_recommender/deepfm.py
def forward(
    self,
    user: Tensor,
    item: Tensor,
    features: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
) -> Tensor:
    """Score a batch of interactions with the DeepFM model.

    The result is the sum of the first-order term, the FM interaction term
    and the MLP output computed on the flattened field embeddings.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.
        features (Optional[Tensor]): The tensor containing the features of the interactions.
        contexts (Optional[Tensor]): The tensor containing the context of the interactions.

    Returns:
        Tensor: The prediction score.
    """
    # First-order contribution
    first_order = self.compute_first_order(user, item, features, contexts)

    # One entry per group of fields; user and item are always present
    fields = [
        self.user_embedding(user).unsqueeze(1),
        self.item_embedding(item).unsqueeze(1),
    ]

    # Feature fields, shifted by their offsets before the merged lookup
    if features is not None and self.feature_dims:
        fields.append(self.merged_feature_embedding(features + self.feature_offsets))

    # Context fields, handled the same way
    if contexts is not None and self.context_labels:
        fields.append(self.merged_context_embedding(contexts + self.context_offsets))

    field_stack = torch.cat(fields, dim=1)

    # Second-order FM contribution
    second_order = self.fm(field_stack).squeeze(-1)

    # Deep contribution: every field flattened into one row per sample
    flat_fields = field_stack.view(field_stack.shape[0], -1)
    hidden = self.mlp_layers(flat_fields)
    deep_score = self.deep_predict_layer(hidden).squeeze(-1)

    return first_order + second_order + deep_score

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the DeepFM model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/deepfm.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the DeepFM model.

    The user/context contribution is computed once and then combined with
    the item/feature contribution, either for every item (processed in
    chunks of ``self.block_size``) or for the given ``item_indices``.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced. A 1-D tensor is treated as
            one item per user.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    batch_size = user_indices.size(0)

    # Single user lookup shared by the FM accumulators and the deep part.
    # It is never modified in place below, so sharing it is safe.
    user_emb = self.user_embedding(user_indices)

    # Linear Fixed
    fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

    # FM Fixed Accumulators
    sum_v_fixed = user_emb
    sum_sq_v_fixed = user_emb.pow(2)

    # Contexts
    ctx_emb_tensor = self._get_context_embeddings(contexts)

    # Process Contexts
    if contexts is not None and self.context_dims:
        # Linear
        global_ctx = contexts + self.context_offsets
        ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
        fixed_linear = fixed_linear + ctx_bias

        # FM
        # Out-of-place on purpose: an in-place update of sum_v_fixed would
        # invalidate the tensor autograd saved for pow(2) above.
        sum_v_fixed = sum_v_fixed + ctx_emb_tensor.sum(dim=1)
        sum_sq_v_fixed = sum_sq_v_fixed + ctx_emb_tensor.pow(2).sum(dim=1)

    if item_indices is None:
        # Case 'full'
        preds_list = []

        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)
            current_block_size = end - start

            items_block = torch.arange(start, end, device=user_indices.device)

            # Item Embeddings and Bias
            item_emb = self.item_embedding(items_block)
            item_b = self.item_bias(items_block).squeeze(-1)

            # Feature Embeddings and Bias
            # feat_emb_tensor: [Block, Num_Feat, Emb]
            feat_emb_tensor = self._get_feature_embeddings(items_block)
            feat_b = self._get_feature_bias(items_block)

            # Linear Part
            linear_pred = (
                fixed_linear.unsqueeze(1)
                + item_b.unsqueeze(0)
                + feat_b.unsqueeze(0)
            )

            # FM Part
            # Aggregate Item + Features
            if feat_emb_tensor is not None:
                item_feat_sum = item_emb + feat_emb_tensor.sum(dim=1)
                item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(
                    dim=1
                )
            else:
                item_feat_sum = item_emb
                item_feat_sq_sum = item_emb.pow(2)

            sum_v_total = sum_v_fixed.unsqueeze(1) + item_feat_sum.unsqueeze(0)
            sum_v_total_sq = sum_v_total.pow(2)

            sum_sq_total = sum_sq_v_fixed.unsqueeze(1) + item_feat_sq_sum.unsqueeze(
                0
            )

            # 0.5 * ((sum v)^2 - sum v^2), reduced over the embedding axis
            fm_pred = 0.5 * (sum_v_total_sq - sum_sq_total).sum(dim=2)

            # Deep Part
            # Expand User: [Batch, 1, 1, Emb] -> [Batch, Block, 1, Emb]
            u_exp = (
                user_emb.unsqueeze(1)
                .unsqueeze(2)
                .expand(-1, current_block_size, -1, -1)
            )

            # Expand Item: [1, Block, 1, Emb] -> [Batch, Block, 1, Emb]
            i_exp = (
                item_emb.unsqueeze(0).unsqueeze(2).expand(batch_size, -1, -1, -1)
            )

            stack_list = [u_exp, i_exp]

            # Expand Features: [1, Block, N_Feat, Emb] -> [Batch, Block, N_Feat, Emb]
            if feat_emb_tensor is not None:
                f_exp = feat_emb_tensor.unsqueeze(0).expand(batch_size, -1, -1, -1)
                stack_list.append(f_exp)

            # Expand Contexts: [Batch, 1, N_Ctx, Emb] -> [Batch, Block, N_Ctx, Emb]
            if ctx_emb_tensor is not None:
                c_exp = ctx_emb_tensor.unsqueeze(1).expand(
                    -1, current_block_size, -1, -1
                )
                stack_list.append(c_exp)

            # Concatenate: [Batch, Block, Total_Fields, Emb]
            deep_input_block = torch.cat(stack_list, dim=2)

            deep_input_flat = deep_input_block.view(
                -1, self.num_fields * self.embedding_size
            )

            deep_out = self.mlp_layers(deep_input_flat)
            deep_pred = self.deep_predict_layer(deep_out).view(
                batch_size, current_block_size
            )

            preds_list.append(linear_pred + fm_pred + deep_pred)

        return torch.cat(preds_list, dim=1)
    # Case 'sampled'
    # Accept one item per user given as a 1-D tensor, like FM.predict does
    if item_indices.dim() == 1:
        item_indices = item_indices.unsqueeze(1)
    pad_seq = item_indices.size(1)

    item_emb = self.item_embedding(item_indices)
    item_b = self.item_bias(item_indices).squeeze(-1)

    feat_emb_tensor = self._get_feature_embeddings(item_indices)
    feat_b = self._get_feature_bias(item_indices)

    # Linear Part
    linear_pred = fixed_linear.unsqueeze(1) + item_b + feat_b

    # FM Part
    if feat_emb_tensor is not None:
        item_feat_sum = item_emb + feat_emb_tensor.sum(dim=2)
        item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(dim=2)
    else:
        item_feat_sum = item_emb
        item_feat_sq_sum = item_emb.pow(2)

    sum_v_fixed_exp = sum_v_fixed.unsqueeze(1)
    sum_sq_v_fixed_exp = sum_sq_v_fixed.unsqueeze(1)

    sum_v_total_sq = (sum_v_fixed_exp + item_feat_sum).pow(2)
    sum_sq_total = sum_sq_v_fixed_exp + item_feat_sq_sum

    fm_pred = 0.5 * (sum_v_total_sq - sum_sq_total).sum(dim=2)

    # Deep Part
    # User: [Batch, Seq, 1, Emb]
    u_exp = user_emb.unsqueeze(1).unsqueeze(2).expand(-1, pad_seq, -1, -1)

    # Item: [Batch, Seq, 1, Emb]
    i_exp = item_emb.unsqueeze(2)

    stack_list = [u_exp, i_exp]

    # Features: [Batch, Seq, N_Feat, Emb] (Already correct)
    if feat_emb_tensor is not None:
        stack_list.append(feat_emb_tensor)

    # Contexts: [Batch, Seq, N_Ctx, Emb]
    if ctx_emb_tensor is not None:
        c_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, pad_seq, -1, -1)
        stack_list.append(c_exp)

    deep_input_block = torch.cat(stack_list, dim=2)
    deep_input_flat = deep_input_block.view(
        -1, self.num_fields * self.embedding_size
    )

    deep_out = self.mlp_layers(deep_input_flat)
    deep_pred = self.deep_predict_layer(deep_out).view(batch_size, pad_seq)

    return linear_pred + fm_pred + deep_pred

warprec.recommenders.context_aware_recommender.fm.FM

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of the FM algorithm from "Factorization Machines", ICDM 2010.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

reg_weight float

The L2 regularization weight.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/fm.py
@model_registry.register(name="FM")
class FM(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of FM algorithm from
        Factorization Machines ICDM 2010.

    The score produced by the model is the sum of a first-order (linear)
    term and a second-order FM interaction term over the user, item,
    feature and context embeddings.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        reg_weight (float): The L2 regularization weight.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    reg_weight: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the FM layer and the losses, then initialize the weights.

        Args:
            params (dict): Model parameters.
            info (dict): The dictionary containing dataset information.
            *args (Any): Variable length argument list.
            interactions (Optional[Interactions]): The training interactions.
            seed (int): The seed to use for reproducibility.
            **kwargs (Any): Arbitrary keyword arguments.
        """
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # FM Layer (Interaction Part - Second Order)
        self.fm = FactorizationMachine(reduce_sum=True)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The batch is read positionally as ``(user, item, rating)``, followed
        by the features if ``self.feature_dims`` is truthy and then by the
        contexts if ``self.context_dims`` is truthy.

        Args:
            batch (Any): The batch of training data.
            batch_idx (int): The index of the batch (unused here).

        Returns:
            Tensor: The BCE loss plus the weighted regularization loss.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on embeddings and biases
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the FM model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score for each triplet (user, item, context).
        """
        # Linear Part
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Interaction Part (Second Order)
        # unsqueeze(1) turns each lookup into a single "field" along dim 1
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            # Offsets shift every feature column into the merged table
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        fm_input = torch.cat(components, dim=1)
        interaction_part = self.fm(fm_input).squeeze(-1)

        return linear_part + interaction_part

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the linear part and FM.

        The user/context contribution is computed once and broadcast against
        the item/feature contribution of the target items.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced. A 1-D tensor is treated as
                one item per user.
            contexts (Optional[Tensor]): The batch of contexts. Required to
                predict with CARS models.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        # Linear Fixed
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

        # FM Fixed Accumulators (Sum V and Sum V^2)
        sum_v_fixed = self.user_embedding(user_indices)
        sum_sq_v_fixed = sum_v_fixed.pow(2)

        # Process Contexts
        if contexts is not None and self.context_labels:
            # Linear Context
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear = fixed_linear + ctx_bias

            # FM Context
            # Out-of-place on purpose: an in-place update of sum_v_fixed would
            # invalidate the tensor autograd saved for pow(2) above.
            ctx_emb = self.merged_context_embedding(global_ctx)
            sum_v_fixed = sum_v_fixed + ctx_emb.sum(dim=1)
            sum_sq_v_fixed = sum_sq_v_fixed + ctx_emb.pow(2).sum(dim=1)

        # Determine target items
        full_prediction = item_indices is None
        if full_prediction:
            # All items (excluding padding)
            target_items = torch.arange(self.n_items, device=fixed_linear.device)
        else:
            target_items = item_indices
            if target_items.dim() == 1:
                # [batch_size, 1] for sampled case
                target_items = target_items.unsqueeze(1)

        # Item Embeddings
        item_emb = self.item_embedding(target_items)

        # Feature Handling
        feat_bias = self._get_feature_bias(target_items)
        feat_emb_tensor = self._get_feature_embeddings(target_items)

        # Item Linear Bias, updated with the feature bias
        item_linear_total = self.item_bias(target_items).squeeze(-1) + feat_bias

        # Update FM accumulators
        if feat_emb_tensor is not None:
            # dim=-2 is the feature axis in both the full and sampled layouts
            feat_sum = feat_emb_tensor.sum(dim=-2)
            feat_sq_sum = feat_emb_tensor.pow(2).sum(dim=-2)

            # Total Item Component
            item_component_sum = item_emb + feat_sum
            item_component_sq_sum = item_emb.pow(2) + feat_sq_sum
        else:
            item_component_sum = item_emb
            item_component_sq_sum = item_emb.pow(2)

        if full_prediction:
            # Case 'full': item tensors are shared by every user, so add a
            # leading axis ([I, ...] -> [1, I, ...]) to broadcast over the batch
            item_linear_total = item_linear_total.unsqueeze(0)
            item_component_sum = item_component_sum.unsqueeze(0)
            item_component_sq_sum = item_component_sq_sum.unsqueeze(0)

        # From here both cases share the same broadcasting: [B, 1, ...] vs items
        final_linear = fixed_linear.unsqueeze(1) + item_linear_total

        # FM Equation: 0.5 * ((sum v)^2 - sum v^2), reduced over the embedding axis
        sum_all_sq = (sum_v_fixed.unsqueeze(1) + item_component_sum).pow(2)
        sum_sq_all = sum_sq_v_fixed.unsqueeze(1) + item_component_sq_sum
        interaction = 0.5 * (sum_all_sq - sum_sq_all).sum(dim=2)

        return final_linear + interaction

forward(user, item, features=None, contexts=None)

Forward pass of the FM model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required
features Optional[Tensor]

The tensor containing the features of the interactions.

None
contexts Optional[Tensor]

The tensor containing the context of the interactions.

None

Returns:

Name Type Description
Tensor Tensor

The prediction score for each triplet (user, item, context).

Source code in warprec/recommenders/context_aware_recommender/fm.py
def forward(
    self,
    user: Tensor,
    item: Tensor,
    features: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
) -> Tensor:
    """Score a batch of interactions with the FM model.

    The result is the first-order term returned by ``compute_first_order``
    plus the second-order term that ``self.fm`` produces from the stacked
    field embeddings (user, item and, when available, features and contexts).

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.
        features (Optional[Tensor]): The tensor containing the features of the
            interactions. Ignored when ``None`` or when the model has no
            feature dimensions.
        contexts (Optional[Tensor]): The tensor containing the context of the
            interactions. Ignored when ``None`` or when the model has no
            context labels.

    Returns:
        Tensor: The prediction score for each triplet (user, item, context).
    """
    # First-order (linear) term.
    first_order = self.compute_first_order(user, item, features, contexts)

    # User and item always contribute one field each; the extra axis at
    # dim 1 lets every field be concatenated along it.
    field_embeddings = [
        self.user_embedding(user).unsqueeze(1),
        self.item_embedding(item).unsqueeze(1),
    ]

    # Optional side-information fields. Offsets shift the per-field ids into
    # the index space of the merged embedding table.
    if features is not None and self.feature_dims:
        field_embeddings.append(
            self.merged_feature_embedding(features + self.feature_offsets)
        )

    if contexts is not None and self.context_labels:
        field_embeddings.append(
            self.merged_context_embedding(contexts + self.context_offsets)
        )

    # Second-order (pairwise interaction) term.
    stacked_fields = torch.cat(field_embeddings, dim=1)
    second_order = self.fm(stacked_fields).squeeze(-1)

    return first_order + second_order

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the linear part and FM.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts. Required to predict with CARS models.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/fm.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score candidate items for a batch of users (linear part + FM part).

    User and context contributions are accumulated once per user; item and
    item-feature contributions are accumulated once per candidate. The two
    sides are then combined by broadcasting, using the FM identity
    ``0.5 * ((sum v)^2 - sum v^2)`` summed over the embedding axis.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            every item in ``range(n_items)`` is scored. A 1-D tensor is
            treated as one candidate per user.
        contexts (Optional[Tensor]): The batch of contexts. Required to
            predict with CARS models.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    # --- User-side terms, independent of the candidate items ---
    user_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)
    user_vec_sum = self.user_embedding(user_indices)
    user_vec_sq_sum = user_vec_sum.pow(2)

    if contexts is not None and self.context_labels:
        # Shift per-field context ids into the merged tables' index space.
        ctx_ids = contexts + self.context_offsets

        user_linear += self.merged_context_bias(ctx_ids).sum(dim=1).squeeze(-1)

        ctx_vecs = self.merged_context_embedding(ctx_ids)
        user_vec_sum += ctx_vecs.sum(dim=1)
        user_vec_sq_sum += ctx_vecs.pow(2).sum(dim=1)

    # --- Candidate selection ---
    score_all_items = item_indices is None
    if score_all_items:
        candidates = torch.arange(self.n_items, device=user_linear.device)
    elif item_indices.dim() == 1:
        # One candidate per user: promote to [batch_size, 1].
        candidates = item_indices.unsqueeze(1)
    else:
        candidates = item_indices

    # --- Item-side terms (item + its features) ---
    item_linear = self.item_bias(candidates).squeeze(-1)
    item_vecs = self.item_embedding(candidates)

    feature_bias = self._get_feature_bias(candidates)
    feature_vecs = self._get_feature_embeddings(candidates)

    item_linear += feature_bias

    item_vec_sum = item_vecs
    item_vec_sq_sum = item_vecs.pow(2)
    if feature_vecs is not None:
        # Features are reduced over the second-to-last (per-feature) axis.
        item_vec_sum = item_vec_sum + feature_vecs.sum(dim=-2)
        item_vec_sq_sum = item_vec_sq_sum + feature_vecs.pow(2).sum(dim=-2)

    if score_all_items:
        # Item tensors carry no batch axis here; add a leading one so they
        # broadcast against every user ([1, I] and [1, I, E]).
        item_linear = item_linear.unsqueeze(0)
        item_vec_sum = item_vec_sum.unsqueeze(0)
        item_vec_sq_sum = item_vec_sq_sum.unsqueeze(0)

    # --- Combine both sides ---
    linear_scores = user_linear.unsqueeze(1) + item_linear

    square_of_sum = (user_vec_sum.unsqueeze(1) + item_vec_sum).pow(2)
    sum_of_squares = user_vec_sq_sum.unsqueeze(1) + item_vec_sq_sum
    pairwise_scores = 0.5 * (square_of_sum - sum_of_squares).sum(dim=2)

    return linear_scores + pairwise_scores

warprec.recommenders.context_aware_recommender.nfm.NFM

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of NFM algorithm from Neural Factorization Machines for Sparse Predictive Analytics, SIGIR 2017.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/nfm.py
@model_registry.register(name="NFM")
class NFM(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of NFM algorithm from
        Neural Factorization Machines for Sparse Predictive Analytics, SIGIR 2017.

    The score of an interaction is the first-order (linear) term plus the
    output of an MLP applied to the batch-normalised Bi-Interaction pooling
    of the field embeddings.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. ``block_size`` (default 50)
            sets how many items are scored per chunk in full prediction.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Number of items scored per chunk in full prediction (optional kwarg)
        self.block_size = kwargs.get("block_size", 50)

        # Ray Tune converts lists to tuples, convert back to list
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # Batch Normalization after the Bi-Interaction pooling
        self.batch_norm = nn.BatchNorm1d(self.embedding_size)

        # MLP Layers: Input size is the embedding size (output of Bi-Interaction)
        # The MLP class handles the hidden layers and dropout
        self.mlp_layers = MLP(
            [self.embedding_size] + self.mlp_hidden_size, self.dropout
        )

        # Final prediction layer (projects MLP output to scalar)
        self.predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1, bias=False)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The batch is read positionally as ``(user, item, rating)``, followed
        by a features tensor when the model has feature dimensions and then
        by a contexts tensor when the model has context dimensions.

        Args:
            batch (Any): The positional batch described above.
            batch_idx (int): The index of the batch (unused).

        Returns:
            Tensor: BCE-with-logits loss plus ``reg_weight`` times the
                regularization loss; also logged under the name "loss".
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on embeddings and biases
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the NFM model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score for each triplet (user, item, context).
        """
        # Linear Part
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Interaction Part (Second Order)
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        # NOTE(review): this gate uses `context_labels` while `training_step`
        # and `predict` use `context_dims` -- confirm both are always set
        # (or empty) together.
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        fm_input = torch.cat(components, dim=1)

        # Bi-Interaction Pooling: 0.5 * ((sum v)^2 - sum v^2) over the fields
        sum_of_vectors = torch.sum(fm_input, dim=1)
        sum_of_squares = torch.sum(fm_input.pow(2), dim=1)
        bi_interaction = 0.5 * (sum_of_vectors.pow(2) - sum_of_squares)

        # Neural Layers
        bi_interaction = self.batch_norm(bi_interaction)
        mlp_output = self.mlp_layers(bi_interaction)
        prediction_score = self.predict_layer(mlp_output).squeeze(-1)

        return linear_part + prediction_score

    def _score_pooled(self, bi_interaction: Tensor, n_rows: int, n_cols: int) -> Tensor:
        """Run pooled interaction vectors through BatchNorm, MLP and projection.

        Args:
            bi_interaction (Tensor): Pooled vectors whose last dimension is
                ``embedding_size``; leading dimensions are flattened.
            n_rows (int): First dimension of the returned score matrix.
            n_cols (int): Second dimension of the returned score matrix.

        Returns:
            Tensor: The neural scores reshaped to ``[n_rows, n_cols]``.
        """
        # BatchNorm1d and the MLP operate on 2-D input, so flatten first
        flat = bi_interaction.view(-1, self.embedding_size)
        flat = self.batch_norm(flat)
        mlp_out = self.mlp_layers(flat)
        return self.predict_layer(mlp_out).view(n_rows, n_cols)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the NFM model.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced, in chunks of ``block_size``
                items. A 1-D tensor is treated as one candidate per user.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # Linear Fixed
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

        # Interaction Fixed Accumulators
        sum_v_fixed = self.user_embedding(user_indices)
        sum_sq_v_fixed = sum_v_fixed.pow(2)

        # Process Contexts (vectorized)
        if contexts is not None and self.context_dims:
            # Linear
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear += ctx_bias

            # Interaction
            ctx_emb = self.merged_context_embedding(global_ctx)
            sum_v_fixed += ctx_emb.sum(dim=1)
            sum_sq_v_fixed += ctx_emb.pow(2).sum(dim=1)

        # User-side terms with an item axis for broadcasting; these do not
        # depend on the candidate items, so build them once.
        fixed_linear_exp = fixed_linear.unsqueeze(1)
        sum_v_fixed_exp = sum_v_fixed.unsqueeze(1)
        sum_sq_v_fixed_exp = sum_sq_v_fixed.unsqueeze(1)

        if item_indices is None:
            # Case 'full': score the catalogue block by block
            preds_list = []

            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)
                current_block_size = end - start

                items_block = torch.arange(start, end, device=user_indices.device)

                # Item Embeddings & Bias
                item_emb = self.item_embedding(items_block)
                item_b = self.item_bias(items_block).squeeze(-1)

                # Feature Embeddings & Bias (vectorized)
                feat_emb_tensor = self._get_feature_embeddings(items_block)
                feat_b = self._get_feature_bias(items_block)

                # Linear Part
                linear_pred = (
                    fixed_linear_exp + item_b.unsqueeze(0) + feat_b.unsqueeze(0)
                )

                # Bi-Interaction Part
                # Aggregate Item + Features
                if feat_emb_tensor is not None:
                    item_feat_sum = item_emb + feat_emb_tensor.sum(dim=1)
                    item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(
                        dim=1
                    )
                else:
                    item_feat_sum = item_emb
                    item_feat_sq_sum = item_emb.pow(2)

                # (V_fixed + V_item_total)^2
                sum_v_total_sq = (sum_v_fixed_exp + item_feat_sum.unsqueeze(0)).pow(2)

                # (V_fixed^2 + V_item_total^2)
                sum_sq_total = sum_sq_v_fixed_exp + item_feat_sq_sum.unsqueeze(0)

                # Interaction vector
                bi_interaction = 0.5 * (sum_v_total_sq - sum_sq_total)

                # Neural Part
                neural_pred = self._score_pooled(
                    bi_interaction, batch_size, current_block_size
                )

                preds_list.append(linear_pred + neural_pred)

            return torch.cat(preds_list, dim=1)

        # Case 'sampled'
        if item_indices.dim() == 1:
            # One candidate per user: promote to [batch_size, 1] so the
            # per-candidate axis read below exists.
            item_indices = item_indices.unsqueeze(1)
        pad_seq = item_indices.size(1)

        item_emb = self.item_embedding(item_indices)
        item_b = self.item_bias(item_indices).squeeze(-1)

        feat_emb_tensor = self._get_feature_embeddings(item_indices)
        feat_b = self._get_feature_bias(item_indices)

        # Linear Part
        linear_pred = fixed_linear_exp + item_b + feat_b

        # Bi-Interaction Part
        if feat_emb_tensor is not None:
            item_feat_sum = item_emb + feat_emb_tensor.sum(dim=2)
            item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(dim=2)
        else:
            item_feat_sum = item_emb
            item_feat_sq_sum = item_emb.pow(2)

        sum_v_total_sq = (sum_v_fixed_exp + item_feat_sum).pow(2)
        sum_sq_total = sum_sq_v_fixed_exp + item_feat_sq_sum

        bi_interaction = 0.5 * (sum_v_total_sq - sum_sq_total)

        # Neural Part
        neural_pred = self._score_pooled(bi_interaction, batch_size, pad_seq)

        return linear_pred + neural_pred

forward(user, item, features=None, contexts=None)

Forward pass of the NFM model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required
features Optional[Tensor]

The tensor containing the features of the interactions.

None
contexts Optional[Tensor]

The tensor containing the context of the interactions.

None

Returns:

Name Type Description
Tensor Tensor

The prediction score for each triplet (user, item, context).

Source code in warprec/recommenders/context_aware_recommender/nfm.py
def forward(
    self,
    user: Tensor,
    item: Tensor,
    features: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
) -> Tensor:
    """Score a batch of interactions with the NFM model.

    The field embeddings (user, item and, when available, features and
    contexts) are reduced by Bi-Interaction pooling, batch-normalised, passed
    through the MLP and projected to a scalar, which is added to the
    first-order term returned by ``compute_first_order``.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.
        features (Optional[Tensor]): The tensor containing the features of the
            interactions. Ignored when ``None`` or when the model has no
            feature dimensions.
        contexts (Optional[Tensor]): The tensor containing the context of the
            interactions. Ignored when ``None`` or when the model has no
            context labels.

    Returns:
        Tensor: The prediction score for each triplet (user, item, context).
    """
    # First-order (linear) term.
    first_order = self.compute_first_order(user, item, features, contexts)

    # One embedding per field, each given a field axis at dim 1.
    field_embeddings = [
        self.user_embedding(user).unsqueeze(1),
        self.item_embedding(item).unsqueeze(1),
    ]

    # Optional side information; offsets map per-field ids into the merged
    # embedding tables.
    if features is not None and self.feature_dims:
        field_embeddings.append(
            self.merged_feature_embedding(features + self.feature_offsets)
        )

    if contexts is not None and self.context_labels:
        field_embeddings.append(
            self.merged_context_embedding(contexts + self.context_offsets)
        )

    stacked_fields = torch.cat(field_embeddings, dim=1)

    # Bi-Interaction pooling over the field axis:
    # 0.5 * ((sum of vectors)^2 - sum of squared vectors).
    square_of_sum = stacked_fields.sum(dim=1).pow(2)
    sum_of_squares = stacked_fields.pow(2).sum(dim=1)
    pooled = 0.5 * (square_of_sum - sum_of_squares)

    # Deep part: BatchNorm -> MLP -> scalar projection.
    hidden = self.mlp_layers(self.batch_norm(pooled))
    deep_score = self.predict_layer(hidden).squeeze(-1)

    return first_order + deep_score

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the NFM model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/nfm.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the NFM model.

    User and context contributions are accumulated once per user. They are
    then combined with each candidate item (and its features) through
    Bi-Interaction pooling, BatchNorm, the MLP and the final projection.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced, in chunks of ``block_size``
            items. A 1-D tensor is treated as one candidate per user.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    batch_size = user_indices.size(0)

    # Linear Fixed
    fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

    # Interaction Fixed Accumulators
    sum_v_fixed = self.user_embedding(user_indices)
    sum_sq_v_fixed = sum_v_fixed.pow(2)

    # Process Contexts (vectorized)
    if contexts is not None and self.context_dims:
        # Linear
        global_ctx = contexts + self.context_offsets
        ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
        fixed_linear += ctx_bias

        # Interaction
        ctx_emb = self.merged_context_embedding(global_ctx)
        sum_v_fixed += ctx_emb.sum(dim=1)
        sum_sq_v_fixed += ctx_emb.pow(2).sum(dim=1)

    if item_indices is None:
        # Case 'full': score the catalogue block by block
        preds_list = []

        for start in range(0, self.n_items, self.block_size):
            end = min(start + self.block_size, self.n_items)
            current_block_size = end - start

            items_block = torch.arange(start, end, device=user_indices.device)

            # Item Embeddings & Bias
            item_emb = self.item_embedding(items_block)
            item_b = self.item_bias(items_block).squeeze(-1)

            # Feature Embeddings & Bias (vectorized)
            feat_emb_tensor = self._get_feature_embeddings(items_block)
            feat_b = self._get_feature_bias(items_block)

            # Linear Part
            linear_pred = (
                fixed_linear.unsqueeze(1)
                + item_b.unsqueeze(0)
                + feat_b.unsqueeze(0)
            )

            # Bi-Interaction Part
            # Aggregate Item + Features
            if feat_emb_tensor is not None:
                item_feat_sum = item_emb + feat_emb_tensor.sum(dim=1)
                item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(
                    dim=1
                )
            else:
                item_feat_sum = item_emb
                item_feat_sq_sum = item_emb.pow(2)

            # (V_fixed + V_item_total)^2
            sum_v_total = sum_v_fixed.unsqueeze(1) + item_feat_sum.unsqueeze(0)
            sum_v_total_sq = sum_v_total.pow(2)

            # (V_fixed^2 + V_item_total^2)
            sum_sq_total = sum_sq_v_fixed.unsqueeze(1) + item_feat_sq_sum.unsqueeze(
                0
            )

            # Interaction vector
            bi_interaction = 0.5 * (sum_v_total_sq - sum_sq_total)

            # Flatten for MLP (BatchNorm1d and the MLP take 2-D input)
            bi_interaction_flat = bi_interaction.view(-1, self.embedding_size)

            # Neural Part
            bi_interaction_flat = self.batch_norm(bi_interaction_flat)
            mlp_out = self.mlp_layers(bi_interaction_flat)
            neural_pred = self.predict_layer(mlp_out).view(
                batch_size, current_block_size
            )

            preds_list.append(linear_pred + neural_pred)

        return torch.cat(preds_list, dim=1)

    # Case 'sampled'
    if item_indices.dim() == 1:
        # One candidate per user: promote to [batch_size, 1] so the
        # per-candidate axis read below exists.
        item_indices = item_indices.unsqueeze(1)
    pad_seq = item_indices.size(1)

    item_emb = self.item_embedding(item_indices)
    item_b = self.item_bias(item_indices).squeeze(-1)

    feat_emb_tensor = self._get_feature_embeddings(item_indices)
    feat_b = self._get_feature_bias(item_indices)

    # Linear Part
    linear_pred = fixed_linear.unsqueeze(1) + item_b + feat_b

    # Bi-Interaction Part
    if feat_emb_tensor is not None:
        item_feat_sum = item_emb + feat_emb_tensor.sum(dim=2)
        item_feat_sq_sum = item_emb.pow(2) + feat_emb_tensor.pow(2).sum(dim=2)
    else:
        item_feat_sum = item_emb
        item_feat_sq_sum = item_emb.pow(2)

    sum_v_fixed_exp = sum_v_fixed.unsqueeze(1)
    sum_sq_v_fixed_exp = sum_sq_v_fixed.unsqueeze(1)

    sum_v_total_sq = (sum_v_fixed_exp + item_feat_sum).pow(2)
    sum_sq_total = sum_sq_v_fixed_exp + item_feat_sq_sum

    bi_interaction = 0.5 * (sum_v_total_sq - sum_sq_total)

    # Flatten for MLP (BatchNorm1d and the MLP take 2-D input)
    bi_interaction_flat = bi_interaction.view(-1, self.embedding_size)

    # Neural Part
    bi_interaction_flat = self.batch_norm(bi_interaction_flat)
    mlp_out = self.mlp_layers(bi_interaction_flat)
    neural_pred = self.predict_layer(mlp_out).view(batch_size, pad_seq)

    return linear_pred + neural_pred

warprec.recommenders.context_aware_recommender.wideanddeep.WideAndDeep

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of Wide & Deep algorithm from Wide & Deep Learning for Recommender Systems, DLRS 2016.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

dropout float

The dropout probability.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/wideanddeep.py
@model_registry.register(name="WideAndDeep")
class WideAndDeep(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of Wide & Deep algorithm from
        Wide & Deep Learning for Recommender Systems, DLRS 2016.

    The final score is the sum of a "wide" term built from bias lookups and a
    "deep" term produced by an MLP over the concatenated field embeddings.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. ``block_size`` (default 50)
            sets how many items are scored at once during full prediction.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        dropout (float): The dropout probability.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    dropout: float
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the deep tower, its scalar head and the training losses."""
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Optional kwarg: number of items scored per block in full prediction.
        self.block_size = kwargs.get("block_size", 50)

        # Ray Tune hands lists over as tuples; a real list is needed below.
        self.mlp_hidden_size = list(self.mlp_hidden_size)

        # One field for the user, one for the item, plus one per feature and
        # per context label.
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)

        # The MLP consumes all field embeddings concatenated into one row.
        layer_sizes = [self.num_fields * self.embedding_size, *self.mlp_hidden_size]
        self.mlp_layers = MLP(layer_sizes, self.dropout)

        # Scalar head on top of the last hidden layer of the deep part.
        self.deep_predict_layer = nn.Linear(self.mlp_hidden_size[-1], 1)

        # Training objectives.
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Weight initialisation runs last so every sub-module is covered.
        self.apply(self._init_weights)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The batch is read positionally as ``(user, item, rating)``, followed by
        the feature tensor when ``self.feature_dims`` is truthy and then the
        context tensor when ``self.context_dims`` is truthy.

        Args:
            batch (Any): The indexable batch described above.
            batch_idx (int): Index of the batch (unused here).

        Returns:
            Tensor: BCE-with-logits loss plus the weighted regularization loss.
        """
        user = batch[0]
        item = batch[1]
        rating = batch[2]

        # Optional tensors follow the three mandatory ones, features first.
        next_slot = 3
        features: Optional[Tensor] = None
        if self.feature_dims:
            features = batch[next_slot]
            next_slot += 1
        contexts: Optional[Tensor] = (
            batch[next_slot] if self.context_dims else None
        )

        prediction = self.forward(user, item, features, contexts)

        # Classification loss on the raw logits.
        bce_loss = self.bce_loss(prediction, rating)

        # L2 regularization over the parameters selected by get_reg_params.
        reg_loss = self.reg_weight * self.reg_loss(
            *self.get_reg_params(user, item, features, contexts)
        )

        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the WideAndDeep model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The prediction score for each triplet (user, item, context).
        """
        # Wide part (linear), computed first as in the reference flow.
        wide_score = self.compute_first_order(user, item, features, contexts)

        # Deep part: user and item each contribute one field on dim=1.
        fields = [
            self.user_embedding(user).unsqueeze(1),
            self.item_embedding(item).unsqueeze(1),
        ]

        # Feature ids are shifted by their offsets before the merged lookup.
        if features is not None and self.feature_dims:
            fields.append(
                self.merged_feature_embedding(features + self.feature_offsets)
            )

        # Context ids get the same offset treatment.
        if contexts is not None and self.context_labels:
            fields.append(
                self.merged_context_embedding(contexts + self.context_offsets)
            )

        stacked = torch.cat(fields, dim=1)

        # Flatten all fields of a sample into a single MLP input row.
        flat = stacked.view(stacked.shape[0], -1)
        deep_score = self.deep_predict_layer(self.mlp_layers(flat)).squeeze(-1)

        return wide_score + deep_score

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the WideAndDeep model.

        When ``item_indices`` is None every item in ``range(self.n_items)`` is
        scored, ``self.block_size`` items at a time; otherwise only the given
        candidate items are scored.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        n_users = user_indices.size(0)
        mlp_width = self.num_fields * self.embedding_size

        def deep_scores(field_embs: list, n_cols: int) -> Tensor:
            # Join the per-field embeddings on the field axis (dim=2), flatten
            # each (user, item) pair into one MLP row, then restore the
            # [n_users, n_cols] layout.
            joined = torch.cat(field_embs, dim=2)
            hidden = self.mlp_layers(joined.view(-1, mlp_width))
            return self.deep_predict_layer(hidden).view(n_users, n_cols)

        # Item-independent wide terms: global bias + user bias.
        wide_base = self.global_bias + self.user_bias(user_indices).squeeze(-1)

        # Item-independent deep inputs.
        user_vecs = self.user_embedding(user_indices)

        # Contexts (vectorized lookup; None when the helper returns None).
        ctx_vecs = self._get_context_embeddings(contexts)

        if contexts is not None and self.context_dims:
            # Context biases are summed over the context axis and folded into
            # the wide base in place.
            shifted_ctx = contexts + self.context_offsets
            wide_base += self.merged_context_bias(shifted_ctx).sum(dim=1).squeeze(-1)

        # User embedding with singleton item and field axes: [Batch, 1, 1, Emb].
        user_field = user_vecs.unsqueeze(1).unsqueeze(2)

        if item_indices is not None:
            # Case 'sampled'
            n_cand = item_indices.size(1)

            cand_vecs = self.item_embedding(item_indices)
            cand_bias = self.item_bias(item_indices).squeeze(-1)

            cand_feat_vecs = self._get_feature_embeddings(item_indices)
            cand_feat_bias = self._get_feature_bias(item_indices)

            # Wide part
            wide_scores = wide_base.unsqueeze(1) + cand_bias + cand_feat_bias

            # Deep part: user [Batch, Seq, 1, Emb], item [Batch, Seq, 1, Emb].
            fields = [
                user_field.expand(-1, n_cand, -1, -1),
                cand_vecs.unsqueeze(2),
            ]

            # Features are appended as returned, without reshaping.
            if cand_feat_vecs is not None:
                fields.append(cand_feat_vecs)

            # Contexts are repeated for every candidate item.
            if ctx_vecs is not None:
                fields.append(ctx_vecs.unsqueeze(1).expand(-1, n_cand, -1, -1))

            return wide_scores + deep_scores(fields, n_cand)

        # Case 'full'
        wide_col = wide_base.unsqueeze(1)
        per_block = []

        for lo in range(0, self.n_items, self.block_size):
            hi = min(lo + self.block_size, self.n_items)
            width = hi - lo

            block_ids = torch.arange(lo, hi, device=user_indices.device)

            # Item embeddings and bias
            block_vecs = self.item_embedding(block_ids)
            block_bias = self.item_bias(block_ids).squeeze(-1)

            # Feature embeddings and bias (vectorized)
            block_feat_vecs = self._get_feature_embeddings(block_ids)
            block_feat_bias = self._get_feature_bias(block_ids)

            # Wide part: per-user column broadcast against per-item rows.
            wide_scores = (
                wide_col + block_bias.unsqueeze(0) + block_feat_bias.unsqueeze(0)
            )

            # Deep part: broadcast user over the block and items over users.
            fields = [
                user_field.expand(-1, width, -1, -1),
                block_vecs.unsqueeze(0).unsqueeze(2).expand(n_users, -1, -1, -1),
            ]

            # Item features are shared by all users in the batch.
            if block_feat_vecs is not None:
                fields.append(
                    block_feat_vecs.unsqueeze(0).expand(n_users, -1, -1, -1)
                )

            # Contexts are shared by all items in the block.
            if ctx_vecs is not None:
                fields.append(ctx_vecs.unsqueeze(1).expand(-1, width, -1, -1))

            per_block.append(wide_scores + deep_scores(fields, width))

        return torch.cat(per_block, dim=1)

forward(user, item, features=None, contexts=None)

Forward pass of the WideAndDeep model.

Parameters:

Name Type Description Default
user Tensor

The tensor containing the user indexes.

required
item Tensor

The tensor containing the item indexes.

required
features Optional[Tensor]

The tensor containing the features of the interactions.

None
contexts Optional[Tensor]

The tensor containing the context of the interactions.

None

Returns:

Name Type Description
Tensor Tensor

The prediction score for each triplet (user, item, context).

Source code in warprec/recommenders/context_aware_recommender/wideanddeep.py
def forward(
    self,
    user: Tensor,
    item: Tensor,
    features: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
) -> Tensor:
    """Forward pass of the WideAndDeep model.

    The result is the sum of the wide (first-order) score and the deep score
    produced by the MLP over the concatenated field embeddings.

    Args:
        user (Tensor): The tensor containing the user indexes.
        item (Tensor): The tensor containing the item indexes.
        features (Optional[Tensor]): The tensor containing the features of the interactions.
        contexts (Optional[Tensor]): The tensor containing the context of the interactions.

    Returns:
        Tensor: The prediction score for each triplet (user, item, context).
    """
    # Wide part (linear), computed first as in the reference flow.
    wide_score = self.compute_first_order(user, item, features, contexts)

    # Deep part: user and item each contribute one field on dim=1.
    fields = [
        self.user_embedding(user).unsqueeze(1),
        self.item_embedding(item).unsqueeze(1),
    ]

    # Feature ids are shifted by their offsets before the merged lookup.
    if features is not None and self.feature_dims:
        fields.append(self.merged_feature_embedding(features + self.feature_offsets))

    # Context ids get the same offset treatment.
    if contexts is not None and self.context_labels:
        fields.append(self.merged_context_embedding(contexts + self.context_offsets))

    stacked = torch.cat(fields, dim=1)

    # Flatten all fields of a sample into a single MLP input row.
    flat = stacked.view(stacked.shape[0], -1)
    deep_score = self.deep_predict_layer(self.mlp_layers(flat)).squeeze(-1)

    return wide_score + deep_score

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the WideAndDeep model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/wideanddeep.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Prediction using the WideAndDeep model.

    When ``item_indices`` is None every item in ``range(self.n_items)`` is
    scored, ``self.block_size`` items at a time; otherwise only the given
    candidate items are scored.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    n_users = user_indices.size(0)
    mlp_width = self.num_fields * self.embedding_size

    def deep_scores(field_embs: list, n_cols: int) -> Tensor:
        # Join the per-field embeddings on the field axis (dim=2), flatten
        # each (user, item) pair into one MLP row, then restore the
        # [n_users, n_cols] layout.
        joined = torch.cat(field_embs, dim=2)
        hidden = self.mlp_layers(joined.view(-1, mlp_width))
        return self.deep_predict_layer(hidden).view(n_users, n_cols)

    # Item-independent wide terms: global bias + user bias.
    wide_base = self.global_bias + self.user_bias(user_indices).squeeze(-1)

    # Item-independent deep inputs.
    user_vecs = self.user_embedding(user_indices)

    # Contexts (vectorized lookup; None when the helper returns None).
    ctx_vecs = self._get_context_embeddings(contexts)

    if contexts is not None and self.context_dims:
        # Context biases are summed over the context axis and folded into the
        # wide base in place.
        shifted_ctx = contexts + self.context_offsets
        wide_base += self.merged_context_bias(shifted_ctx).sum(dim=1).squeeze(-1)

    # User embedding with singleton item and field axes: [Batch, 1, 1, Emb].
    user_field = user_vecs.unsqueeze(1).unsqueeze(2)

    if item_indices is not None:
        # Case 'sampled'
        n_cand = item_indices.size(1)

        cand_vecs = self.item_embedding(item_indices)
        cand_bias = self.item_bias(item_indices).squeeze(-1)

        cand_feat_vecs = self._get_feature_embeddings(item_indices)
        cand_feat_bias = self._get_feature_bias(item_indices)

        # Wide part
        wide_scores = wide_base.unsqueeze(1) + cand_bias + cand_feat_bias

        # Deep part: user [Batch, Seq, 1, Emb], item [Batch, Seq, 1, Emb].
        fields = [
            user_field.expand(-1, n_cand, -1, -1),
            cand_vecs.unsqueeze(2),
        ]

        # Features are appended as returned, without reshaping.
        if cand_feat_vecs is not None:
            fields.append(cand_feat_vecs)

        # Contexts are repeated for every candidate item.
        if ctx_vecs is not None:
            fields.append(ctx_vecs.unsqueeze(1).expand(-1, n_cand, -1, -1))

        return wide_scores + deep_scores(fields, n_cand)

    # Case 'full'
    wide_col = wide_base.unsqueeze(1)
    per_block = []

    for lo in range(0, self.n_items, self.block_size):
        hi = min(lo + self.block_size, self.n_items)
        width = hi - lo

        block_ids = torch.arange(lo, hi, device=user_indices.device)

        # Item embeddings and bias
        block_vecs = self.item_embedding(block_ids)
        block_bias = self.item_bias(block_ids).squeeze(-1)

        # Feature embeddings and bias (vectorized)
        block_feat_vecs = self._get_feature_embeddings(block_ids)
        block_feat_bias = self._get_feature_bias(block_ids)

        # Wide part: per-user column broadcast against per-item rows.
        wide_scores = (
            wide_col + block_bias.unsqueeze(0) + block_feat_bias.unsqueeze(0)
        )

        # Deep part: broadcast user over the block and items over users.
        fields = [
            user_field.expand(-1, width, -1, -1),
            block_vecs.unsqueeze(0).unsqueeze(2).expand(n_users, -1, -1, -1),
        ]

        # Item features are shared by all users in the batch.
        if block_feat_vecs is not None:
            fields.append(block_feat_vecs.unsqueeze(0).expand(n_users, -1, -1, -1))

        # Contexts are shared by all items in the block.
        if ctx_vecs is not None:
            fields.append(ctx_vecs.unsqueeze(1).expand(-1, width, -1, -1))

        per_block.append(wide_scores + deep_scores(fields, width))

    return torch.cat(per_block, dim=1)

warprec.recommenders.context_aware_recommender.xdeepfm.xDeepFM

Bases: ContextRecommenderUtils, IterativeRecommender

Implementation of xDeepFM algorithm from xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems, KDD 2018.

Parameters:

Name Type Description Default
params dict

Model parameters.

required
info dict

The dictionary containing dataset information.

required
*args Any

Variable length argument list.

()
interactions Optional[Interactions]

The training interactions.

None
seed int

The seed to use for reproducibility.

42
**kwargs Any

Arbitrary keyword arguments.

{}

Attributes:

Name Type Description
DATALOADER_TYPE

The type of dataloader used.

embedding_size int

The size of the latent vectors.

mlp_hidden_size List[int]

The MLP hidden layer size list.

cin_layer_size List[int]

The size of CIN layers.

dropout float

The dropout probability.

direct bool

The type of output of CIN module.

reg_weight float

The L2 regularization weight.

weight_decay float

The value of weight decay used in the optimizer.

batch_size int

The batch size used for training.

epochs int

The number of epochs.

learning_rate float

The learning rate value.

neg_samples int

Number of negative samples for training.

Source code in warprec/recommenders/context_aware_recommender/xdeepfm.py
@model_registry.register(name="xDeepFM")
class xDeepFM(ContextRecommenderUtils, IterativeRecommender):
    """Implementation of xDeepFM algorithm from
        xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems, KDD 2018.

    The final score is the sum of three parts: a linear (bias) term, a CIN
    score and an MLP score, the last two computed on the same stack of field
    embeddings.

    Args:
        params (dict): Model parameters.
        info (dict): The dictionary containing dataset information.
        *args (Any): Variable length argument list.
        interactions (Optional[Interactions]): The training interactions.
        seed (int): The seed to use for reproducibility.
        **kwargs (Any): Arbitrary keyword arguments. ``block_size`` (default 50)
            sets how many items are scored per block during full prediction;
            ``chunk_size`` (default 4096) sets how many (user, item) rows go
            through CIN and MLP at once.

    Attributes:
        DATALOADER_TYPE: The type of dataloader used.
        embedding_size (int): The size of the latent vectors.
        mlp_hidden_size (List[int]): The MLP hidden layer size list.
        cin_layer_size (List[int]): The size of CIN layers.
        dropout (float): The dropout probability.
        direct (bool): The type of output of CIN module.
        reg_weight (float): The L2 regularization weight.
        weight_decay (float): The value of weight decay used in the optimizer.
        batch_size (int): The batch size used for training.
        epochs (int): The number of epochs.
        learning_rate (float): The learning rate value.
        neg_samples (int): Number of negative samples for training.
    """

    DATALOADER_TYPE = DataLoaderType.ITEM_RATING_LOADER_WITH_CONTEXT

    embedding_size: int
    mlp_hidden_size: List[int]
    cin_layer_size: List[int]
    dropout: float
    direct: bool
    reg_weight: float
    weight_decay: float
    batch_size: int
    epochs: int
    learning_rate: float
    neg_samples: int

    def __init__(
        self,
        params: dict,
        info: dict,
        *args: Any,
        interactions: Optional[Interactions] = None,
        seed: int = 42,
        **kwargs: Any,
    ):
        """Build the CIN and MLP branches, their scalar heads and the losses."""
        super().__init__(
            params, info, *args, interactions=interactions, seed=seed, **kwargs
        )

        # Optional kwargs controlling inference-time batching.
        self.block_size = kwargs.get("block_size", 50)
        self.chunk_size = kwargs.get("chunk_size", 4096)

        # The size lists are concatenated with other lists below, so make sure
        # they really are lists (they may arrive as tuples).
        self.mlp_hidden_size = list(self.mlp_hidden_size)
        self.cin_layer_size = list(self.cin_layer_size)

        # One field for the user, one for the item, plus one per feature and
        # per context label.
        self.num_fields = 2 + len(self.feature_labels) + len(self.context_labels)

        # CIN (Compressed Interaction Network) - Explicit High-order
        self.cin = CIN(
            self.num_fields, self.embedding_size, self.cin_layer_size, self.direct
        )
        self.cin_linear = nn.Linear(self.cin.final_len, 1)

        # DNN (MLP) - Implicit High-order
        input_dim = self.num_fields * self.embedding_size
        self.mlp_layers = MLP([input_dim] + self.mlp_hidden_size, self.dropout)
        self.dnn_linear = nn.Linear(self.mlp_hidden_size[-1], 1)

        # Losses
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.reg_loss = EmbLoss()

        # Initialize weights
        self.apply(self._init_weights)

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        The batch is read positionally as ``(user, item, rating)``, followed by
        the feature tensor when ``self.feature_dims`` is truthy and then the
        context tensor when ``self.context_dims`` is truthy.

        Args:
            batch (Any): The indexable batch described above.
            batch_idx (int): Index of the batch (unused here).

        Returns:
            Tensor: BCE-with-logits loss plus the weighted regularization loss.
        """
        user, item, rating = batch[0], batch[1], batch[2]

        contexts: Optional[Tensor] = None
        features: Optional[Tensor] = None

        current_idx = 3

        # If feature dimensions exist, the next element is features
        if self.feature_dims:
            features = batch[current_idx]
            current_idx += 1

        # If context dimensions exist, the next element is context
        if self.context_dims:
            contexts = batch[current_idx]

        prediction = self.forward(user, item, features, contexts)

        # Compute BCE loss
        bce_loss = self.bce_loss(prediction, rating)

        # Compute L2 regularization on the parameters chosen by get_reg_params
        reg_params = self.get_reg_params(user, item, features, contexts)
        reg_loss = self.reg_weight * self.reg_loss(*reg_params)

        # Loss logging
        loss = bce_loss + reg_loss
        self.log("loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss

    def _cin_dnn_scores(self, stack: Tensor) -> tuple:
        """Score a stack of field embeddings with the CIN and MLP branches.

        Shared by ``forward``, the chunked full prediction and the sampled
        prediction so the three code paths cannot drift apart.

        Args:
            stack (Tensor): Field embeddings with one row per sample on dim=0
                and the fields on dim=1.

        Returns:
            tuple: ``(cin_score, dnn_score)``, each with the trailing singleton
                dimension of its linear head squeezed away. They are returned
                separately so callers keep control of the summation order.
        """
        # Explicit interactions (CIN) first, then the implicit ones (MLP).
        cin_score = self.cin_linear(self.cin(stack)).squeeze(-1)

        # The MLP takes all fields of a row flattened into a single vector.
        dnn_input = stack.view(stack.size(0), -1)
        dnn_score = self.dnn_linear(self.mlp_layers(dnn_input)).squeeze(-1)

        return cin_score, dnn_score

    def forward(
        self,
        user: Tensor,
        item: Tensor,
        features: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward pass of the xDeepFM model.

        Args:
            user (Tensor): The tensor containing the user indexes.
            item (Tensor): The tensor containing the item indexes.
            features (Optional[Tensor]): The tensor containing the features of the interactions.
            contexts (Optional[Tensor]): The tensor containing the context of the interactions.

        Returns:
            Tensor: The sum of the linear, CIN and DNN scores for each input row.
        """
        # Linear Part
        linear_part = self.compute_first_order(user, item, features, contexts)

        # Field embeddings shared by the CIN and DNN branches
        u_emb = self.user_embedding(user).unsqueeze(1)
        i_emb = self.item_embedding(item).unsqueeze(1)
        components = [u_emb, i_emb]

        # Add Feature Embeddings
        if features is not None and self.feature_dims:
            global_feat = features + self.feature_offsets
            f_emb = self.merged_feature_embedding(global_feat)
            components.append(f_emb)

        # Add Context Embeddings
        if contexts is not None and self.context_labels:
            global_ctx = contexts + self.context_offsets
            c_emb = self.merged_context_embedding(global_ctx)
            components.append(c_emb)

        stacked_embeddings = torch.cat(components, dim=1)

        # CIN and DNN parts
        cin_score, dnn_score = self._cin_dnn_scores(stacked_embeddings)

        # Final Sum
        return linear_part + cin_score + dnn_score

    def _compute_network_scores(
        self,
        u_emb: Tensor,
        i_emb: Tensor,
        feat_emb_tensor: Optional[Tensor],
        ctx_emb_tensor: Optional[Tensor],
        batch_size: int,
        num_items: int,
    ) -> Tensor:
        """Compute the CIN + MLP scores for every (user, item) pair in chunks.

        Args:
            u_emb (Tensor): User embeddings, broadcast here over ``num_items``.
            i_emb (Tensor): Item embeddings already laid out per (user, item)
                pair; reshaped to ``batch_size * num_items`` rows.
            feat_emb_tensor (Optional[Tensor]): Per-item feature embeddings,
                broadcast here over the batch. Skipped when None.
            ctx_emb_tensor (Optional[Tensor]): Per-user context embeddings,
                broadcast here over the items. Skipped when None.
            batch_size (int): Number of users.
            num_items (int): Number of items scored per user.

        Returns:
            Tensor: Scores reshaped to ``(batch_size, num_items)``.
        """
        total_rows = batch_size * num_items

        # Lay every field out with one row per (user, item) pair.
        # NOTE(review): reshape on an expanded tensor generally has to copy,
        # so these are probably materialized tensors rather than views; what
        # the chunk loop bounds is the concatenated stack and the CIN/MLP
        # activations.
        u_view = (
            u_emb.unsqueeze(1)
            .unsqueeze(2)
            .expand(-1, num_items, -1, -1)
            .reshape(total_rows, 1, -1)
        )
        i_view = i_emb.unsqueeze(2).reshape(total_rows, 1, -1)

        views = [u_view, i_view]

        # Handle Feature views
        if feat_emb_tensor is not None:
            f_view = (
                feat_emb_tensor.unsqueeze(0)
                .expand(batch_size, -1, -1, -1)
                .reshape(total_rows, -1, self.embedding_size)
            )
            views.append(f_view)

        # Handle Context views
        if ctx_emb_tensor is not None:
            c_view = (
                ctx_emb_tensor.unsqueeze(1)
                .expand(-1, num_items, -1, -1)
                .reshape(total_rows, -1, self.embedding_size)
            )
            views.append(c_view)

        # Pre-allocate the output with the dtype and device of the embeddings,
        # so a non-float32 model is not silently cast to the default dtype
        # (the sampled prediction path keeps the native dtype as well).
        all_scores = torch.empty(total_rows, dtype=u_emb.dtype, device=u_emb.device)

        # Loop on chunk size parameter
        for start in range(0, total_rows, self.chunk_size):
            end = min(start + self.chunk_size, total_rows)

            # Slice every field to the current rows and concatenate on the
            # Field dimension (dim=1)
            chunk_stack = torch.cat([v[start:end] for v in views], dim=1)

            cin_s, dnn_s = self._cin_dnn_scores(chunk_stack)

            # Save in place
            all_scores[start:end] = cin_s + dnn_s

        return all_scores.view(batch_size, num_items)

    def predict(
        self,
        user_indices: Tensor,
        *args: Any,
        item_indices: Optional[Tensor] = None,
        contexts: Optional[Tensor] = None,
        **kwargs: Any,
    ) -> Tensor:
        """Prediction using the xDeepFM model.

        Args:
            user_indices (Tensor): The batch of user indices.
            *args (Any): List of arguments.
            item_indices (Optional[Tensor]): The batch of item indices. If None,
                full prediction will be produced.
            contexts (Optional[Tensor]): The batch of contexts.
            **kwargs (Any): The dictionary of keyword arguments.

        Returns:
            Tensor: The score matrix {user x item}.
        """
        batch_size = user_indices.size(0)

        # Linear Parts (User + Context)
        fixed_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

        # Contexts
        ctx_emb_tensor = self._get_context_embeddings(contexts)

        if contexts is not None and self.context_dims:
            # Linear: context biases summed over the context axis
            global_ctx = contexts + self.context_offsets
            ctx_bias = self.merged_context_bias(global_ctx).sum(dim=1).squeeze(-1)
            fixed_linear += ctx_bias

        # Embeddings (User)
        u_emb = self.user_embedding(user_indices)

        if item_indices is None:
            # Case 'full'
            preds_list = []

            for start in range(0, self.n_items, self.block_size):
                end = min(start + self.block_size, self.n_items)
                current_block_len = end - start

                items_block = torch.arange(start, end, device=self.device)

                # Item Embeddings and Bias
                item_emb_block = self.item_embedding(items_block)
                item_bias_block = self.item_bias(items_block).squeeze(-1)

                # Feature Embeddings and Bias
                feat_emb_block_tensor = self._get_feature_embeddings(items_block)
                feat_bias_block = self._get_feature_bias(items_block)

                # Linear Part
                linear_pred = (
                    fixed_linear.unsqueeze(1)
                    + item_bias_block.unsqueeze(0)
                    + feat_bias_block.unsqueeze(0)
                )

                # Expand Item to match batch size
                item_emb_expanded = item_emb_block.unsqueeze(0).expand(
                    batch_size, -1, -1
                )

                # Compute the CIN + MLP scores in chunks
                net_scores = self._compute_network_scores(
                    u_emb,
                    item_emb_expanded,
                    feat_emb_block_tensor,
                    ctx_emb_tensor,
                    batch_size,
                    current_block_len,
                )

                preds_list.append(linear_pred + net_scores)

            return torch.cat(preds_list, dim=1)

        # Case 'sampled'
        pad_seq = item_indices.size(1)

        item_emb = self.item_embedding(item_indices)
        item_bias = self.item_bias(item_indices).squeeze(-1)

        feat_emb_tensor = self._get_feature_embeddings(item_indices)
        feat_bias = self._get_feature_bias(item_indices)

        # Linear
        linear_pred = fixed_linear.unsqueeze(1) + item_bias + feat_bias

        # Stack Construction
        # User: [Batch, 1, 1, Emb] -> [Batch, Seq, 1, Emb]
        u_emb_exp = u_emb.unsqueeze(1).unsqueeze(2).expand(-1, pad_seq, -1, -1)

        # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
        i_emb_exp = item_emb.unsqueeze(2)

        stack_list = [u_emb_exp, i_emb_exp]

        if feat_emb_tensor is not None:
            stack_list.append(feat_emb_tensor)

        if ctx_emb_tensor is not None:
            c_emb_exp = ctx_emb_tensor.unsqueeze(1).expand(-1, pad_seq, -1, -1)
            stack_list.append(c_emb_exp)

        # Concatenate on Field dimension (dim=2)
        stack = torch.cat(stack_list, dim=2)

        # Flatten to process the whole batch together
        total_rows = batch_size * pad_seq
        stack_flat = stack.view(total_rows, self.num_fields, self.embedding_size)

        # Forward CIN and MLP
        cin_s, dnn_s = self._cin_dnn_scores(stack_flat)

        net_scores = (cin_s + dnn_s).view(batch_size, pad_seq)

        return linear_pred + net_scores

predict(user_indices, *args, item_indices=None, contexts=None, **kwargs)

Prediction using the xDeepFM model.

Parameters:

Name Type Description Default
user_indices Tensor

The batch of user indices.

required
*args Any

List of arguments.

()
item_indices Optional[Tensor]

The batch of item indices. If None, full prediction will be produced.

None
contexts Optional[Tensor]

The batch of contexts.

None
**kwargs Any

The dictionary of keyword arguments.

{}

Returns:

Name Type Description
Tensor Tensor

The score matrix {user x item}.

Source code in warprec/recommenders/context_aware_recommender/xdeepfm.py
def predict(
    self,
    user_indices: Tensor,
    *args: Any,
    item_indices: Optional[Tensor] = None,
    contexts: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tensor:
    """Score items for a batch of users with the xDeepFM model.

    Two modes are supported. When ``item_indices`` is None, all
    ``self.n_items`` items are scored in chunks of ``self.block_size`` and the
    chunks are concatenated along dim 1. Otherwise only the candidates listed
    in ``item_indices`` are scored, one row of candidates per user.

    Args:
        user_indices (Tensor): The batch of user indices.
        *args (Any): List of arguments.
        item_indices (Optional[Tensor]): The batch of item indices. If None,
            full prediction will be produced.
        contexts (Optional[Tensor]): The batch of contexts.
        **kwargs (Any): The dictionary of keyword arguments.

    Returns:
        Tensor: The score matrix {user x item}.
    """
    n_users = user_indices.size(0)

    # Item-independent first-order term: global bias plus per-user bias.
    base_linear = self.global_bias + self.user_bias(user_indices).squeeze(-1)

    # Context embeddings (None when the helper has nothing to return).
    context_emb = self._get_context_embeddings(contexts)

    if contexts is not None and self.context_dims:
        # Offsets are added before the lookup in the merged context bias;
        # the per-context biases are then summed into the fixed linear term.
        shifted_ctx = contexts + self.context_offsets
        base_linear += self.merged_context_bias(shifted_ctx).sum(dim=1).squeeze(-1)

    user_emb = self.user_embedding(user_indices)

    if item_indices is not None:
        # Case 'sampled': score only the supplied candidates.
        n_candidates = item_indices.size(1)

        cand_emb = self.item_embedding(item_indices)
        cand_bias = self.item_bias(item_indices).squeeze(-1)
        cand_feat_emb = self._get_feature_embeddings(item_indices)
        cand_feat_bias = self._get_feature_bias(item_indices)

        first_order = base_linear[:, None] + cand_bias + cand_feat_bias

        # One entry per field group, all laid out as [Batch, Seq, Fields, Emb].
        fields = [
            # User: [Batch, Emb] -> [Batch, Seq, 1, Emb]
            user_emb[:, None, None].expand(-1, n_candidates, -1, -1),
            # Item: [Batch, Seq, Emb] -> [Batch, Seq, 1, Emb]
            cand_emb[:, :, None],
        ]
        if cand_feat_emb is not None:
            fields.append(cand_feat_emb)
        if context_emb is not None:
            fields.append(context_emb[:, None].expand(-1, n_candidates, -1, -1))

        # Join on the field axis, then fold (Batch, Seq) into a single row axis
        # so the networks process every user/candidate pair in one pass.
        n_rows = n_users * n_candidates
        flat_fields = torch.cat(fields, dim=2).view(
            n_rows, self.num_fields, self.embedding_size
        )

        # CIN branch
        cin_score = self.cin_linear(self.cin(flat_fields)).squeeze(-1)

        # MLP branch, fed with the flattened field embeddings
        mlp_hidden = self.mlp_layers(flat_fields.view(n_rows, -1))
        mlp_score = self.dnn_linear(mlp_hidden).squeeze(-1)

        higher_order = (cin_score + mlp_score).view(n_users, n_candidates)
        return first_order + higher_order

    # Case 'full': walk over all items block by block.
    n_items = self.n_items
    block_scores = []

    for lo in range(0, n_items, self.block_size):
        hi = min(lo + self.block_size, n_items)
        block_ids = torch.arange(lo, hi, device=self.device)

        # Item and feature lookups for this block
        block_emb = self.item_embedding(block_ids)
        block_bias = self.item_bias(block_ids).squeeze(-1)
        block_feat_emb = self._get_feature_embeddings(block_ids)
        block_feat_bias = self._get_feature_bias(block_ids)

        # Broadcast user-side term over items and item-side terms over users.
        first_order = base_linear.unsqueeze(1) + block_bias.unsqueeze(0)
        first_order = first_order + block_feat_bias.unsqueeze(0)

        # The item embeddings are repeated for every user in the batch before
        # being handed to the shared scoring helper.
        higher_order = self._compute_network_scores(
            user_emb,
            block_emb.unsqueeze(0).expand(n_users, -1, -1),
            block_feat_emb,
            context_emb,
            n_users,
            hi - lo,
        )

        block_scores.append(first_order + higher_order)

    return torch.cat(block_scores, dim=1)