Skip to content

Diversity Metrics - API Reference

Auto-generated documentation for diversity metric classes.

warprec.evaluation.metrics.diversity.gini_index.Gini

Bases: TopKMetric

The Gini index metric measures the inequality in the distribution of recommended items. Recommendation counts are accumulated per item across all evaluated users, and the index is computed once over that catalog-wide distribution. This implementation accounts for items that were never recommended by applying an offset.

Attributes:

Name Type Description
item_counts Tensor

Tensor to store the recommendation counts for each item.

free_norm Tensor

Total number of recommendations made (accumulated per user).

num_items int

Total number of items in the catalog, as passed to the constructor.

Parameters:

Name Type Description Default
k int

The cutoff for recommendations.

required
num_items int

Number of items in the training set.

required
dist_sync_on_step bool

Torchmetrics parameter.

False
**kwargs Any

The keyword argument dictionary.

{}
Source code in warprec/evaluation/metrics/diversity/gini_index.py
@metric_registry.register("Gini")
class Gini(TopKMetric):
    """Gini index of the item exposure produced by the top-k recommendations.

    Recommendation counts are accumulated per item across every batch passed to
    ``update``; ``compute`` then evaluates the Gini coefficient of that
    distribution over the whole catalog. Items that were never recommended are
    accounted for by offsetting the rank of the recommended ones, so they never
    have to be materialised in the sorted tensor.

    The value is 0 when every catalog item receives the same share of
    recommendations and approaches 1 as recommendations concentrate on fewer items.

    Attributes:
        item_counts (Tensor): Tensor to store the recommendation counts for each item.
        free_norm (Tensor): Total number of recommendations counted in ``item_counts``.
        num_items (int): Total number of items in the catalog, as passed to the constructor.

    Args:
        k (int): The cutoff for recommendations.
        num_items (int): Number of items in the training set.
        dist_sync_on_step (bool): Torchmetrics parameter, forwarded to the base class.
        **kwargs (Any): The keyword argument dictionary (accepted but not used here).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.TOP_K_INDICES}

    item_counts: Tensor
    free_norm: Tensor
    num_items: int

    def __init__(
        self,
        k: int,
        num_items: int,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        super().__init__(k, dist_sync_on_step)
        self.num_items = num_items
        # Both states are summed across processes when running distributed.
        self.add_state(
            "item_counts", default=torch.zeros(self.num_items), dist_reduce_fx="sum"
        )
        # Accumulate the total number of recommendations given (free_norm)
        self.add_state("free_norm", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, **kwargs: Any):
        """Accumulate the item counts of one batch of top-k recommendations.

        Args:
            preds (Tensor): Prediction scores; not read by this metric.
            **kwargs (Any): Must provide ``top_{k}_indices``; may provide
                ``item_indices``, which is handed to ``remap_indices``.
        """
        # Retrieve top_k_indices from kwargs
        top_k_indices = kwargs.get(f"top_{self.k}_indices")

        # Remap top_k_indices to global
        item_indices = kwargs.get("item_indices")
        top_k_indices = self.remap_indices(top_k_indices, item_indices)

        flat_indices = top_k_indices.flatten()

        # Keep only valid catalog ids. The lower bound matters as much as the
        # upper one: torch.bincount raises on negative values.
        in_bounds = (flat_indices >= 0) & (flat_indices < self.num_items)
        flat_indices = flat_indices[in_bounds]

        # Normalise by what was actually counted, so that
        # item_counts / free_norm always sums to one.
        self.free_norm += flat_indices.numel()

        batch_counts = torch.bincount(flat_indices, minlength=self.num_items)
        self.item_counts += batch_counts.to(self.item_counts)

    def compute(self):
        """Compute the Gini index from the accumulated counts.

        Returns:
            dict: ``{self.name: value}`` where ``value`` is a Python float.
            The value is ``0.0`` when nothing was recommended or when the
            catalog has fewer than two items (the index is undefined there).
        """
        # Consider only items that have been recommended at least once
        recommended_counts = self.item_counts[self.item_counts > 0].float()

        # num_items <= 1 would make the (num_items - 1) normaliser zero.
        if (
            self.num_items <= 1
            or recommended_counts.numel() == 0
            or self.free_norm == 0
        ):
            return {self.name: 0.0}

        n_rec_items = recommended_counts.numel()
        sorted_counts, _ = torch.sort(recommended_counts)

        # Never-recommended items occupy the lowest ranks with a zero share,
        # so the recommended ones start at rank ``offset + 1``.
        offset = self.num_items - n_rec_items
        j = torch.arange(
            n_rec_items, dtype=sorted_counts.dtype, device=sorted_counts.device
        )

        contributions = (2 * (j + offset + 1) - self.num_items - 1) * (
            sorted_counts / self.free_norm
        )

        # Sum contributions and normalize
        gini_index = (torch.sum(contributions) / (self.num_items - 1)).item()
        return {self.name: gini_index}

warprec.evaluation.metrics.diversity.shannon_entropy.ShannonEntropy

Bases: TopKMetric

Shannon Entropy measures the diversity of recommendations by calculating the information entropy over item recommendation frequencies.

Attributes:

Name Type Description
item_counts Tensor

Cumulative count of each item's recommendations

total_recs Tensor

Total number of recommendations counted across all evaluated users

Parameters:

Name Type Description Default
k int

Recommendation list cutoff

required
num_items int

Number of items in the training set.

required
dist_sync_on_step bool

Torchmetrics parameter.

False
**kwargs Any

The keyword argument dictionary.

{}
Source code in warprec/evaluation/metrics/diversity/shannon_entropy.py
@metric_registry.register("ShannonEntropy")
class ShannonEntropy(TopKMetric):
    """Shannon Entropy measures the diversity of recommendations by calculating
    the information entropy over item recommendation frequencies.

    Counts are accumulated per item across every batch passed to ``update``;
    ``compute`` turns them into a probability distribution and returns its
    entropy using the natural logarithm.

    Attributes:
        item_counts (Tensor): Cumulative count of each item's recommendations
        total_recs (Tensor): Total number of recommendations counted in ``item_counts``
        num_items (int): Number of items in the catalog, as passed to the constructor

    Args:
        k (int): Recommendation list cutoff
        num_items (int): Number of items in the training set.
        dist_sync_on_step (bool): Torchmetrics parameter, forwarded to the base class.
        **kwargs (Any): The keyword argument dictionary (accepted but not used here).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.TOP_K_INDICES}

    item_counts: Tensor
    total_recs: Tensor
    num_items: int

    def __init__(
        self,
        k: int,
        num_items: int,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        super().__init__(k, dist_sync_on_step)
        self.num_items = num_items
        # Both states are summed across processes when running distributed.
        self.add_state(
            "item_counts", default=torch.zeros(self.num_items), dist_reduce_fx="sum"
        )
        self.add_state("total_recs", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, **kwargs: Any):
        """Accumulate the item counts of one batch of top-k recommendations.

        Args:
            preds (Tensor): Prediction scores; not read by this metric.
            **kwargs (Any): Must provide ``top_{k}_indices``; may provide
                ``item_indices``, which is handed to ``remap_indices``.
        """
        top_k_indices = kwargs.get(f"top_{self.k}_indices")

        # Remap top_k_indices to global
        item_indices = kwargs.get("item_indices")
        top_k_indices = self.remap_indices(top_k_indices, item_indices)

        # Flatten recommendations and count occurrences
        flattened = top_k_indices.flatten().long()

        # Keep only valid catalog ids. The lower bound matters as much as the
        # upper one: torch.bincount raises on negative values.
        in_bounds = (flattened >= 0) & (flattened < self.num_items)
        flattened = flattened[in_bounds]

        # Update state; the integer counts are cast to the state's dtype/device.
        batch_counts = torch.bincount(flattened, minlength=self.num_items)
        self.item_counts += batch_counts.to(self.item_counts)
        self.total_recs += flattened.numel()

    def compute(self):
        """Compute the entropy of the accumulated recommendation distribution.

        Returns:
            dict: ``{self.name: value}`` where ``value`` is a Python float;
            ``0.0`` when no recommendation has been counted.
        """
        # Avoid division by zero
        if self.total_recs == 0:
            return {self.name: 0.0}

        # Calculate probability distribution
        probs = self.item_counts / self.total_recs

        # Filter out zero probabilities to avoid log(0)
        probs = probs[probs > 0]

        # Compute entropy
        shannon_entropy = -torch.sum(probs * torch.log(probs)).item()
        return {self.name: shannon_entropy}

warprec.evaluation.metrics.diversity.srecall.SRecall

Bases: UserAverageTopKMetric

Subtopic Recall (SRecall) metric for evaluating recommender systems.

It measures the proportion of a user's relevant features (or subtopics) that are present among the top-k recommended items. A higher value indicates that the recommendations cover a wider variety of the user's interests (features/subtopics).

Attributes:

Name Type Description
feature_lookup Tensor

The item feature lookup tensor.

Parameters:

Name Type Description Default
k int

The cutoff for recommendations.

required
num_users int

Number of users in the training set.

required
feature_lookup Tensor

A tensor containing the features associated with each item. Tensor shape is expected to be [num_items, num_features].

required
dist_sync_on_step bool

Torchmetrics parameter for distributed synchronization. Defaults to False.

False
**kwargs Any

Additional keyword arguments dictionary.

{}
Source code in warprec/evaluation/metrics/diversity/srecall.py
@metric_registry.register("SRecall")
class SRecall(UserAverageTopKMetric):
    r"""Subtopic Recall (SRecall): feature coverage achieved by the top-k list.

    For each user, the score is the number of distinct features carried by the
    relevant items that appear in the top-k, divided by the number of distinct
    features carried by all of that user's relevant items. Users without any
    relevant feature score 0.

    Attributes:
        feature_lookup (Tensor): The item feature lookup tensor.

    Args:
        k (int): The cutoff for recommendations.
        num_users (int): Number of users in the training set.
        feature_lookup (Tensor): A tensor containing the features associated with each item.
            Tensor shape is expected to be [num_items, num_features].
        dist_sync_on_step (bool): Torchmetrics parameter for distributed synchronization. Defaults to `False`.
        **kwargs (Any): Additional keyword arguments dictionary.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_INDICES,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    feature_lookup: Tensor

    def __init__(
        self,
        k: int,
        num_users: int,
        feature_lookup: Tensor,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        super().__init__(k=k, num_users=num_users, dist_sync_on_step=dist_sync_on_step)

        # Registered as a buffer rather than kept as a plain attribute.
        self.register_buffer("feature_lookup", feature_lookup)

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the per-user subtopic recall for one batch.

        Args:
            preds (Tensor): Prediction scores; only its device is used.
            target (Tensor): Relevance per candidate item; entries > 0 are relevant.
            top_k_rel (Tensor): Relevance of the top-k items; entries > 0 are relevant.
            **kwargs (Any): Must provide ``top_{k}_indices``; may provide
                ``item_indices`` when only a sampled subset of items is scored.

        Returns:
            Tensor: One score per user in the batch.
        """
        ranked = kwargs.get(f"top_{self.k}_indices")
        sampled = kwargs.get("item_indices")

        if sampled is None:
            # Full catalog: one shared feature table, broadcast over the batch.
            candidate_feats = self.feature_lookup.unsqueeze(0)  # [1, num_items, n_feats]
            ranked_feats = self.feature_lookup[ranked]  # [batch, k, n_feats]
        else:
            # Sampled candidates: restrict the lookup to each user's items and
            # pick the top-k rows by their position inside that subset.
            candidate_feats = self.feature_lookup[sampled]  # [batch, num_samples, n_feats]
            gather_index = ranked.unsqueeze(-1).expand(
                -1, -1, candidate_feats.size(-1)
            )
            ranked_feats = torch.gather(candidate_feats, 1, gather_index)  # [batch, k, n_feats]

        # Denominator: distinct features over all relevant items of each user.
        is_relevant = (target > 0).unsqueeze(-1)  # [batch, num_items, 1]
        relevant_feature_mass = (candidate_feats * is_relevant).sum(dim=1)
        denominator = (relevant_feature_mass > 0).float().sum(dim=1)  # [batch]

        # Numerator: distinct features over the relevant items found in the top-k.
        is_hit = (top_k_rel > 0).unsqueeze(-1)  # [batch, k, 1]
        retrieved_feature_mass = (ranked_feats * is_hit).sum(dim=1)
        numerator = (retrieved_feature_mass > 0).float().sum(dim=1)  # [batch]

        # Users with no relevant feature get 0 instead of a division by zero.
        ratio = numerator / denominator
        fallback = torch.tensor(0.0, device=preds.device)
        return torch.where(denominator > 0, ratio, fallback)