Skip to content

Accuracy Metrics - API Reference

Auto-generated documentation for accuracy metric classes.

warprec.evaluation.metrics.accuracy.auc.AUC

Bases: BaseMetric

Computes Area Under the ROC Curve (AUC)

Attributes:

Name Type Description
total_area Tensor

The accumulated area under the curve.

total_positives Tensor

The accumulated number of positive samples.

Parameters:

Name Type Description Default
num_items int

Number of items in the training set.

required
*args Any

The argument list.

()
dist_sync_on_step bool

Torchmetrics parameter.

False
**kwargs Any

The keyword argument dictionary.

{}
Source code in warprec/evaluation/metrics/accuracy/auc.py
@metric_registry.register("AUC")
class AUC(BaseMetric):
    """Area Under the ROC Curve (AUC), accumulated across batches.

    Every call to ``update`` adds the batch's area and positive counts to two
    running sums. ``compute`` reports their ratio, or 0 when no positives
    have been accumulated.

    Attributes:
        total_area (Tensor): Running sum of the area values.
        total_positives (Tensor): Running sum of the positive counts.

    Args:
        num_items (int): Number of items in the training set.
        *args (Any): Extra positional arguments (not used by this class).
        dist_sync_on_step (bool): Torchmetrics parameter.
        **kwargs (Any): Extra keyword arguments (not used by this class).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.BINARY_RELEVANCE}

    total_area: Tensor
    total_positives: Tensor

    def __init__(
        self,
        num_items: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_items = num_items

        # Both accumulators start at zero and are summed across processes.
        for state_name in ("total_area", "total_positives"):
            self.add_state(
                state_name, default=torch.tensor(0.0), dist_reduce_fx="sum"
            )

    def update(self, preds: Tensor, **kwargs: Any):
        """Add the area and positive counts of one batch to the running sums."""
        # Without a "binary_relevance" entry, every item counts as non-relevant.
        relevance = kwargs.get("binary_relevance", torch.zeros_like(preds))

        batch_area, batch_positives = self.compute_area_stats(
            preds, relevance, self.num_items
        )

        self.total_area += batch_area.sum()
        self.total_positives += batch_positives.sum()

    def compute(self):
        """Return ``{name: total_area / total_positives}`` as a Python float."""
        if self.total_positives > 0:
            score = self.total_area / self.total_positives
        else:
            # No positives accumulated: report 0 instead of dividing by zero.
            score = torch.tensor(0.0)
        return {self.name: score.item()}

warprec.evaluation.metrics.accuracy.f1.F1

Bases: TopKMetric

The F1@k metric combines precision and recall at k, providing a harmonic mean between the two to evaluate the relevance of the top-k recommended items.

Attributes:

Name Type Description
metric_instance_1 BaseMetric

First metric to use inside F1-score computation.

metric_instance_2 BaseMetric

Second metric to use inside F1-score computation.

Parameters:

Name Type Description Default
k int

The number of top recommendations to consider (cutoff).

required
num_users int

Number of users in the training set.

required
num_items int

Number of items in the training set.

required
*args Any

Additional arguments to pass to the parent class.

()
beta float

The weight of recall in the harmonic mean. Default is 1.0.

1.0
dist_sync_on_step bool

Torchmetrics parameter.

False
metric_1 str

The name of the first metric. Defaults to Precision.

'Precision'
metric_2 str

The name of the second metric. Defaults to Recall.

'Recall'
**kwargs Any

Additional keyword arguments to pass to the parent class.

{}
Source code in warprec/evaluation/metrics/accuracy/f1.py
@metric_registry.register("F1")
class F1(TopKMetric):
    """F1@k: a beta-weighted harmonic mean of two top-k metrics.

    By default the two metrics are Precision and Recall, giving the usual
    F1@k. Any two registered metric names can be combined instead.

    Attributes:
        metric_instance_1 (BaseMetric): First metric to use inside F1-score computation.
        metric_instance_2 (BaseMetric): Second metric to use inside F1-score computation.

    Args:
        k (int): The number of top recommendations to consider (cutoff).
        num_users (int): Number of users in the training set.
        num_items (int): Number of items in the training set.
        *args (Any): Additional positional arguments (not used by this class).
        beta (float): The weight of recall in the harmonic mean. Default is 1.0.
        dist_sync_on_step (bool): Torchmetrics parameter.
        metric_1 (str): The name of the first metric. Defaults to Precision.
        metric_2 (str): The name of the second metric. Defaults to Recall.
        **kwargs (Any): Additional keyword arguments forwarded to both
            wrapped metrics.
    """

    metric_instance_1: BaseMetric
    metric_instance_2: BaseMetric

    def __init__(
        self,
        k: int,
        num_users: int,
        num_items: int,
        *args: Any,
        beta: float = 1.0,
        dist_sync_on_step: bool = False,
        metric_1: str = "Precision",
        metric_2: str = "Recall",
        **kwargs: Any,
    ):
        super().__init__(k, dist_sync_on_step)
        self.num_users = num_users
        self.beta = beta
        self.metric_1 = metric_1
        self.metric_2 = metric_2

        # Both wrapped metrics are built from the same configuration.
        shared_config = dict(
            k=k,
            num_users=num_users,
            num_items=num_items,
            dist_sync_on_step=dist_sync_on_step,
            **kwargs,
        )
        self.metric_instance_1 = metric_registry.get(metric_1, **shared_config)
        self.metric_instance_2 = metric_registry.get(metric_2, **shared_config)

        # The blocks this metric needs are the union of what the two
        # wrapped metrics need.
        blocks_1 = self.metric_instance_1._REQUIRED_COMPONENTS
        blocks_2 = self.metric_instance_2._REQUIRED_COMPONENTS
        self._REQUIRED_COMPONENTS = blocks_1 | blocks_2

    def update(self, preds: Tensor, user_indices: Tensor, **kwargs: Any):
        """Feed the new batch of predictions to both wrapped metrics."""
        for wrapped in (self.metric_instance_1, self.metric_instance_2):
            wrapped.update(preds, user_indices, **kwargs)

    def compute(self):
        """Combine the two wrapped metrics' scores into the F-beta score."""
        beta_sq = self.beta**2

        # A metric that does not report under its expected name falls back
        # to a zero tensor of length num_users.
        results_1 = self.metric_instance_1.compute()
        score_1 = results_1.get(self.metric_1, torch.zeros(self.num_users))
        results_2 = self.metric_instance_2.compute()
        score_2 = results_2.get(self.metric_2, torch.zeros(self.num_users))

        numerator = (1 + beta_sq) * (score_1 * score_2)
        denominator = beta_sq * score_1 + score_2

        # 0 / 0 (both scores are zero) yields NaN, which is reported as 0.
        f1_score = (numerator / denominator).nan_to_num(0)
        return {self.name: f1_score}

    @property
    def name(self):
        """Metric name; non-default metric pairs are spelled out in it."""
        if self.metric_1 != "Precision" or self.metric_2 != "Recall":
            return f"F1[{self.metric_1}, {self.metric_2}]"
        return self.__class__.__name__

name property

The name of the metric customized based on the metrics compared.

compute()

Computes the F1 score using the two custom metrics.

Source code in warprec/evaluation/metrics/accuracy/f1.py
def compute(self):
    """Combine the two wrapped metrics' scores into the F-beta score."""
    beta_sq = self.beta**2

    # A metric that does not report under its expected name falls back
    # to a zero tensor of length num_users.
    results_1 = self.metric_instance_1.compute()
    score_1 = results_1.get(self.metric_1, torch.zeros(self.num_users))
    results_2 = self.metric_instance_2.compute()
    score_2 = results_2.get(self.metric_2, torch.zeros(self.num_users))

    numerator = (1 + beta_sq) * (score_1 * score_2)
    denominator = beta_sq * score_1 + score_2

    # 0 / 0 (both scores are zero) yields NaN, which is reported as 0.
    f1_score = (numerator / denominator).nan_to_num(0)
    return {self.name: f1_score}

update(preds, user_indices, **kwargs)

Updates the metric state with the new batch of predictions.

Source code in warprec/evaluation/metrics/accuracy/f1.py
def update(self, preds: Tensor, user_indices: Tensor, **kwargs: Any):
    """Feed the new batch of predictions to both wrapped metrics."""
    for wrapped in (self.metric_instance_1, self.metric_instance_2):
        wrapped.update(preds, user_indices, **kwargs)

warprec.evaluation.metrics.accuracy.gauc.GAUC

Bases: UserAverageTopKMetric

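Computes the Group Area Under the ROC Curve (GAUC): an AUC-style score is computed separately for each user as area / positives, with users that have no positive items scoring 0.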

Parameters:

Name Type Description Default
num_items int

Number of items in the training set.

required
num_users int

Number of users in the training set.

required
*args Any

The argument list.

()
dist_sync_on_step bool

Torchmetrics parameter.

False
**kwargs Any

The keyword argument dictionary.

{}
Source code in warprec/evaluation/metrics/accuracy/gauc.py
@metric_registry.register("GAUC")
class GAUC(UserAverageTopKMetric):
    """Group Area Under the ROC Curve (GAUC).

    An AUC-style score is computed separately for every user as
    ``area / positives``; users without positive items score 0.

    Args:
        num_items (int): Number of items in the training set.
        num_users (int): Number of users in the training set.
        *args (Any): Extra positional arguments (not used by this class).
        dist_sync_on_step (bool): Torchmetrics parameter.
        **kwargs (Any): Extra keyword arguments (not used by this class).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.BINARY_RELEVANCE}

    def __init__(
        self,
        num_items: int,
        num_users: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        # This metric has no cutoff of its own, so the parent is given k=0.
        super().__init__(k=0, num_users=num_users, dist_sync_on_step=dist_sync_on_step)
        self.num_items = num_items

    def unpack_inputs(self, preds: Tensor, **kwargs: Any) -> Tuple[Tensor, Tensor, Any]:
        """Return ``(target, users, None)``; no top-k relevance is needed."""
        relevance = kwargs.get("binary_relevance", torch.zeros_like(preds))
        valid = kwargs.get("valid_users", self.valid_users(relevance))
        return relevance, valid, None

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Any, **kwargs: Any
    ) -> Tensor:
        """Return ``area / positives`` per user, or 0 where positives is 0."""
        # k=None: the area statistics are computed without a cutoff.
        area, positives = self.compute_area_stats(preds, target, self.num_items, k=None)

        zero = torch.tensor(0.0, device=preds.device)
        has_positives = positives > 0
        return torch.where(has_positives, area / positives, zero)

warprec.evaluation.metrics.accuracy.hit_rate.HitRate

Bases: UserAverageTopKMetric

The HitRate@k metric counts the users for whom the model retrieved at least one relevant item in the top k.

Source code in warprec/evaluation/metrics/accuracy/hit_rate.py
@metric_registry.register("HitRate")
class HitRate(UserAverageTopKMetric):
    """HitRate@k: 1 for a user with at least one relevant item in the
    top-k recommendations, 0 otherwise.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return a float 0/1 hit indicator for every row of ``top_k_rel``."""
        hits_per_user = top_k_rel.sum(dim=1)
        return hits_per_user.gt(0).float()

warprec.evaluation.metrics.accuracy.lauc.LAUC

Bases: UserAverageTopKMetric

Computes the Limited Area Under the ROC Curve (LAUC): the area obtained for the top-k predictions, normalized per user by min(number of positives, k).

Parameters:

Name Type Description Default
k int

The cutoff.

required
num_users int

Number of users in the training set.

required
num_items int

Number of items in the training set.

required
*args Any

The argument list.

()
dist_sync_on_step bool

Torchmetrics parameter.

False
**kwargs Any

The keyword argument dictionary.

{}
Source code in warprec/evaluation/metrics/accuracy/lauc.py
@metric_registry.register("LAUC")
class LAUC(UserAverageTopKMetric):
    """Computes the Limited Area Under the ROC Curve (LAUC) at cutoff k.

    For every user, the area returned by ``compute_area_stats`` with cutoff
    ``k`` is divided by ``min(number of positives, k)``. Users without
    positive items score 0.

    Args:
        k (int): The cutoff.
        num_users (int): Number of users in the training set.
        num_items (int): Number of items in the training set.
        *args (Any): The argument list (not used by this class).
        dist_sync_on_step (bool): Torchmetrics parameter.
        **kwargs (Any): The keyword argument dictionary, forwarded to the
            parent class.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
    }

    def __init__(
        self,
        k: int,
        num_users: int,
        num_items: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        # dist_sync_on_step is a named parameter here, so it never appears in
        # **kwargs; forward it explicitly or the caller's value is dropped.
        super().__init__(
            k=k,
            num_users=num_users,
            dist_sync_on_step=dist_sync_on_step,
            **kwargs,
        )
        self.num_items = num_items

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Any, **kwargs: Any
    ) -> Tensor:
        """Return ``area / min(positives, k)`` per user, or 0 without positives."""
        # Compute area and positives of sliced predictions
        area, _ = self.compute_area_stats(preds, target, self.num_items, k=self.k)

        # Normalization by min(positives, k)
        total_positives = target.sum(dim=1)
        normalization = torch.minimum(
            total_positives, torch.tensor(self.k, device=preds.device)
        )

        # LAUC = total_area / min(positives, k)
        return torch.where(
            normalization > 0,
            area / normalization,
            torch.tensor(0.0, device=preds.device),
        )

warprec.evaluation.metrics.accuracy.map.MAP

Bases: UserAverageTopKMetric

Mean Average Precision (MAP) at K.

Source code in warprec/evaluation/metrics/accuracy/map.py
@metric_registry.register("MAP")
class MAP(UserAverageTopKMetric):
    """Mean Average Precision (MAP) at K.

    Per user, the precision at every rank that holds a relevant item is
    summed and divided by ``min(number of relevant items, k)``.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the average precision of every user, 0 without relevant items."""
        ranks = torch.arange(1, self.k + 1, device=top_k_rel.device)
        precision_at_rank = top_k_rel.cumsum(dim=1) / ranks  # [batch_size, k]

        cutoff = torch.tensor(self.k, dtype=target.dtype, device=target.device)
        max_hits = torch.minimum(target.sum(dim=1), cutoff)  # [batch_size]

        # Only ranks holding a relevant item contribute to the sum.
        precision_sum = (precision_at_rank * top_k_rel).sum(dim=1)

        return torch.where(
            max_hits > 0,
            precision_sum / max_hits,
            torch.tensor(0.0, device=self._device),
        )  # [batch_size]

warprec.evaluation.metrics.accuracy.mar.MAR

Bases: UserAverageTopKMetric

Mean Average Recall (MAR) at K.

Source code in warprec/evaluation/metrics/accuracy/mar.py
@metric_registry.register("MAR")
class MAR(UserAverageTopKMetric):
    """Mean Average Recall (MAR) at K.

    Per user, the recall at every rank that holds a relevant item is summed
    and divided by ``min(number of relevant items, k)``.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the average recall of every user, 0 without relevant items."""
        relevant_count = target.sum(dim=1)  # [batch_size]

        # The denominator is clamped to 1 so users with no relevant items
        # do not divide by zero.
        recall_at_rank = top_k_rel.cumsum(dim=1) / relevant_count.unsqueeze(1).clamp(
            min=1
        )  # [batch_size, k]

        cutoff = torch.tensor(self.k, dtype=target.dtype, device=target.device)
        max_hits = torch.minimum(relevant_count, cutoff)  # [batch_size]

        # Only ranks holding a relevant item contribute to the sum.
        recall_sum = (recall_at_rank * top_k_rel).sum(dim=1)

        return torch.where(
            max_hits > 0,
            recall_sum / max_hits,
            torch.tensor(0.0, device=self._device),
        )  # [batch_size]

warprec.evaluation.metrics.accuracy.mrr.MRR

Bases: UserAverageTopKMetric

Mean Reciprocal Rank (MRR) at K. MRR measures the position of the first relevant item in the recommendation list.

Source code in warprec/evaluation/metrics/accuracy/mrr.py
@metric_registry.register("MRR")
class MRR(UserAverageTopKMetric):
    """Mean Reciprocal Rank (MRR) at K.

    The per-user score is the reciprocal of the rank of the first relevant
    item in the top-k list, or 0 when the list holds no relevant item.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the reciprocal rank of the first relevant item per user."""
        # argmax returns the 0-based position of the first maximum;
        # +1 turns it into a 1-based rank.
        first_hit_rank = top_k_rel.argmax(dim=1) + 1
        reciprocal = first_hit_rank.float().reciprocal()

        # For rows without any relevant item, argmax does not point at a
        # hit, so those users are overwritten with 0.
        no_hit = top_k_rel.sum(dim=1) == 0
        reciprocal[no_hit] = 0

        return reciprocal

warprec.evaluation.metrics.accuracy.ndcg.nDCG

Bases: UserAverageTopKMetric

The nDCG@k metric is defined as the ratio of the DCG@k to the IDCG@k.

The DCG@k represents the Discounted Cumulative Gain, which measures the gain of the items retrieved.

The IDCG@k represents the Ideal Discounted Cumulative Gain, which measures the maximum gain obtainable by a perfect model.

Source code in warprec/evaluation/metrics/accuracy/ndcg.py
@metric_registry.register("nDCG")
class nDCG(UserAverageTopKMetric):
    """The nDCG@k metric is the ratio of the DCG@k to the IDCG@k.

    The DCG@k (Discounted Cumulative Gain) measures the gain of the items
    retrieved.

    The IDCG@k (Ideal Discounted Cumulative Gain) measures the maximum gain
    obtainable by a perfect model.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.DISCOUNTED_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_DISCOUNTED_RELEVANCE,
    }

    def unpack_inputs(
        self, preds: Tensor, **kwargs: Any
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """Return ``(target, users, top_k_rel)`` based on discounted relevance."""
        relevance = kwargs.get("discounted_relevance", torch.zeros_like(preds))
        valid = kwargs.get("valid_users", self.valid_users(relevance))

        # Use the precomputed top-k block when the caller provides it.
        top_k_key = f"top_{self.k}_discounted_relevance"
        top_k = kwargs.get(top_k_key, self.top_k_relevance(preds, relevance, self.k))
        return relevance, valid, top_k

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return DCG@k divided by IDCG@k for every user."""
        # The ideal ranking puts the k largest relevance values first.
        best_possible, _ = torch.topk(target, self.k, dim=1)
        gain = self.dcg(top_k_rel)
        # The clamp keeps the division defined when the ideal gain is zero.
        ideal_gain = self.dcg(best_possible).clamp(min=1e-10)

        ratio = gain / ideal_gain
        return ratio.nan_to_num(0)

warprec.evaluation.metrics.accuracy.ndcg.nDCGRendle2020

Bases: UserAverageTopKMetric

Normalized Discounted Cumulative Gain (nDCG) metric for evaluating recommender systems.

It measures the ranking quality by considering the position of relevant items, giving higher scores to relevant items that appear earlier in the recommendation list. This implementation calculates nDCG@k using binary relevance (0 or 1).

Source code in warprec/evaluation/metrics/accuracy/ndcg.py
@metric_registry.register("nDCGRendle2020")
class nDCGRendle2020(UserAverageTopKMetric):
    """Normalized Discounted Cumulative Gain (nDCG) on binary relevance.

    Ranking quality is measured by the position of the relevant items:
    relevant items that appear earlier in the recommendation list score
    higher. This variant calculates nDCG@k using *binary relevance* (0 or 1).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return DCG@k divided by IDCG@k for every user."""
        # The ideal ranking puts the k largest relevance values first.
        best_possible, _ = torch.topk(target, self.k, dim=1)
        gain = self.dcg(top_k_rel)
        # The clamp keeps the division defined when the ideal gain is zero.
        ideal_gain = self.dcg(best_possible).clamp(min=1e-10)

        ratio = gain / ideal_gain
        return ratio.nan_to_num(0)

warprec.evaluation.metrics.accuracy.precision.Precision

Bases: UserAverageTopKMetric

Precision@k is the number of relevant items retrieved in the top k, divided by k (the maximum number of items that can be retrieved).

Source code in warprec/evaluation/metrics/accuracy/precision.py
@metric_registry.register("Precision")
class Precision(UserAverageTopKMetric):
    """Precision@k: the number of relevant items among the top-k
    recommendations, divided by k.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return ``hits / k`` for every row of ``top_k_rel``."""
        hit_count = top_k_rel.sum(dim=1).float()
        return hit_count / self.k

warprec.evaluation.metrics.accuracy.recall.Recall

Bases: UserAverageTopKMetric

Recall@k is the number of relevant items retrieved in the top k, divided by the total number of relevant items in the ground truth.

Source code in warprec/evaluation/metrics/accuracy/recall.py
@metric_registry.register("Recall")
class Recall(UserAverageTopKMetric):
    """Recall@k: the number of relevant items among the top-k
    recommendations, divided by the total number of relevant items in the
    ground truth.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return ``hits / relevant`` per user, or 0 without relevant items."""
        n_hits = top_k_rel.sum(dim=1).float()
        n_relevant = target.sum(dim=1).float()

        # Users with no relevant items score 0 rather than dividing by zero.
        zero = torch.tensor(0.0, device=preds.device)
        return torch.where(n_relevant > 0, n_hits / n_relevant, zero)