Accuracy Metrics - API Reference¶

Auto-generated documentation for accuracy metric classes.

`warprec.evaluation.metrics.accuracy.auc.AUC` ¶

Bases: BaseMetric

Computes Area Under the ROC Curve (AUC)

Attributes:

Name	Type	Description
`total_area`	`Tensor`	The accumulated area under the curve.
`total_positives`	`Tensor`	The accumulated number of positive samples.

Parameters:

Name	Type	Description	Default
`num_items`	`int`	Number of items in the training set.	required
`*args`	`Any`	The argument list.	`()`
`dist_sync_on_step`	`bool`	Torchmetrics parameter.	`False`
`**kwargs`	`Any`	The keyword argument dictionary.	`{}`

Source code in warprec/evaluation/metrics/accuracy/auc.py

@metric_registry.register("AUC")
class AUC(BaseMetric):
    """Area Under the ROC Curve (AUC), accumulated over batches.

    Every ``update`` call adds the batch's area and positive counts to two
    running totals; ``compute`` reports the ratio of those totals.

    Attributes:
        total_area (Tensor): Running sum of the area values seen so far.
        total_positives (Tensor): Running sum of the positive counts seen so far.

    Args:
        num_items (int): Number of items in the training set.
        *args (Any): Accepted but not used by this constructor.
        dist_sync_on_step (bool): Torchmetrics parameter, forwarded to the parent.
        **kwargs (Any): Accepted but not used by this constructor.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.BINARY_RELEVANCE}

    total_area: Tensor
    total_positives: Tensor

    def __init__(
        self,
        num_items: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_items = num_items

        # Both accumulators start from a fresh zero scalar and are reduced
        # with "sum".
        for state_name in ("total_area", "total_positives"):
            self.add_state(
                state_name, default=torch.tensor(0.0), dist_reduce_fx="sum"
            )

    def update(self, preds: Tensor, **kwargs: Any):
        """Add one batch of predictions to the running totals.

        Args:
            preds (Tensor): Predicted scores for the batch.
            **kwargs (Any): May carry ``binary_relevance``; when it is absent
                an all-zero tensor shaped like ``preds`` is used instead.
        """
        relevance = kwargs.get("binary_relevance", torch.zeros_like(preds))

        batch_area, batch_positives = self.compute_area_stats(
            preds, relevance, self.num_items
        )

        self.total_area += batch_area.sum()
        self.total_positives += batch_positives.sum()

    def compute(self):
        """Return ``{name: total_area / total_positives}`` as a Python float.

        The score is 0.0 when no positive sample has been accumulated.
        """
        if self.total_positives > 0:
            score = self.total_area / self.total_positives
        else:
            score = torch.tensor(0.0)
        return {self.name: score.item()}

`warprec.evaluation.metrics.accuracy.f1.F1` ¶

Bases: TopKMetric

The F1@k metric combines precision and recall at k, providing a harmonic mean between the two to evaluate the relevance of the top-k recommended items.

Attributes:

Name	Type	Description
`metric_instance_1`	`BaseMetric`	First metric to use inside F1-score computation.
`metric_instance_2`	`BaseMetric`	Second metric to use inside F1-score computation.

Parameters:

Name	Type	Description	Default
`k`	`int`	The number of top recommendations to consider (cutoff).	required
`num_users`	`int`	Number of users in the training set.	required
`num_items`	`int`	Number of items in the training set.	required
`*args`	`Any`	Additional arguments to pass to the parent class.	`()`
`beta`	`float`	The weight of recall in the harmonic mean. Default is 1.0.	`1.0`
`dist_sync_on_step`	`bool`	Torchmetrics parameter.	`False`
`metric_1`	`str`	The name of the first metric. Defaults to Precision.	`'Precision'`
`metric_2`	`str`	The name of the second metric. Defaults to Recall.	`'Recall'`
`**kwargs`	`Any`	Additional keyword arguments to pass to the parent class.	`{}`

Source code in warprec/evaluation/metrics/accuracy/f1.py

@metric_registry.register("F1")
class F1(TopKMetric):
    """F-beta score at cutoff k, built from two metrics taken from the registry.

    With the default pair (Precision, Recall) this is the usual F1@k. Other
    registered metrics can be combined through ``metric_1`` / ``metric_2``.

    Attributes:
        metric_instance_1 (BaseMetric): First metric used in the F1 computation.
        metric_instance_2 (BaseMetric): Second metric used in the F1 computation.

    Args:
        k (int): The number of top recommendations to consider (cutoff).
        num_users (int): Number of users in the training set.
        num_items (int): Number of items in the training set.
        *args (Any): Accepted but not used by this constructor.
        beta (float): Weight of the second metric (recall by default) in the
            harmonic mean. Default is 1.0.
        dist_sync_on_step (bool): Torchmetrics parameter.
        metric_1 (str): Registry name of the first metric. Defaults to Precision.
        metric_2 (str): Registry name of the second metric. Defaults to Recall.
        **kwargs (Any): Extra keyword arguments handed to both sub-metrics.
    """

    metric_instance_1: BaseMetric
    metric_instance_2: BaseMetric

    def __init__(
        self,
        k: int,
        num_users: int,
        num_items: int,
        *args: Any,
        beta: float = 1.0,
        dist_sync_on_step: bool = False,
        metric_1: str = "Precision",
        metric_2: str = "Recall",
        **kwargs: Any,
    ):
        super().__init__(k, dist_sync_on_step)
        self.num_users = num_users
        self.beta = beta
        self.metric_1 = metric_1
        self.metric_2 = metric_2

        # Both sub-metrics are created with exactly the same settings.
        shared_settings = {
            "k": k,
            "num_users": num_users,
            "num_items": num_items,
            "dist_sync_on_step": dist_sync_on_step,
        }
        self.metric_instance_1 = metric_registry.get(
            metric_1, **shared_settings, **kwargs
        )
        self.metric_instance_2 = metric_registry.get(
            metric_2, **shared_settings, **kwargs
        )

        # This metric needs every block that either sub-metric needs.
        first_blocks = self.metric_instance_1._REQUIRED_COMPONENTS
        second_blocks = self.metric_instance_2._REQUIRED_COMPONENTS
        self._REQUIRED_COMPONENTS = first_blocks | second_blocks

    def update(self, preds: Tensor, user_indices: Tensor, **kwargs: Any):
        """Updates the metric state with the new batch of predictions."""
        # The batch is forwarded unchanged, first metric first.
        for sub_metric in (self.metric_instance_1, self.metric_instance_2):
            sub_metric.update(preds, user_indices, **kwargs)

    def compute(self):
        """Computes the F1 score using the two custom metrics."""
        # NOTE(review): results are looked up under the registry names
        # (self.metric_1 / self.metric_2). If a sub-metric reports under a
        # different key, the all-zero fallback is used silently -- confirm.
        first_score = self.metric_instance_1.compute().get(
            self.metric_1, torch.zeros(self.num_users)
        )
        second_score = self.metric_instance_2.compute().get(
            self.metric_2, torch.zeros(self.num_users)
        )

        beta_sq = self.beta**2
        numerator = (1 + beta_sq) * (first_score * second_score)
        denominator = beta_sq * first_score + second_score

        # 0 / 0 (both scores zero) yields NaN, which is reported as 0.
        f_score = (numerator / denominator).nan_to_num(0)
        return {self.name: f_score}

    @property
    def name(self):
        """The name of the metric customized based on the metrics compared."""
        uses_default_pair = (
            self.metric_1 == "Precision" and self.metric_2 == "Recall"
        )
        if not uses_default_pair:
            return f"F1[{self.metric_1}, {self.metric_2}]"
        return self.__class__.__name__

`name` `property` ¶

The name of the metric customized based on the metrics compared.

`compute()` ¶

Computes the F1 score using the two custom metrics.

Source code in warprec/evaluation/metrics/accuracy/f1.py

def compute(self):
    """Computes the F1 score using the two custom metrics."""
    # Each sub-metric result is looked up under its configured name; an
    # all-zero tensor of length num_users is the fallback when it is missing.
    first_score = self.metric_instance_1.compute().get(
        self.metric_1, torch.zeros(self.num_users)
    )
    second_score = self.metric_instance_2.compute().get(
        self.metric_2, torch.zeros(self.num_users)
    )

    beta_sq = self.beta**2
    numerator = (1 + beta_sq) * (first_score * second_score)
    denominator = beta_sq * first_score + second_score

    # 0 / 0 (both scores zero) yields NaN, which is reported as 0.
    f_score = (numerator / denominator).nan_to_num(0)
    return {self.name: f_score}

`update(preds, user_indices, **kwargs)` ¶

Updates the metric state with the new batch of predictions.

Source code in warprec/evaluation/metrics/accuracy/f1.py

def update(self, preds: Tensor, user_indices: Tensor, **kwargs: Any):
    """Updates the metric state with the new batch of predictions."""
    # The batch is forwarded unchanged to each sub-metric, first metric first.
    for sub_metric in (self.metric_instance_1, self.metric_instance_2):
        sub_metric.update(preds, user_indices, **kwargs)

`warprec.evaluation.metrics.accuracy.gauc.GAUC` ¶

Bases: UserAverageTopKMetric

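Computes the Group Area Under the ROC Curve (GAUC): an AUC-style score is computed separately for each user as that user's area divided by their number of positive items, and is 0 for users with no positives.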

Parameters:

Name	Type	Description	Default
`num_items`	`int`	Number of items in the training set.	required
`num_users`	`int`	Number of users in the training set.	required
`*args`	`Any`	The argument list.	`()`
`dist_sync_on_step`	`bool`	Torchmetrics parameter.	`False`
`**kwargs`	`Any`	The keyword argument dictionary.	`{}`

Source code in warprec/evaluation/metrics/accuracy/gauc.py

@metric_registry.register("GAUC")
class GAUC(UserAverageTopKMetric):
    """Computes the Group Area Under the ROC Curve (GAUC).

    A score is produced per user: the user's area divided by the user's
    number of positives, or 0 when the user has no positives.

    Args:
        num_items (int): Number of items in the training set.
        num_users (int): Number of users in the training set.
        *args (Any): Accepted but not used by this constructor.
        dist_sync_on_step (bool): Torchmetrics parameter.
        **kwargs (Any): Accepted but not used by this constructor.
    """

    # NOTE(review): unpack_inputs also reads "valid_users" but falls back to
    # computing it, so only binary relevance is declared here -- confirm
    # this is intentional.
    _REQUIRED_COMPONENTS: Set[MetricBlock] = {MetricBlock.BINARY_RELEVANCE}

    def __init__(
        self,
        num_items: int,
        num_users: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        # The parent is initialised with k=0; compute_scores passes k=None
        # to the area helper instead of a cutoff.
        super().__init__(k=0, num_users=num_users, dist_sync_on_step=dist_sync_on_step)
        self.num_items = num_items

    def unpack_inputs(self, preds: Tensor, **kwargs: Any) -> Tuple[Tensor, Tensor, Any]:
        """Return ``(target, users, None)`` for the current batch.

        Missing ``binary_relevance`` defaults to zeros shaped like ``preds``;
        missing ``valid_users`` is derived from the target. No top-k
        relevance is produced (third element is ``None``).
        """
        relevance = kwargs.get("binary_relevance", torch.zeros_like(preds))
        valid = kwargs.get("valid_users", self.valid_users(relevance))
        return relevance, valid, None

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Any, **kwargs: Any
    ) -> Tensor:
        """Return area / positives per user, with 0 where there are no positives."""
        user_area, user_positives = self.compute_area_stats(
            preds, target, self.num_items, k=None
        )

        has_positives = user_positives > 0
        ratio = user_area / user_positives
        zero = torch.tensor(0.0, device=preds.device)
        return torch.where(has_positives, ratio, zero)

`warprec.evaluation.metrics.accuracy.hit_rate.HitRate` ¶

Bases: UserAverageTopKMetric

The HitRate@k metric counts the number of users for which the model retrieved at least one relevant item in the top-k recommendations.

Source code in warprec/evaluation/metrics/accuracy/hit_rate.py

@metric_registry.register("HitRate")
class HitRate(UserAverageTopKMetric):
    """HitRate@k: flags each user for whom the top-k list contains at least
    one relevant item.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return 1.0 for rows of ``top_k_rel`` with a positive sum, else 0.0."""
        hits_per_user = top_k_rel.sum(dim=1)
        was_hit = hits_per_user > 0
        return was_hit.float()

`warprec.evaluation.metrics.accuracy.lauc.LAUC` ¶

Bases: UserAverageTopKMetric

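Computes the Limited Area Under the ROC Curve (LAUC): the area is computed with the cutoff k and normalized per user by min(number of positive items, k); users with no positives score 0.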

Parameters:

Name	Type	Description	Default
`k`	`int`	The cutoff.	required
`num_users`	`int`	Number of users in the training set.	required
`num_items`	`int`	Number of items in the training set.	required
`*args`	`Any`	The argument list.	`()`
`dist_sync_on_step`	`bool`	Torchmetrics parameter.	`False`
`**kwargs`	`Any`	The keyword argument dictionary.	`{}`

Source code in warprec/evaluation/metrics/accuracy/lauc.py

@metric_registry.register("LAUC")
class LAUC(UserAverageTopKMetric):
    """Computes the Limited Area Under the ROC Curve (LAUC) at cutoff k.

    The per-user score is the area obtained with cutoff ``k`` divided by
    ``min(number of positives, k)``; users with no positives score 0.

    Args:
        k (int): The cutoff.
        num_users (int): Number of users in the training set.
        num_items (int): Number of items in the training set.
        *args (Any): Accepted but not used by this constructor.
        dist_sync_on_step (bool): Torchmetrics parameter, forwarded to the parent.
        **kwargs (Any): Extra keyword arguments forwarded to the parent class.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
    }

    def __init__(
        self,
        k: int,
        num_users: int,
        num_items: int,
        *args: Any,
        dist_sync_on_step: bool = False,
        **kwargs: Any,
    ):
        # dist_sync_on_step is a named parameter, so it never appears in
        # **kwargs; it has to be forwarded explicitly or the caller's value
        # is silently dropped.
        super().__init__(
            k=k,
            num_users=num_users,
            dist_sync_on_step=dist_sync_on_step,
            **kwargs,
        )
        self.num_items = num_items

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Any, **kwargs: Any
    ) -> Tensor:
        """Return area / min(positives, k) per user, 0 where that is not positive."""
        # Area computed with the cutoff; the helper's positives count is not
        # used because the normalization below is capped at k.
        area, _ = self.compute_area_stats(preds, target, self.num_items, k=self.k)

        # Normalization by min(positives, k)
        total_positives = target.sum(dim=1)
        normalization = torch.minimum(
            total_positives, torch.tensor(self.k, device=preds.device)
        )

        # LAUC = total_area / min(positives, k)
        return torch.where(
            normalization > 0,
            area / normalization,
            torch.tensor(0.0, device=preds.device),
        )

`warprec.evaluation.metrics.accuracy.map.MAP` ¶

Bases: UserAverageTopKMetric

Mean Average Precision (MAP) at K.

Source code in warprec/evaluation/metrics/accuracy/map.py

@metric_registry.register("MAP")
class MAP(UserAverageTopKMetric):
    """Mean Average Precision (MAP) at K.

    For each user, the precision at every top-k position that holds a
    relevant item is summed and divided by ``min(number of relevant items, k)``.
    Users with no relevant items score 0.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the average precision of each user in the batch."""
        # Precision after each of the k positions: hits so far / position.
        precision_at_i = top_k_rel.cumsum(dim=1) / torch.arange(
            1, self.k + 1, device=top_k_rel.device
        )  # [batch_size, k]
        normalization = torch.minimum(
            target.sum(dim=1),
            torch.tensor(self.k, dtype=target.dtype, device=target.device),
        )  # [batch_size]

        # Compute AP per user. The zero fallback is created on the same
        # device as the other operands rather than on the metric's own
        # device, which may differ from the batch's.
        return torch.where(
            normalization > 0,
            (precision_at_i * top_k_rel).sum(dim=1) / normalization,
            torch.tensor(0.0, device=target.device),
        )  # [batch_size]

`warprec.evaluation.metrics.accuracy.mar.MAR` ¶

Bases: UserAverageTopKMetric

Mean Average Recall (MAR) at K.

Source code in warprec/evaluation/metrics/accuracy/mar.py

@metric_registry.register("MAR")
class MAR(UserAverageTopKMetric):
    """Mean Average Recall (MAR) at K.

    For each user, the recall at every top-k position that holds a relevant
    item is summed and divided by ``min(number of relevant items, k)``.
    Users with no relevant items score 0.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the average recall of each user in the batch."""
        relevant_per_user = target.sum(dim=1)  # [batch_size]

        # Recall after each of the k positions; the denominator is clamped
        # to 1 so users without relevant items do not divide by zero.
        recall_at_i = top_k_rel.cumsum(dim=1) / relevant_per_user.unsqueeze(1).clamp(
            min=1
        )  # [batch_size, k]
        normalization = torch.minimum(
            relevant_per_user,
            torch.tensor(self.k, dtype=target.dtype, device=target.device),
        )  # [batch_size]

        # Compute AR per user. The zero fallback is created on the same
        # device as the other operands rather than on the metric's own
        # device, which may differ from the batch's.
        return torch.where(
            normalization > 0,
            (recall_at_i * top_k_rel).sum(dim=1) / normalization,
            torch.tensor(0.0, device=target.device),
        )  # [batch_size]

`warprec.evaluation.metrics.accuracy.mrr.MRR` ¶

Bases: UserAverageTopKMetric

Mean Reciprocal Rank (MRR) at K. MRR measures the position of the first relevant item in the recommendation list.

Source code in warprec/evaluation/metrics/accuracy/mrr.py

@metric_registry.register("MRR")
class MRR(UserAverageTopKMetric):
    """Mean Reciprocal Rank (MRR) at K.

    Scores each user by the reciprocal of the 1-based position of the first
    relevant item in the top-k list, or 0 when the list has none.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return the reciprocal rank of each user in the batch."""
        # 1-based position of the largest entry in each row of top_k_rel.
        first_hit_rank = top_k_rel.argmax(dim=1) + 1

        # Rows without any relevant item would otherwise get rank 1.
        no_hit = top_k_rel.sum(dim=1) == 0

        return first_hit_rank.float().reciprocal().masked_fill(no_hit, 0)

`warprec.evaluation.metrics.accuracy.ndcg.nDCG` ¶

Bases: UserAverageTopKMetric

The nDCG@k metric is defined as the ratio of the DCG@k to the IDCG@k.

The DCG@k is the Discounted Cumulative Gain, which measures the gain of the retrieved items.

The IDCG@k is the Ideal Discounted Cumulative Gain, which measures the maximum gain obtainable by a perfect model.

Source code in warprec/evaluation/metrics/accuracy/ndcg.py

@metric_registry.register("nDCG")
class nDCG(UserAverageTopKMetric):
    """The nDCG@k metric is the ratio of the DCG@k to the IDCG@k.

    The DCG@k is the Discounted Cumulative Gain, which measures the gain of
    the retrieved items.

    The IDCG@k is the Ideal Discounted Cumulative Gain, which measures the
    maximum gain obtainable by a perfect model.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.DISCOUNTED_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_DISCOUNTED_RELEVANCE,
    }

    def unpack_inputs(
        self, preds: Tensor, **kwargs: Any
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """Return ``(target, users, top_k_rel)`` based on discounted relevance.

        Each value is taken from ``kwargs`` when present; otherwise the
        target defaults to zeros shaped like ``preds`` and the other two are
        derived from it.
        """
        relevance = kwargs.get("discounted_relevance", torch.zeros_like(preds))
        valid = kwargs.get("valid_users", self.valid_users(relevance))
        top_k_key = f"top_{self.k}_discounted_relevance"
        top_k = kwargs.get(top_k_key, self.top_k_relevance(preds, relevance, self.k))
        return relevance, valid, top_k

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return DCG / IDCG per user, with NaN results replaced by 0."""
        # The k largest target values per row form the ideal ranking.
        best_possible = torch.topk(target, self.k, dim=1).values
        gain = self.dcg(top_k_rel)
        # Lower-bound the ideal gain so the division never hits exactly zero.
        ideal_gain = self.dcg(best_possible).clamp(min=1e-10)

        ratio = gain / ideal_gain
        return ratio.nan_to_num(0)

`warprec.evaluation.metrics.accuracy.ndcg.nDCGRendle2020` ¶

Bases: UserAverageTopKMetric

Normalized Discounted Cumulative Gain (nDCG) metric for evaluating recommender systems.

It measures the ranking quality by considering the position of relevant items, giving higher scores to relevant items that appear earlier in the recommendation list. This implementation calculates nDCG@k using binary relevance (0 or 1).

Source code in warprec/evaluation/metrics/accuracy/ndcg.py

@metric_registry.register("nDCGRendle2020")
class nDCGRendle2020(UserAverageTopKMetric):
    """Normalized Discounted Cumulative Gain (nDCG) for recommender evaluation.

    Ranking quality is measured by the position of relevant items: relevant
    items that appear earlier in the recommendation list contribute more.
    This variant computes nDCG@k from *binary relevance* (0 or 1).
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return DCG / IDCG per user, with NaN results replaced by 0."""
        # The k largest target values per row form the ideal ranking.
        best_possible = torch.topk(target, self.k, dim=1).values
        gain = self.dcg(top_k_rel)
        # Lower-bound the ideal gain so the division never hits exactly zero.
        ideal_gain = self.dcg(best_possible).clamp(min=1e-10)

        ratio = gain / ideal_gain
        return ratio.nan_to_num(0)

`warprec.evaluation.metrics.accuracy.precision.Precision` ¶

Bases: UserAverageTopKMetric

The Precision@k metric is the number of correctly retrieved (relevant) items in the top-k list, divided by k, the maximum number of items that can be retrieved.

Source code in warprec/evaluation/metrics/accuracy/precision.py

@metric_registry.register("Precision")
class Precision(UserAverageTopKMetric):
    """Precision@k: the number of relevant items in the top-k list divided
    by k, the maximum number of items that can be retrieved.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return hits-in-top-k divided by k for each user in the batch."""
        hits_per_user = top_k_rel.sum(dim=1).float()
        return hits_per_user / self.k

`warprec.evaluation.metrics.accuracy.recall.Recall` ¶

Bases: UserAverageTopKMetric

The Recall@k metric is the number of correctly retrieved (relevant) items in the top-k list, divided by the total number of relevant items in the ground truth.

Source code in warprec/evaluation/metrics/accuracy/recall.py

@metric_registry.register("Recall")
class Recall(UserAverageTopKMetric):
    """Recall@k: the number of relevant items in the top-k list divided by
    the total number of relevant items in the ground truth.
    """

    _REQUIRED_COMPONENTS: Set[MetricBlock] = {
        MetricBlock.BINARY_RELEVANCE,
        MetricBlock.VALID_USERS,
        MetricBlock.TOP_K_BINARY_RELEVANCE,
    }

    def compute_scores(
        self, preds: Tensor, target: Tensor, top_k_rel: Tensor, **kwargs: Any
    ) -> Tensor:
        """Return hits / relevant per user, 0 for users with nothing relevant."""
        found = top_k_rel.sum(dim=1).float()
        total_relevant = target.sum(dim=1).float()

        # Users without any relevant item get 0 instead of a division by zero.
        has_relevant = total_relevant > 0
        zero = torch.tensor(0.0, device=preds.device)
        return torch.where(has_relevant, found / total_relevant, zero)

Accuracy Metrics - API Reference¶

warprec.evaluation.metrics.accuracy.auc.AUC ¶

warprec.evaluation.metrics.accuracy.f1.F1 ¶

name property ¶

compute() ¶

update(preds, user_indices, **kwargs) ¶

warprec.evaluation.metrics.accuracy.gauc.GAUC ¶

warprec.evaluation.metrics.accuracy.hit_rate.HitRate ¶

warprec.evaluation.metrics.accuracy.lauc.LAUC ¶

warprec.evaluation.metrics.accuracy.map.MAP ¶

warprec.evaluation.metrics.accuracy.mar.MAR ¶

warprec.evaluation.metrics.accuracy.mrr.MRR ¶

warprec.evaluation.metrics.accuracy.ndcg.nDCG ¶

warprec.evaluation.metrics.accuracy.ndcg.nDCGRendle2020 ¶

warprec.evaluation.metrics.accuracy.precision.Precision ¶

warprec.evaluation.metrics.accuracy.recall.Recall ¶

`warprec.evaluation.metrics.accuracy.auc.AUC` ¶

`warprec.evaluation.metrics.accuracy.f1.F1` ¶

`name` `property` ¶

`compute()` ¶

`update(preds, user_indices, **kwargs)` ¶

`warprec.evaluation.metrics.accuracy.gauc.GAUC` ¶

`warprec.evaluation.metrics.accuracy.hit_rate.HitRate` ¶

`warprec.evaluation.metrics.accuracy.lauc.LAUC` ¶

`warprec.evaluation.metrics.accuracy.map.MAP` ¶

`warprec.evaluation.metrics.accuracy.mar.MAR` ¶

`warprec.evaluation.metrics.accuracy.mrr.MRR` ¶

`warprec.evaluation.metrics.accuracy.ndcg.nDCG` ¶

`warprec.evaluation.metrics.accuracy.ndcg.nDCGRendle2020` ¶

`warprec.evaluation.metrics.accuracy.precision.Precision` ¶

`warprec.evaluation.metrics.accuracy.recall.Recall` ¶