"""Vis4D base evaluation."""

from __future__ import annotations

from vis4d.common.typing import GenericFunc, MetricLogs, unimplemented


class Evaluator:  # pragma: no cover
    """Abstract evaluator class.

    The evaluator is responsible for evaluating the model on a given dataset.
    At the end of each batch, process_batch() is called with the model
    outputs and the batch data to accumulate the data for evaluation. An
    optional save_batch() can be implemented to save the predictions of the
    current batch.

    After all batches are processed, the gather() method is called to gather
    the data from all ranks. Then, the process() method performs any
    metric-independent processing of the accumulated data. Finally, the
    evaluate() method is called to evaluate the model for the specified
    metrics and return the results. Optionally, the save() method can be
    implemented to save the predictions for the specified metrics.

    The following diagram illustrates the evaluation process::

                      RANK 0                          RANK 1                  ...

        x num_batches
        β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
        β”‚  β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”    β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”  β”‚
        β”‚  β”‚ process_batch(data, ...) β”‚    β”‚ process_batch(data, ...) β”‚  β”‚ <- Process a batch (predictions, labels, etc.)
        β”‚  β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜    β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜  β”‚    and accumulate the data for evaluation.
        β”‚                β–Ό                              β–Ό                β”‚
        β”‚     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”          β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”‚
        β”‚     β”‚ save_batch(metric) β”‚          β”‚ save_batch(metric) β”‚     β”‚ <- Dump the predictions in a batch for a specified
        β”‚     β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜          β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜     β”‚    metric (e.g., for online evaluation).
        β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
                   β”Œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”                         β”‚
                   β”‚ gather() β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
                   β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜      <- Gather the data from all ranks
                         β–Ό
                   β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
                   β”‚ process() β”‚     <- Process the data that are
                   β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜        metrics-independent (if any)
                         β–Ό
               β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
               β”‚ evaluate(metric) β”‚  <- Evaluate for a specified metric and
               β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜    return the results.
                         β–Ό
                 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
                 β”‚ save(metric) β”‚    <- Dump the predictions for a specified
                 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜       metric (e.g., for online evaluation).

    Note:
        The save_batch() method saves the predictions after every batch,
        which helps reduce memory usage compared to saving all predictions
        at once in the save() method. However, save_batch() is optional and
        can be omitted if the data can only be saved after all batches have
        been processed.
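
    Example:
        A minimal sketch of a concrete evaluator (illustrative only, not
        part of the Vis4D API). The signature of process_batch() is chosen
        by the subclass, and MetricLogs is assumed to behave like a plain
        dict of scalar scores::

            class CountEvaluator(Evaluator):
                # Toy evaluator that counts the processed samples.

                def __init__(self) -> None:
                    self.num_samples = 0

                @property
                def metrics(self) -> list[str]:
                    return ["count"]

                def reset(self) -> None:
                    self.num_samples = 0

                def process_batch(self, predictions: list) -> None:
                    self.num_samples += len(predictions)

                def evaluate(self, metric: str) -> tuple[MetricLogs, str]:
                    logs = {"count": self.num_samples}
                    return logs, f"Processed {self.num_samples} samples."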
    """  # pylint: disable=line-too-long

    @property
    def metrics(self) -> list[str]:
        """Return list of metrics to evaluate.

        Returns:
            list[str]: Metrics to evaluate.
        """
        return []

    def gather(self, gather_func: GenericFunc) -> None:
        """Gather variables in case of distributed setting (if needed).

        Args:
            gather_func (Callable[[Any], Any]): Gather function.
        """

    def reset(self) -> None:
        """Reset evaluator for new round of evaluation.

        Raises:
            NotImplementedError: This is an abstract class method.
        """
        raise NotImplementedError

    # Process a batch of data; typed as a generic callable so that
    # subclasses can define their own signature.
    process_batch: GenericFunc = unimplemented

    def process(self) -> None:
        """Process all accumulated data at the end of an epoch, if any."""

    def evaluate(self, metric: str) -> tuple[MetricLogs, str]:
        """Evaluate all predictions according to given metric.

        Args:
            metric (str): Metric to evaluate.

        Raises:
            NotImplementedError: This is an abstract class method.

        Returns:
            tuple[MetricLogs, str]: Dictionary of scores to log and a pretty
                printed string.
        """
        raise NotImplementedError

    def save_batch(self, metric: str, output_dir: str) -> None:
        """Save batch of predictions to file.

        Args:
            metric (str): Metric for which to save the predictions.
            output_dir (str): Output directory.
        """

    def save(self, metric: str, output_dir: str) -> None:
        """Save all predictions to file at the end of an epoch.

        Args:
            metric (str): Metric for which to save the predictions.
            output_dir (str): Output directory.
        """