Skip to content

Evaluation

cenote.eval.metrics

Retrieval quality metrics — BEIR-style with optional ID extractor.

precision_at_k(results: list[RetrievalResult], relevant_ids: set[str], k: int, key: Callable[[RetrievalResult], str] = _default_key) -> float

Fraction of the top-k retrieved chunks whose IDs are in relevant_ids.

Returns hits / len(top) (precision-among-returned), not strict BEIR hits / k. The two converge when the retriever always returns >= k results. Pass key=lambda r: r.chunk.document_id for doc-level matching.

Source code in src/cenote/eval/metrics.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def precision_at_k(
    results: list[RetrievalResult],
    relevant_ids: set[str],
    k: int,
    key: Callable[[RetrievalResult], str] = _default_key,
) -> float:
    """Compute precision over the first `k` retrieved results.

    Each of the first `k` entries of `results` is mapped to an ID with `key`
    and counted as a hit when that ID is a member of `relevant_ids`.

    The denominator is the number of results actually examined
    (`len(results[:k])`), not `k` itself, so a retriever that returns fewer
    than `k` results is not penalised for the shortfall. This differs from
    strict BEIR precision (`hits / k`); the two agree whenever at least `k`
    results are returned. Supply `key=lambda r: r.chunk.document_id` to match
    at document level instead.

    Returns:
        `hits / len(results[:k])`, or `0.0` when `results` is empty.

    Raises:
        ConfigurationError: If `k` is zero or negative.
    """
    if k <= 0:
        raise ConfigurationError("k must be positive")
    if not results:
        return 0.0

    window = results[:k]
    matched = 0
    for item in window:
        if key(item) in relevant_ids:
            matched += 1
    # Divide by what was actually returned, not by k (see docstring).
    return matched / len(window)

recall_at_k(results: list[RetrievalResult], relevant_ids: set[str], k: int, key: Callable[[RetrievalResult], str] = _default_key) -> float

Fraction of relevant chunks captured by the top-k retrieved results.

Source code in src/cenote/eval/metrics.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def recall_at_k(
    results: list[RetrievalResult],
    relevant_ids: set[str],
    k: int,
    key: Callable[[RetrievalResult], str] = _default_key,
) -> float:
    """Compute recall over the first `k` retrieved results.

    The first `k` entries of `results` are mapped to IDs with `key` and
    collected into a set, so an ID that appears more than once in the top-k
    is only counted once. The result is the share of `relevant_ids` present
    in that set.

    Returns:
        `|top-k IDs ∩ relevant_ids| / |relevant_ids|`, or `0.0` when
        `relevant_ids` is empty.

    Raises:
        ConfigurationError: If `k` is zero or negative.
    """
    if k <= 0:
        raise ConfigurationError("k must be positive")
    if not relevant_ids:
        # Nothing to recall; avoid dividing by zero.
        return 0.0

    retrieved_ids = set()
    for item in results[:k]:
        retrieved_ids.add(key(item))
    found = retrieved_ids & relevant_ids
    return len(found) / len(relevant_ids)

mean_reciprocal_rank(results: list[RetrievalResult], relevant_ids: set[str], key: Callable[[RetrievalResult], str] = _default_key) -> float

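Reciprocal of the rank of the first relevant chunk; 0 if none found. This is the reciprocal rank for a single query's results; average the value across queries to obtain the mean reciprocal rank (MRR).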

Source code in src/cenote/eval/metrics.py
53
54
55
56
57
58
59
60
61
62
def mean_reciprocal_rank(
    results: list[RetrievalResult],
    relevant_ids: set[str],
    key: Callable[[RetrievalResult], str] = _default_key,
) -> float:
    """Return the reciprocal rank of the first relevant result.

    `results` is scanned in order, with ranks starting at 1. Each entry is
    mapped to an ID with `key`; the scan stops at the first ID found in
    `relevant_ids`.

    Returns:
        `1.0 / rank` of the first relevant result, or `0.0` when no result
        is relevant (including when `results` is empty).
    """
    position = 0
    for item in results:
        position += 1  # ranks are 1-based
        if key(item) in relevant_ids:
            return 1.0 / position
    return 0.0