Skip to content

Data models

cenote.models

Pydantic models — the contracts between every cenote module.

Document

Bases: BaseModel

Source document before chunking.

Source code in src/cenote/models.py
 9
10
11
12
13
14
15
16
17
class Document(BaseModel):
    """Source document before chunking."""

    # extra="forbid": constructing a Document with a field not declared below
    # fails validation instead of silently dropping or storing the extra key.
    model_config = ConfigDict(extra="forbid")

    # Required identifier for the document.
    # NOTE(review): presumably the value Chunk.document_id refers to — confirm.
    id: str
    # Required document text.
    content: str
    # Free-form key/value data; default_factory gives every instance its own
    # empty dict rather than a shared mutable default.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Optional; None when not supplied.
    # NOTE(review): likely a path/URL describing where the document came
    # from — the expected format is not defined here; confirm with callers.
    source: str | None = None

Chunk

Bases: BaseModel

Atomic embeddable unit. Produced by a Chunker, consumed by an Embedder.

Source code in src/cenote/models.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class Chunk(BaseModel):
    """Atomic embeddable unit. Produced by a Chunker, consumed by an Embedder."""

    # extra="forbid": undeclared fields are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # Required chunk identifier. make_id() below builds one from
    # (document_id, position); nothing in this class enforces that `id`
    # was actually produced that way.
    id: str
    # Identifier of the document this chunk belongs to.
    document_id: str
    # Text of the chunk.
    content: str
    # Ordinal position of the chunk.
    # NOTE(review): whether numbering starts at 0 or 1 is not defined here —
    # confirm with the Chunker implementation.
    position: int
    # Free-form key/value data; a fresh empty dict per instance.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Required, with no default.
    # NOTE(review): presumably a digest of `content`; the hash algorithm and
    # encoding are not defined in this class — confirm with the producer.
    content_hash: str

    @staticmethod
    def make_id(document_id: str, position: int) -> str:
        """Deterministic chunk ID from a document ID and ordinal position.

        Args:
            document_id: ID of the document the chunk belongs to.
            position: Ordinal position of the chunk.

        Returns:
            The two values joined by a colon, i.e. ``"<document_id>:<position>"``.
        """
        return f"{document_id}:{position}"

make_id(document_id: str, position: int) -> str staticmethod

Deterministic chunk ID from a document ID and ordinal position.

Source code in src/cenote/models.py
32
33
34
35
@staticmethod
def make_id(document_id: str, position: int) -> str:
    """Deterministic chunk ID from a document ID and ordinal position.

    Args:
        document_id: ID of the document the chunk belongs to.
        position: Ordinal position of the chunk.

    Returns:
        The two values joined by a colon, i.e. ``"<document_id>:<position>"``.
    """
    return f"{document_id}:{position}"

EmbeddedChunk

Bases: BaseModel

A Chunk together with its embedding vector and provenance.

Source code in src/cenote/models.py
38
39
40
41
42
43
44
45
46
class EmbeddedChunk(BaseModel):
    """A Chunk together with its embedding vector and provenance."""

    # extra="forbid": undeclared fields are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # The chunk that was embedded (nested model, validated as a Chunk).
    chunk: Chunk
    # The embedding vector as a plain list of floats.
    embedding: list[float]
    # Provenance of the vector.
    # NOTE(review): presumably the name/identifier of the embedding model
    # that produced `embedding` — the naming scheme is not defined here.
    embedding_model: str
    # NOTE(review): presumably equal to len(embedding); no validator in this
    # class enforces that, so the two can disagree — confirm whether the
    # producer guarantees it.
    dimensions: int

RetrievalResult

Bases: BaseModel

One result returned by a Retriever.

Source code in src/cenote/models.py
49
50
51
52
53
54
55
56
class RetrievalResult(BaseModel):
    """One result returned by a Retriever."""

    # extra="forbid": undeclared fields are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # The chunk this result refers to.
    chunk: Chunk
    # Score attached to the chunk.
    # NOTE(review): the scale and direction (e.g. higher = more relevant) are
    # not defined here and may differ between retrievers — confirm.
    score: float
    # NOTE(review): presumably the name of the retriever that produced this
    # result — the set of valid values is not defined here; confirm.
    retriever: str

Message

Bases: BaseModel

Single conversation turn. Anthropic prompt-cache marker is optional.

cache_control="ephemeral" enables Anthropic's default 5-minute cache. Longer TTLs (e.g. 1h) are not yet supported; support is deferred to M1.3 if there is demand.

Source code in src/cenote/models.py
59
60
61
62
63
64
65
66
67
68
69
70
class Message(BaseModel):
    """Single conversation turn. Anthropic prompt-cache marker is optional.

    `cache_control="ephemeral"` enables Anthropic's default 5-minute cache.
    Longer TTLs (e.g. 1h) are not yet supported; deferred to M1.3 if demand.
    """

    # extra="forbid": undeclared fields are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # Speaker of the turn; validation rejects anything other than these three
    # literal strings.
    role: Literal["user", "assistant", "system"]
    # Text of the turn.
    content: str
    # Optional prompt-cache marker: either the literal "ephemeral" or None
    # (the default, meaning no marker is set on this message).
    cache_control: Literal["ephemeral"] | None = None