Source code for dccd.domain.dataset

"""DatasetId and Provenance — dataset identity and lineage."""

from __future__ import annotations

from pydantic import BaseModel

from dccd.domain.symbol import Symbol
from dccd.domain.types import DataType

__all__ = ["DatasetId", "Provenance"]


class DatasetId(BaseModel, frozen=True):
    """Identity of one dataset: exchange × symbol × data type × span.

    The model is declared with ``frozen=True``.

    Attributes
    ----------
    exchange : str
        Exchange name, e.g. ``'binance'``.
    symbol : Symbol
        Traded pair; its ``base`` and ``quote`` parts are used by
        :meth:`pair_slug`.
    data_type : DataType
        Kind of data held by the dataset; its ``value`` is used by
        ``__str__``.
    span : int or None
        Optional span, ``None`` by default.  When set, ``__str__``
        renders it with an ``s`` suffix.

    Examples
    --------
    >>> from dccd.domain.symbol import Symbol
    >>> from dccd.domain.types import DataType
    >>> ds = DatasetId(exchange='binance',
    ...                symbol=Symbol(base='BTC', quote='USDT'),
    ...                data_type=DataType.OHLC, span=3600)
    >>> ds.exchange
    'binance'

    """

    exchange: str
    symbol: Symbol
    data_type: DataType
    span: int | None = None

    def pair_slug(self) -> str:
        """Return the pair as ``'<base>-<quote>'`` (e.g. ``'BTC-USDT'``).

        Intended as a filesystem-safe rendering of the symbol.
        """
        pair = self.symbol
        return f"{pair.base}-{pair.quote}"

    def __str__(self) -> str:
        """Return ``exchange:symbol:data_type`` plus ``:<span>s`` if span is set."""
        segments = (self.exchange, str(self.symbol), self.data_type.value)
        if self.span is None:
            # No span: the identifier stops after the data type.
            return ":".join(segments)
        return ":".join((*segments, f"{self.span}s"))
class Provenance(BaseModel, frozen=True):
    """Lineage metadata describing a dataset as a whole.

    Stored in the Parquet metadata of the dataset rather than on each row.
    The model is declared with ``frozen=True``.

    Attributes
    ----------
    source : str
        Description of where the data came from, e.g. ``"binance:rest"``
        or ``"kraken:ws"``.
    derived_from : DatasetId or None
        The parent dataset when this one was derived from another
        (e.g. OHLC built from trades); ``None`` by default.

    """

    source: str
    derived_from: DatasetId | None = None