Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Oct 29, 2024
1 parent 9dc43b4 commit df63596
Showing 1 changed file with 25 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@
import uuid
import zipfile
from collections.abc import Callable, Iterable
from typing import ClassVar, Self
from typing import Annotated, ClassVar, Literal, Self, TypeAlias

from lsst.daf.butler import DatasetIdFactory, DatasetRef
from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo
from lsst.resources import ResourcePath, ResourcePathExpression
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict, Field

from ..._dataset_type import DatasetType, SerializedDatasetType
from ...dimensions import DataCoordinate, DimensionUniverse, SerializedDataCoordinate, SerializedDataId
Expand All @@ -54,6 +54,8 @@ class MinimalistDatasetRef(BaseModel):
to this information.
"""

model_config = ConfigDict(frozen=True)

dataset_type_name: str
"""Name of the dataset type."""

Expand All @@ -70,6 +72,18 @@ class SerializedDatasetRefContainer(BaseModel):
Dimension records are not included.
"""

model_config = ConfigDict(extra="allow", frozen=True)
container_version: str


class SerializedDatasetRefContainerV1(SerializedDatasetRefContainer):
"""Serializable model for a collection of DatasetRef.
Dimension records are not included.
"""

container_version: Literal["V1"] = "V1"

universe_version: int
"""Dimension universe version."""

Expand Down Expand Up @@ -177,6 +191,12 @@ def to_refs(self, universe: DimensionUniverse) -> list[DatasetRef]:
return refs


# Annotated alias used as the declared type of ``ZipIndex.refs``.
# It wraps SerializedDatasetRefContainerV1 with a pydantic ``Field`` that
# names ``container_version`` as the discriminator; the V1 model pins that
# field to the literal "V1".
# NOTE(review): only one container class is listed here so far -- presumably
# future container versions are meant to be added as a union; confirm.
SerializedDatasetRefContainers: TypeAlias = Annotated[
SerializedDatasetRefContainerV1,
Field(discriminator="container_version"),
]


class ArtifactIndexInfo(BaseModel):
"""Information related to an artifact in an index."""

Expand Down Expand Up @@ -242,7 +262,7 @@ class ZipIndex(BaseModel):
file datastore.
"""

refs: SerializedDatasetRefContainer
refs: SerializedDatasetRefContainers
"""Deduplicated information for all the `DatasetRef` in the index."""

artifact_map: dict[str, ArtifactIndexInfo]
Expand Down Expand Up @@ -367,15 +387,15 @@ def from_artifact_map(
"""
if not refs:
return cls(
refs=SerializedDatasetRefContainer.from_refs(refs),
refs=SerializedDatasetRefContainerV1.from_refs(refs),
artifact_map={},
)

# Calculate the paths relative to the given root since the Zip file
# uses relative paths.
file_to_relative = cls.calc_relative_paths(root, artifact_map.keys())

simplified_refs = SerializedDatasetRefContainer.from_refs(refs)
simplified_refs = SerializedDatasetRefContainerV1.from_refs(refs)

# Convert the artifact mapping to relative path.
relative_artifact_map = {file_to_relative[path]: info for path, info in artifact_map.items()}
Expand Down

0 comments on commit df63596

Please sign in to comment.