diff --git a/docs/explanations/schema-generation.rst b/docs/explanations/schema-generation.rst index 8c9485036..d631e42ac 100644 --- a/docs/explanations/schema-generation.rst +++ b/docs/explanations/schema-generation.rst @@ -15,7 +15,7 @@ After changing any of the documents it's necessary to regenerate the schemas. Th .. code-block:: bash - regenerate-schema + python -m event_model.generate which is a python environment script in a dev install of event-model. diff --git a/pyproject.toml b/pyproject.toml index afa726c15..0609d5b82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,12 +50,10 @@ dev = [ "numpydoc", # For schema generation. - "pydantic>=2.6", + "pydantic<3", + "datamodel-code-generator", ] -[project.scripts] -regenerate-schema = "event_model.documents.generate.__main__:main" - [project.urls] GitHub = "https://github.com/bluesky/event-model" diff --git a/src/event_model/__init__.py b/src/event_model/__init__.py index 3a255c7e3..38410a2f1 100644 --- a/src/event_model/__init__.py +++ b/src/event_model/__init__.py @@ -47,7 +47,16 @@ ) from .documents.event_page import EventPage, PartialEventPage from .documents.resource import PartialResource, Resource -from .documents.run_start import Calculation, Hints, Projection, Projections, RunStart +from .documents.run_start import ( + CalculatedEventProjection, + Calculation, + ConfigurationProjection, + Hints, + LinkedEventProjection, + Projections, + RunStart, + StaticProjection, +) from .documents.run_stop import RunStop from .documents.stream_datum import StreamDatum, StreamRange from .documents.stream_resource import StreamResource @@ -78,7 +87,10 @@ "Resource", "Calculation", "Hints", - "Projection", + "LinkedEventProjection", + "StaticProjection", + "CalculatedEventProjection", + "ConfigurationProjection", "Projections", "RunStart", "RunStop", @@ -350,8 +362,8 @@ def __call__( else: raise EventModelValueError( "SingleRunDocumentRouter associated with start document " - f'{self._start_doc["uid"]} ' - f'received a second start document with uid {doc["uid"]}' + f"{self._start_doc['uid']} " + f"received a second start document with uid {doc['uid']}" ) elif name == "descriptor": assert isinstance(self._start_doc, dict) @@ -360,9 +372,9 @@ def __call__( else: raise EventModelValueError( "SingleRunDocumentRouter associated with start document " - f'{self._start_doc["uid"]} ' - f'received a descriptor {doc["uid"]} associated with ' - f'start document {doc["run_start"]}' + f"{self._start_doc['uid']} " + f"received a descriptor {doc['uid']} associated with " + f"start document {doc['run_start']}" ) # Defer to superclass for dispatch/processing. return super().__call__(name, doc, validate=validate) @@ -403,7 +415,7 @@ def get_descriptor(self, doc: dict) -> EventDescriptor: elif doc["descriptor"] not in self._descriptors: raise EventModelValueError( "SingleRunDocumentRouter has not processed a descriptor with " - f'uid {doc["descriptor"]}' + f"uid {doc['descriptor']}" ) return self._descriptors[doc["descriptor"]] @@ -1066,9 +1078,7 @@ def get_handler(self, resource: Resource) -> Any: f"mapped from {original_root} to {root} by root_map." ) else: - msg += ( - f"Its 'root' field {original_root} was " f"*not* modified by root_map." - ) + msg += f"Its 'root' field {original_root} was *not* modified by root_map." error_to_raise = EventModelError(msg) handler = _attempt_with_retries( func=handler_class, @@ -1554,8 +1564,7 @@ def start(self, start_doc: RunStart) -> None: if uid in self._start_to_start_doc: if self._start_to_start_doc[uid] == start_doc: raise ValueError( - "RunRouter received the same 'start' document twice:\n" - "{start_doc!r}" + "RunRouter received the same 'start' document twice:\n{start_doc!r}" ) else: raise ValueError( @@ -1821,9 +1830,8 @@ class MismatchedDataKeys(InvalidData): DocumentNames.resource: "schemas/resource.json", DocumentNames.stream_datum: "schemas/stream_datum.json", DocumentNames.stream_resource: "schemas/stream_resource.json", - # DEPRECATED: - DocumentNames.bulk_events: "schemas/bulk_events.json", DocumentNames.bulk_datum: "schemas/bulk_datum.json", + DocumentNames.bulk_events: "schemas/bulk_events.json", } schemas = {} for name, filename in SCHEMA_NAMES.items(): @@ -2155,7 +2163,7 @@ def __call__( ) -> RunStop: if self.poison_pill: raise EventModelError( - "Already composed a RunStop document for run " "{!r}.".format( + "Already composed a RunStop document for run {!r}.".format( self.start["uid"] ) ) diff --git a/src/event_model/basemodels/__init__.py b/src/event_model/basemodels/__init__.py new file mode 100644 index 000000000..20748fd30 --- /dev/null +++ b/src/event_model/basemodels/__init__.py @@ -0,0 +1,61 @@ +from typing import Tuple, Type, Union + +from event_model.basemodels.datum import Datum +from event_model.basemodels.datum_page import DatumPage +from event_model.basemodels.event import Event +from event_model.basemodels.event_descriptor import ( + Dtype, + EventDescriptor, + Limits, + LimitsRange, +) +from event_model.basemodels.event_page import EventPage +from event_model.basemodels.resource import Resource +from event_model.basemodels.run_start import RunStart +from event_model.basemodels.run_stop import RunStop +from event_model.basemodels.stream_datum import StreamDatum +from event_model.basemodels.stream_resource import StreamResource + +DocumentType = Union[ + Type[Datum], + Type[DatumPage], + Type[Event], + Type[EventDescriptor], + Type[EventPage], + Type[Resource], + Type[RunStart], + Type[RunStop], + Type[StreamDatum], + Type[StreamResource], +] + +ALL_BASEMODELS: Tuple[DocumentType, ...] = ( + Datum, + DatumPage, + Event, + EventDescriptor, + EventPage, + Resource, + RunStart, + RunStop, + StreamDatum, + StreamResource, +) + + +__all__ = [ + "Datum", + "DatumPage", + "Dtype", + "Event", + "EventDescriptor", + "EventPage", + "Limits", + "LimitsRange", + "Resource", + "RunStart", + "RunStop", + "StreamDatum", + "StreamResource", + "DocumentType", +] diff --git a/src/event_model/basemodels/datum.py b/src/event_model/basemodels/datum.py new file mode 100644 index 000000000..ef863da16 --- /dev/null +++ b/src/event_model/basemodels/datum.py @@ -0,0 +1,32 @@ +from typing import Any, Dict + +from pydantic import ( + BaseModel, + ConfigDict, + Field, +) +from typing_extensions import Annotated + + +class Datum(BaseModel): + """Document to reference a quanta of externally-stored data""" + + model_config = ConfigDict(extra="forbid") + + datum_id: Annotated[ + str, + Field( + description="Globally unique identifier for this Datum (akin to 'uid' " + "for other Document types), typically formatted as '/'" + ), + ] + datum_kwargs: Annotated[ + Dict[str, Any], + Field( + description="Arguments to pass to the Handler to " + "retrieve one quanta of data", + ), + ] + resource: Annotated[ + str, Field(description="The UID of the Resource to which this Datum belongs") + ] diff --git a/src/event_model/basemodels/datum_page.py b/src/event_model/basemodels/datum_page.py new file mode 100644 index 000000000..ca26f12e4 --- /dev/null +++ b/src/event_model/basemodels/datum_page.py @@ -0,0 +1,35 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel, ConfigDict, Field, RootModel +from typing_extensions import Annotated + + +class DataFrameForDatumPage(RootModel): + root: List[str] = Field(alias="Dataframe") + + +class DatumPage(BaseModel): + """Page of documents to reference a quanta of externally-stored data""" + + model_config = ConfigDict(extra="forbid") + + datum_id: Annotated[ + DataFrameForDatumPage, + Field( + description="Array unique identifiers for each Datum (akin to 'uid' for " + "other Document types), typically formatted as '/'" + ), + ] + datum_kwargs: Annotated[ + Dict[str, List[Any]], + Field( + description="Array of arguments to pass to the Handler to " + "retrieve one quanta of data" + ), + ] + resource: Annotated[ + str, + Field( + description="The UID of the Resource to which all Datums in the page belong" + ), + ] diff --git a/src/event_model/basemodels/event.py b/src/event_model/basemodels/event.py new file mode 100644 index 000000000..120a83da4 --- /dev/null +++ b/src/event_model/basemodels/event.py @@ -0,0 +1,48 @@ +from typing import Any, Dict, Union + +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Annotated + + +class PartialEvent(BaseModel): + model_config = ConfigDict(extra="forbid") + + data: Annotated[Dict[str, Any], Field(description="The actual measurement data")] + filled: Annotated[ + Dict[str, Union[bool, str]], + Field( + default_factory=dict, + description="Mapping each of the keys of externally-stored data to the " + "boolean False, indicating that the data has not been loaded, or to " + "foreign keys (moved here from 'data' when the data was loaded)", + ), + ] + time: Annotated[ + float, + Field( + description="The event time. This maybe different than the timestamps on " + "each of the data entries.", + ), + ] + timestamps: Annotated[ + Dict[str, Any], + Field(description="The timestamps of the individual measurement data"), + ] + + +class Event(PartialEvent): + """Document to record a quanta of collected data""" + + model_config = ConfigDict(extra="forbid") + + descriptor: Annotated[ + str, Field(description="UID of the EventDescriptor to which this Event belongs") + ] + seq_num: Annotated[ + int, + Field( + description="Sequence number to identify the location of this Event in the " + "Event stream", + ), + ] + uid: Annotated[str, Field(description="Globally unique identifier for this Event")] diff --git a/src/event_model/basemodels/event_descriptor.py b/src/event_model/basemodels/event_descriptor.py new file mode 100644 index 000000000..8fe8d2a97 --- /dev/null +++ b/src/event_model/basemodels/event_descriptor.py @@ -0,0 +1,319 @@ +import re +from typing import Any, Dict, List, Optional, Tuple, Union + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + RootModel, + field_validator, + model_validator, +) +from pydantic.config import JsonDict +from typing_extensions import Annotated, Literal + + +class Dtype(RootModel): + root: Literal["string", "number", "array", "boolean", "integer"] + + +NO_DOTS_PATTERN = r"^([^./]+)$" + + +class DataType(RootModel): + root: Any = Field(alias="DataType") + + @field_validator("root") + def validate_root(cls, value): + if not isinstance(value, dict): + return value + for key, val in value.items(): + if not re.match(NO_DOTS_PATTERN, key): + raise ValueError( + f"Key '{key}' does not match pattern '{NO_DOTS_PATTERN}'" + ) + if isinstance(val, dict): + value[key] = DataType(val) + return value + + +class LimitsRange(BaseModel): + model_config = ConfigDict(extra="forbid") + + low: Optional[float] + high: Optional[float] + + +class RdsRange(BaseModel): + """RDS (Read different than set) parameters range. + + + https://tango-controls.readthedocs.io/en/latest/development/device-api/attribute-alarms.html#the-read-different-than-set-rds-alarm + """ + + time_difference: Annotated[ + float, + Field( + description=( + "ms since last update to fail after if set point and " + "read point are not within `value_difference` of each other." + ) + ), + ] + value_difference: Annotated[ + float, + Field( + description=( + "Allowed difference in value between set point and read point " + "after `time_difference`." + ), + ), + ] + + +class Limits(BaseModel): + """ + Epics limits: + see 3.4.1 https://epics.anl.gov/base/R3-14/12-docs/AppDevGuide/node4.html + """ + + model_config = ConfigDict(extra="forbid") + + control: Annotated[ + Optional[LimitsRange], Field(default=None, description="Control limits.") + ] + display: Annotated[ + Optional[LimitsRange], Field(default=None, description="Display limits.") + ] + warning: Annotated[ + Optional[LimitsRange], Field(default=None, description="Warning limits.") + ] + alarm: Annotated[ + Optional[LimitsRange], Field(default=None, description="Alarm limits.") + ] + + hysteresis: Annotated[ + Optional[float], Field(default=None, description="Hysteresis.") + ] + rds: Annotated[ + Optional[RdsRange], Field(default=None, description="RDS parameters.") + ] + + +_ConstrainedDtype = Annotated[ + str, + Field( + description="A numpy dtype e.g `//" + ), + ] + seq_nums: Annotated[ + StreamRange, + Field( + description="A slice object showing the Event numbers the " + "resource corresponds to" + ), + ] + indices: Annotated[ + StreamRange, + Field( + description="A slice object passed to the StreamResource " + "handler so it can hand back data and timestamps" + ), + ] diff --git a/src/event_model/basemodels/stream_resource.py b/src/event_model/basemodels/stream_resource.py new file mode 100644 index 000000000..be00838f6 --- /dev/null +++ b/src/event_model/basemodels/stream_resource.py @@ -0,0 +1,49 @@ +from typing import Any, Dict + +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Annotated + + +class StreamResource(BaseModel): + """ + Document to reference a collection (e.g. file or group of files) of + externally-stored data streams + """ + + model_config = ConfigDict( + extra="allow", + ) + + data_key: Annotated[ + str, + Field( + description="A string to show which data_key of the " + "Descriptor are being streamed" + ), + ] + parameters: Annotated[ + Dict[str, Any], + Field( + description="Additional keyword arguments to pass to the Handler to read a " + "Stream Resource", + ), + ] + uri: Annotated[str, Field(description="URI for locating this resource")] + run_start: Annotated[ + str, + Field( + description="Globally unique ID to the run_start document " + "this Stream Resource is associated with.", + default="", + ), + ] + mimetype: Annotated[ + str, + Field( + description="String identifying the format/type of this Stream Resource, " + "used to identify a compatible Handler", + ), + ] + uid: Annotated[ + str, Field(description="Globally unique identifier for this Stream Resource") + ] diff --git a/src/event_model/documents/__init__.py b/src/event_model/documents/__init__.py index 33a2faefd..6266c3c8a 100644 --- a/src/event_model/documents/__init__.py +++ b/src/event_model/documents/__init__.py @@ -1,62 +1,40 @@ +# generated in `event_model/generate` + from typing import Tuple, Type, Union -# flake8: noqa -from event_model.documents.datum import Datum -from event_model.documents.datum_page import DatumPage -from event_model.documents.event import Event -from event_model.documents.event_descriptor import ( - Dtype, - EventDescriptor, - Limits, - LimitsRange, -) -from event_model.documents.event_page import EventPage -from event_model.documents.resource import Resource -from event_model.documents.run_start import RunStart -from event_model.documents.run_stop import RunStop -from event_model.documents.stream_datum import StreamDatum -from event_model.documents.stream_resource import StreamResource +from .datum import * # noqa: F403 +from .datum_page import * # noqa: F403 +from .event import * # noqa: F403 +from .event_descriptor import * # noqa: F403 +from .event_page import * # noqa: F403 +from .resource import * # noqa: F403 +from .run_start import * # noqa: F403 +from .run_stop import * # noqa: F403 +from .stream_datum import * # noqa: F403 +from .stream_resource import * # noqa: F403 DocumentType = Union[ - Type[Datum], - Type[DatumPage], - Type[Event], - Type[EventDescriptor], - Type[EventPage], - Type[Resource], - Type[RunStart], - Type[RunStop], - Type[StreamDatum], - Type[StreamResource], + Type[Datum], # noqa: F405, + Type[DatumPage], # noqa: F405, + Type[Event], # noqa: F405, + Type[EventDescriptor], # noqa: F405, + Type[EventPage], # noqa: F405, + Type[Resource], # noqa: F405, + Type[RunStart], # noqa: F405, + Type[RunStop], # noqa: F405, + Type[StreamDatum], # noqa: F405, + Type[StreamResource], # noqa: F405, ] ALL_DOCUMENTS: Tuple[DocumentType, ...] = ( - Datum, - DatumPage, - Event, - EventDescriptor, - EventPage, - Resource, - RunStart, - RunStop, - StreamDatum, - StreamResource, + Datum, # noqa: F405 + DatumPage, # noqa: F405 + Event, # noqa: F405 + EventDescriptor, # noqa: F405 + EventPage, # noqa: F405 + Resource, # noqa: F405 + RunStart, # noqa: F405 + RunStop, # noqa: F405 + StreamDatum, # noqa: F405 + StreamResource, # noqa: F405 ) - - -__all__ = [ - "Datum", - "DatumPage", - "Dtype", - "Event", - "EventDescriptor", - "EventPage", - "Limits", - "LimitsRange", - "Resource", - "RunStart", - "RunStop", - "StreamDatum", - "StreamResource", - "DocumentType", -] diff --git a/src/event_model/documents/datum.py b/src/event_model/documents/datum.py index 335f1247d..07470f928 100644 --- a/src/event_model/documents/datum.py +++ b/src/event_model/documents/datum.py @@ -1,30 +1,26 @@ -from typing import Any, Dict +# ruff: noqa +# generated by datamodel-codegen: +# filename: datum.json -from typing_extensions import Annotated, TypedDict +from __future__ import annotations -from .generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, TypedDict -DATUM_EXTRA_SCHEMA = {"additionalProperties": False} - -@add_extra_schema(DATUM_EXTRA_SCHEMA) class Datum(TypedDict): - """Document to reference a quanta of externally-stored data""" + """ + Document to reference a quanta of externally-stored data + """ - datum_id: Annotated[ - str, - Field( - description="Globally unique identifier for this Datum (akin to 'uid' " - "for other Document types), typically formatted as '/'" - ), - ] - datum_kwargs: Annotated[ - Dict[str, Any], - Field( - description="Arguments to pass to the Handler to " - "retrieve one quanta of data", - ), - ] - resource: Annotated[ - str, Field(description="The UID of the Resource to which this Datum belongs") - ] + datum_id: str + """ + Globally unique identifier for this Datum (akin to 'uid' for other Document types), typically formatted as '/' + """ + datum_kwargs: Dict[str, Any] + """ + Arguments to pass to the Handler to retrieve one quanta of data + """ + resource: str + """ + The UID of the Resource to which this Datum belongs + """ diff --git a/src/event_model/documents/datum_page.py b/src/event_model/documents/datum_page.py index fe66d6505..69ee840cb 100644 --- a/src/event_model/documents/datum_page.py +++ b/src/event_model/documents/datum_page.py @@ -1,33 +1,28 @@ -from typing import Any, Dict, List +# ruff: noqa +# generated by datamodel-codegen: +# filename: datum_page.json -from typing_extensions import Annotated, TypedDict +from __future__ import annotations -from .generate.type_wrapper import DataFrameForDatumPage, Field, add_extra_schema +from typing import Dict, List, TypedDict -DATUM_PAGE_EXTRA_SCHEMA = {"additionalProperties": False} +DataFrameForDatumPage = List[str] -@add_extra_schema(DATUM_PAGE_EXTRA_SCHEMA) class DatumPage(TypedDict): - """Page of documents to reference a quanta of externally-stored data""" + """ + Page of documents to reference a quanta of externally-stored data + """ - datum_id: Annotated[ - DataFrameForDatumPage, - Field( - description="Array unique identifiers for each Datum (akin to 'uid' for " - "other Document types), typically formatted as '/'" - ), - ] - datum_kwargs: Annotated[ - Dict[str, List[Any]], - Field( - description="Array of arguments to pass to the Handler to " - "retrieve one quanta of data" - ), - ] - resource: Annotated[ - str, - Field( - description="The UID of the Resource to which all Datums in the page belong" - ), - ] + datum_id: DataFrameForDatumPage + """ + Array unique identifiers for each Datum (akin to 'uid' for other Document types), typically formatted as '/' + """ + datum_kwargs: Dict[str, List] + """ + Array of arguments to pass to the Handler to retrieve one quanta of data + """ + resource: str + """ + The UID of the Resource to which all Datums in the page belong + """ diff --git a/src/event_model/documents/event.py b/src/event_model/documents/event.py index 2b6dc4773..c33a99607 100644 --- a/src/event_model/documents/event.py +++ b/src/event_model/documents/event.py @@ -1,49 +1,47 @@ -from typing import Any, Dict, Union +# ruff: noqa +# generated by datamodel-codegen: +# filename: event.json -from typing_extensions import Annotated, NotRequired, TypedDict +from __future__ import annotations -from .generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, TypedDict, Union -EVENT_EXTRA_SCHEMA = {"additionalProperties": False} +from typing_extensions import NotRequired class PartialEvent(TypedDict): - data: Annotated[Dict[str, Any], Field(description="The actual measurement data")] - filled: NotRequired[ - Annotated[ - Dict[str, Union[bool, str]], - Field( - description="Mapping each of the keys of externally-stored data to the " - "boolean False, indicating that the data has not been loaded, or to " - "foreign keys (moved here from 'data' when the data was loaded)" - ), - ] - ] - time: Annotated[ - float, - Field( - description="The event time. This maybe different than the timestamps on " - "each of the data entries.", - ), - ] - timestamps: Annotated[ - Dict[str, Any], - Field(description="The timestamps of the individual measurement data"), - ] - - -@add_extra_schema(EVENT_EXTRA_SCHEMA) + data: Dict[str, Any] + """ + The actual measurement data + """ + filled: NotRequired[Dict[str, Union[bool, str]]] + """ + Mapping each of the keys of externally-stored data to the boolean False, indicating that the data has not been loaded, or to foreign keys (moved here from 'data' when the data was loaded) + """ + time: float + """ + The event time. This maybe different than the timestamps on each of the data entries. + """ + timestamps: Dict[str, Any] + """ + The timestamps of the individual measurement data + """ + + class Event(PartialEvent): - """Document to record a quanta of collected data""" - - descriptor: Annotated[ - str, Field(description="UID of the EventDescriptor to which this Event belongs") - ] - seq_num: Annotated[ - int, - Field( - description="Sequence number to identify the location of this Event in the " - "Event stream", - ), - ] - uid: Annotated[str, Field(description="Globally unique identifier for this Event")] + """ + Document to record a quanta of collected data + """ + + descriptor: str + """ + UID of the EventDescriptor to which this Event belongs + """ + seq_num: int + """ + Sequence number to identify the location of this Event in the Event stream + """ + uid: str + """ + Globally unique identifier for this Event + """ diff --git a/src/event_model/documents/event_descriptor.py b/src/event_model/documents/event_descriptor.py index b92966981..74e337294 100644 --- a/src/event_model/documents/event_descriptor.py +++ b/src/event_model/documents/event_descriptor.py @@ -1,247 +1,193 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +# ruff: noqa +# generated by datamodel-codegen: +# filename: event_descriptor.json -from typing_extensions import Annotated, Literal, NotRequired, TypedDict +from __future__ import annotations -from .generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union -Dtype = Literal["string", "number", "array", "boolean", "integer"] +from typing_extensions import NotRequired +DtypeNumpy = str -class RdsRange(TypedDict): - """RDS (Read different than set) parameters range. +DtypeNumpyItem = List + + +DataType = Any - https://tango-controls.readthedocs.io/en/latest/development/device-api/attribute-alarms.html#the-read-different-than-set-rds-alarm - """ - time_difference: Annotated[ - float, - Field( - description=( - "ms since last update to fail after if set point and " - "read point are not within `value_difference` of each other." - ) - ), - ] - value_difference: NotRequired[ - Annotated[ - float, - Field( - description=( - "Allowed difference in value between set point and read point " - "after `time_difference`." - ) - ), - ] - ] +Dtype = Literal["string", "number", "array", "boolean", "integer"] class LimitsRange(TypedDict): - low: Optional[float] high: Optional[float] + low: Optional[float] -class Limits(TypedDict): +class PerObjectHint(TypedDict): """ - Epics and tango limits: - see 3.4.1 https://epics.anl.gov/base/R3-14/12-docs/AppDevGuide/node4.html - and - https://tango-controls.readthedocs.io/en/latest/development/device-api/attribute-alarms.html + The 'interesting' data keys for this device. """ - control: NotRequired[Annotated[LimitsRange, Field(description="Control limits.")]] - display: NotRequired[Annotated[LimitsRange, Field(description="Display limits.")]] - warning: NotRequired[Annotated[LimitsRange, Field(description="Warning limits.")]] - alarm: NotRequired[Annotated[LimitsRange, Field(description="Alarm limits.")]] - hysteresis: NotRequired[Annotated[float, Field(description="Hysteresis.")]] - rds: NotRequired[Annotated[RdsRange, Field(description="RDS parameters.")]] + NX_class: NotRequired[str] + """ + The NeXus class definition for this device. + """ + fields: NotRequired[List[str]] + """ + The 'interesting' data keys for this device. + """ -_ConstrainedDtype = Annotated[ - str, - Field( - description="A numpy dtype e.g ` Dict: - def inner(cls): - extra_schema[cls] = schema - return cls - - return inner - - # If pydantic is not installed (e.g the install isn't [dev]), - # or pydantic v1 is being used, then we expect to be able to - # run event-model, just not the schema generation code. - except (ModuleNotFoundError, ImportError): - # None of the dummy functions/classes should have been overwritten. - assert pydantic_version is None diff --git a/src/event_model/documents/generate/typeddict_to_schema.py b/src/event_model/documents/generate/typeddict_to_schema.py deleted file mode 100644 index 45bf11c40..000000000 --- a/src/event_model/documents/generate/typeddict_to_schema.py +++ /dev/null @@ -1,144 +0,0 @@ -import json -from collections import OrderedDict -from pathlib import Path -from typing import Dict, Optional - -from event_model.documents import ALL_DOCUMENTS, DocumentType -from event_model.documents.generate.type_wrapper import ( - GenerateJsonSchema, - TypeAdapter, - extra_schema, - to_snake, -) - - -def sort_alphabetically(schema: Dict) -> Dict: - """Sorts the schema alphabetically by key name, exchanging the - properties dicts for OrderedDicts""" - schema = OrderedDict(sorted(schema.items(), key=lambda x: x[0])) - - return schema - - -SortOrder = { - "title": 0, - "description": 1, - "type": 2, - "$defs": 3, - "properties": 4, - "required": 5, - "additionalProperties": 6, - "patternProperties": 7, -} - - -def sort_schema(document_schema: Dict) -> Dict: - assert isinstance(document_schema, dict) - document_schema = OrderedDict( - sorted( - document_schema.items(), - key=lambda x: SortOrder.get(x[0], len(SortOrder)), - ) - ) - - for key in document_schema: - if key in ("$defs", "properties", "required"): - if isinstance(document_schema[key], dict): - document_schema[key] = sort_alphabetically(document_schema[key]) - for key2 in document_schema[key]: - if isinstance(document_schema[key][key2], dict): - document_schema[key][key2] = sort_schema( - document_schema[key][key2] - ) - elif isinstance(document_schema[key], list): - document_schema[key].sort() - - return document_schema - - -def add_extra_schema(document_schema: Dict, extra_schema: Dict): - """ - Used to add user written schema to autogenerated schema. - - Once https://github.com/pydantic/pydantic/issues/8698 is fixed we can - use the following instead of this function: - - class document_type(document_type): - __pydantic_config__ = ConfigDict( - title=document_type.__name__, - json_schema_extra=extra_schema.get(document_type, None) - ) - """ - for key in extra_schema: - if key not in document_schema: - document_schema[key] = extra_schema[key] - - elif not isinstance(document_schema[key], type(extra_schema[key])): - raise ValueError( - f"Cannot merge {repr(document_schema[key])}:" - f"{type(document_schema[key])} with {extra_schema[key]}:" - f"{type(extra_schema[key])} in {document_schema['title']}[{key}]" - ) - - elif isinstance(document_schema[key], dict): - add_extra_schema(document_schema[key], extra_schema[key]) - - elif isinstance(document_schema[key], list): - document_schema[key] += extra_schema[key] - else: - raise ValueError( - f"{repr(document_schema[key])}:{type(document_schema[key])} in " - f"{document_schema['title']}[{key}] is of unsupport type" - ) - - -def dump_json(dictionary: Dict, file_path: Path, mode="w"): - with open(file_path, mode) as f: - json.dump(dictionary, f, indent=4) - - -class _GenerateJsonSchema(GenerateJsonSchema): - def generate(self, schema, mode="validation"): - json_schema = super().generate(schema, mode=mode) - json_schema["title"] = to_snake(json_schema["title"]) - json_schema["description"] = ( - json_schema["description"].replace("\n", " ").strip() - ) - return json_schema - - -def typeddict_to_schema( - document_type: DocumentType, schema_dir: Optional[Path] = None, sort: bool = True -) -> Dict: - assert document_type in ALL_DOCUMENTS - - type_adapter = TypeAdapter(document_type) - type_adapter.by_alias = True - document_schema = type_adapter.json_schema(schema_generator=_GenerateJsonSchema) - - if sort: - document_schema = sort_schema(document_schema) - - # Add the manually defined extra schema - add_extra_schema(document_schema, extra_schema.get(document_type, {})) - - if schema_dir: - file_path = schema_dir / f"{to_snake(document_schema['title'])}.json" - - # Check if the file has been updated - if not file_path.exists(): - print(f"{str(file_path)} does not exist yet, writing") - dump_json(document_schema, file_path) - else: - with open(file_path) as json_file: - skip_writing = json.load(json_file) == document_schema - - if skip_writing: - print(f"{document_schema['title']} is unchanged") - else: - print( - f"{document_schema['title']} has been changed, writing new schema" - ) - dump_json(document_schema, file_path) - - return document_schema diff --git a/src/event_model/documents/resource.py b/src/event_model/documents/resource.py index 882ceb694..5fab61a33 100644 --- a/src/event_model/documents/resource.py +++ b/src/event_model/documents/resource.py @@ -1,60 +1,48 @@ -from typing import Any, Dict +# ruff: noqa +# generated by datamodel-codegen: +# filename: resource.json -from typing_extensions import Annotated, Literal, NotRequired, TypedDict +from __future__ import annotations -from .generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, Literal, TypedDict -RESOURCE_EXTRA_SCHEMA = {"additionalProperties": False} +from typing_extensions import NotRequired class PartialResource(TypedDict): - spec: Annotated[ - str, - Field( - description="String identifying the format/type of this Resource, used to " - "identify a compatible Handler" - ), - ] - resource_path: Annotated[ - str, Field(description="Filepath or URI for locating this resource") - ] - resource_kwargs: Annotated[ - Dict[str, Any], - Field( - description="Additional argument to pass to the Handler to read a Resource" - ), - ] - - root: Annotated[ - str, - Field( - description="Subset of resource_path that is a local detail, not semantic." - ), - ] - uid: Annotated[ - str, Field(description="Globally unique identifier for this Resource") - ] - - -@add_extra_schema(RESOURCE_EXTRA_SCHEMA) + resource_kwargs: Dict[str, Any] + """ + Additional argument to pass to the Handler to read a Resource + """ + resource_path: str + """ + Filepath or URI for locating this resource + """ + root: str + """ + Subset of resource_path that is a local detail, not semantic. + """ + spec: str + """ + String identifying the format/type of this Resource, used to identify a compatible Handler + """ + uid: str + """ + Globally unique identifier for this Resource + """ + + class Resource(PartialResource): """ Document to reference a collection (e.g. file or group of files) of externally-stored data """ - path_semantics: NotRequired[ - Annotated[ - Literal["posix", "windows"], - Field(description="Rules for joining paths"), - ] - ] - run_start: NotRequired[ - Annotated[ - str, - Field( - description="Globally unique ID to the run_start document this " - "resource is associated with.", - ), - ] - ] + path_semantics: NotRequired[Literal["posix", "windows"]] + """ + Rules for joining paths + """ + run_start: NotRequired[str] + """ + Globally unique ID to the run_start document this resource is associated with. + """ diff --git a/src/event_model/documents/run_start.py b/src/event_model/documents/run_start.py index 742237ef6..41c500d2e 100644 --- a/src/event_model/documents/run_start.py +++ b/src/event_model/documents/run_start.py @@ -1,213 +1,166 @@ -from typing import Any, Dict, List, Union +# ruff: noqa +# generated by datamodel-codegen: +# filename: run_start.json -from typing_extensions import Annotated, Literal, NotRequired, TypedDict +from __future__ import annotations -from event_model.documents.generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, List, TypedDict, Union -from .generate.type_wrapper import DataType +from typing_extensions import NotRequired + + +class Calculation(TypedDict): + args: NotRequired[List] + callable: str + """ + callable function to perform calculation + """ + kwargs: NotRequired[Dict[str, Any]] + """ + kwargs for calcalation callable + """ + + +class ConfigurationProjection(TypedDict): + config_device: str + config_index: int + field: str + location: NotRequired[str] + """ + Projection comes from configuration fields in the event_descriptor document + """ + stream: str + type: NotRequired[str] + """ + Projection is of type linked, a value linked from the data set. + """ + + +DataType = Any class Hints(TypedDict): - """Start-level hints""" - - dimensions: NotRequired[ - Annotated[ - List[List[Union[List[str], str]]], - Field( - description="The independent axes of the experiment. " - "Ordered slow to fast", - ), - ] - ] + """ + Start-level hints + """ + dimensions: NotRequired[List[List[Union[List[str], str]]]] + """ + The independent axes of the experiment. Ordered slow to fast + """ -class Calculation(TypedDict): - args: NotRequired[List] - callable: Annotated[ - str, Field(description="callable function to perform calculation") - ] - kwargs: NotRequired[ - Annotated[Dict[str, Any], Field(description="kwargs for calcalation callable")] - ] +class LinkedEventProjection(TypedDict): + field: str + location: NotRequired[str] + """ + Projection comes and event + """ + stream: str + type: NotRequired[str] + """ + Projection is of type linked, a value linked from the data set. + """ -class Projection(TypedDict): - """Where to get the data from""" - calculation: NotRequired[ - Annotated[ - Calculation, - Field( - description="required fields if type is calculated", - title="calculation properties", - ), - ] - ] - config_index: NotRequired[int] - config_device: NotRequired[str] - field: NotRequired[str] - location: NotRequired[ - Annotated[ - Literal["start", "event", "configuration"], - Field( - description="start comes from metadata fields in the start document, " - "event comes from event, configuration comes from configuration " - "fields in the event_descriptor document" - ), - ] - ] - stream: NotRequired[str] - type: NotRequired[ - Annotated[ - Literal["linked", "calculated", "static"], - Field( - description="linked: a value linked from the data set, " - "calculated: a value that requires calculation, " - "static: a value defined here in the projection ", - ), - ] - ] - value: NotRequired[ - Annotated[ - Any, - Field( - description="value explicitely defined in the projection " - "when type==static." - ), - ] - ] +class StaticProjection(TypedDict): + type: NotRequired[str] + """ + Projection is of type static, a value defined here in the projection + """ + value: Any + """ + value explicitely defined in the static projection + """ -RUN_START_EXTRA_SCHEMA = { - "$defs": { - "DataType": { - "patternProperties": {"^([^./]+)$": {"$ref": "#/$defs/DataType"}}, - "additionalProperties": False, - }, - "Projection": { - "allOf": [ - { - "if": { - "allOf": [ - {"properties": {"location": {"enum": ["configuration"]}}}, - {"properties": {"type": {"enum": ["linked"]}}}, - ] - }, - "then": { - "required": [ - "type", - "location", - "config_index", - "config_device", - "field", - "stream", - ] - }, - }, - { - "if": { - "allOf": [ - {"properties": {"location": {"enum": ["event"]}}}, - {"properties": {"type": {"enum": ["linked"]}}}, - ] - }, - "then": {"required": ["type", "location", "field", "stream"]}, - }, - { - "if": { - "allOf": [ - {"properties": {"location": {"enum": ["event"]}}}, - {"properties": {"type": {"enum": ["calculated"]}}}, - ] - }, - "then": {"required": ["type", "field", "stream", "calculation"]}, - }, - { - "if": {"properties": {"type": {"enum": ["static"]}}}, - "then": {"required": ["type", "value"]}, - }, - ], - }, - }, - "properties": { - "hints": { - "additionalProperties": False, - "patternProperties": {"^([^.]+)$": {"$ref": "#/$defs/DataType"}}, - }, - }, - "patternProperties": {"^([^./]+)$": {"$ref": "#/$defs/DataType"}}, - "additionalProperties": False, -} +class CalculatedEventProjection(TypedDict): + calculation: Calculation + """ + required fields if type is calculated + """ + field: str + location: NotRequired[str] + """ + Projection comes and event + """ + stream: str + type: NotRequired[str] + """ + Projection is of type calculated, a value that requires calculation. + """ class Projections(TypedDict): - """Describe how to interperet this run as the given projection""" + """ + Describe how to interperet this run as the given projection + """ - configuration: Annotated[ - Dict[str, Any], Field(description="Static information about projection") - ] - name: NotRequired[Annotated[str, Field(description="The name of the projection")]] - projection: Annotated[Dict[Any, Projection], Field(description="")] - version: Annotated[ + configuration: Dict[str, Any] + """ + Static information about projection + """ + name: NotRequired[str] + """ + The name of the projection + """ + projection: Dict[ str, - Field( - description="The version of the projection spec. Can specify the version " - "of an external specification.", - ), - ] + Union[ + ConfigurationProjection, + LinkedEventProjection, + CalculatedEventProjection, + StaticProjection, + ], + ] + version: str + """ + The version of the projection spec. Can specify the version of an external specification. + """ -@add_extra_schema(RUN_START_EXTRA_SCHEMA) class RunStart(TypedDict): """ Document created at the start of run. Provides a seach target and later documents link to it """ - data_groups: NotRequired[ - Annotated[ - List[str], - Field( - description="An optional list of data access groups that have meaning " - "to some external system. Examples might include facility, beamline, " - "end stations, proposal, safety form.", - ), - ] - ] - data_session: NotRequired[ - Annotated[ - str, - Field( - description="An optional field for grouping runs. The meaning is " - "not mandated, but this is a data management grouping and not a " - "scientific grouping. It is intended to group runs in a visit or " - "set of trials.", - ), - ] - ] - data_type: NotRequired[Annotated[DataType, Field(description="")]] - group: NotRequired[ - Annotated[str, Field(description="Unix group to associate this data with")] - ] - hints: NotRequired[Annotated[Hints, Field(description="Start-level hints")]] - owner: NotRequired[ - Annotated[str, Field(description="Unix owner to associate this data with")] - ] - project: NotRequired[ - Annotated[str, Field(description="Name of project that this run is part of")] - ] - projections: NotRequired[Annotated[List[Projections], Field(description="")]] - sample: NotRequired[ - Annotated[ - Union[Dict[str, Any], str], - Field( - description="Information about the sample, may be a UID to " - "another collection" - ), - ] - ] - scan_id: NotRequired[ - Annotated[int, Field(description="Scan ID number, not globally unique")] - ] - time: Annotated[float, Field(description="Time the run started. Unix epoch time")] - uid: Annotated[str, Field(description="Globally unique ID for this run")] + data_groups: NotRequired[List[str]] + """ + An optional list of data access groups that have meaning to some external system. Examples might include facility, beamline, end stations, proposal, safety form. + """ + data_session: NotRequired[str] + """ + An optional field for grouping runs. The meaning is not mandated, but this is a data management grouping and not a scientific grouping. It is intended to group runs in a visit or set of trials. + """ + data_type: NotRequired[DataType] + group: NotRequired[str] + """ + Unix group to associate this data with + """ + hints: NotRequired[Hints] + owner: NotRequired[str] + """ + Unix owner to associate this data with + """ + project: NotRequired[str] + """ + Name of project that this run is part of + """ + projections: NotRequired[List[Projections]] + sample: NotRequired[Union[Dict[str, Any], str]] + """ + Information about the sample, may be a UID to another collection + """ + scan_id: NotRequired[int] + """ + Scan ID number, not globally unique + """ + time: float + """ + Time the run started. Unix epoch time + """ + uid: str + """ + Globally unique ID for this run + """ diff --git a/src/event_model/documents/run_stop.py b/src/event_model/documents/run_stop.py index 14ae19bd4..9bc390c8c 100644 --- a/src/event_model/documents/run_stop.py +++ b/src/event_model/documents/run_stop.py @@ -1,44 +1,47 @@ -from typing import Dict +# ruff: noqa +# generated by datamodel-codegen: +# filename: run_stop.json -from typing_extensions import Annotated, Literal, NotRequired, TypedDict +from __future__ import annotations -from .generate.type_wrapper import DataType, Field, add_extra_schema +from typing import Any, Dict, Literal, TypedDict -RUN_STOP_EXTRA_SCHEMA = { - "patternProperties": {"^([^./]+)$": {"$ref": "#/$defs/DataType"}}, - "additionalProperties": False, -} +from typing_extensions import NotRequired + +DataType = Any -@add_extra_schema(RUN_STOP_EXTRA_SCHEMA) class RunStop(TypedDict): """ Document for the end of a run indicating the success/fail state of the run and the end time """ - data_type: NotRequired[Annotated[DataType, Field(description="data_type")]] - exit_status: Annotated[ - Literal["success", "abort", "fail"], - Field(description="State of the run when it ended"), - ] - num_events: NotRequired[ - Annotated[ - Dict[str, int], - Field( - description="Number of Events per named stream", - ), - ] - ] - reason: NotRequired[ - Annotated[str, Field(description="Long-form description of why the run ended")] - ] - run_start: Annotated[ - str, - Field( - description="Reference back to the run_start document that this document " - "is paired with.", - ), - ] - time: Annotated[float, Field(description="The time the run ended. Unix epoch")] - uid: Annotated[str, Field(description="Globally unique ID for this document")] + data_type: NotRequired[DataType] + """ + data_type + """ + exit_status: Literal["success", "abort", "fail"] + """ + State of the run when it ended + """ + num_events: NotRequired[Dict[str, int]] + """ + Number of Events per named stream + """ + reason: NotRequired[str] + """ + Long-form description of why the run ended + """ + run_start: str + """ + Reference back to the run_start document that this document is paired with. + """ + time: float + """ + The time the run ended. Unix epoch + """ + uid: str + """ + Globally unique ID for this document + """ diff --git a/src/event_model/documents/stream_datum.py b/src/event_model/documents/stream_datum.py index a48de7aba..4fc62e631 100644 --- a/src/event_model/documents/stream_datum.py +++ b/src/event_model/documents/stream_datum.py @@ -1,56 +1,49 @@ -from typing_extensions import Annotated, TypedDict +# ruff: noqa +# generated by datamodel-codegen: +# filename: stream_datum.json -from .generate.type_wrapper import Field, add_extra_schema +from __future__ import annotations +from typing import TypedDict -class StreamRange(TypedDict): - """The parameters required to describe a sequence of incrementing integers""" - - start: Annotated[ - int, - Field(description="First number in the range"), - ] - stop: Annotated[ - int, - Field(description="Last number in the range is less than this number"), - ] +class StreamRange(TypedDict): + """ + The parameters required to describe a sequence of incrementing integers + """ -STREAM_DATUM_EXTRA_SCHEMA = {"additionalProperties": False} + start: int + """ + First number in the range + """ + stop: int + """ + Last number in the range is less than this number + """ -@add_extra_schema(STREAM_DATUM_EXTRA_SCHEMA) class StreamDatum(TypedDict): - """Document to reference a quanta of an externally-stored stream of data.""" - - descriptor: Annotated[ - str, - Field(description="UID of the EventDescriptor to " "which this Datum belongs"), - ] - stream_resource: Annotated[ - str, - Field( - description="The UID of the Stream Resource to which this Datum belongs." - ), - ] - uid: Annotated[ - str, - Field( - description="Globally unique identifier for this Datum. A suggested " - "formatting being '//" - ), - ] - seq_nums: Annotated[ - StreamRange, - Field( - description="A slice object showing the Event numbers the " - "resource corresponds to" - ), - ] - indices: Annotated[ - StreamRange, - Field( - description="A slice object passed to the StreamResource " - "handler so it can hand back data and timestamps" - ), - ] + """ + Document to reference a quanta of an externally-stored stream of data. + """ + + descriptor: str + """ + UID of the EventDescriptor to which this Datum belongs + """ + indices: StreamRange + """ + A slice object passed to the StreamResource handler so it can hand back data and timestamps + """ + seq_nums: StreamRange + """ + A slice object showing the Event numbers the resource corresponds to + """ + stream_resource: str + """ + The UID of the Stream Resource to which this Datum belongs. + """ + uid: str + """ + Globally unique identifier for this Datum. A suggested formatting being '// + """ diff --git a/src/event_model/documents/stream_resource.py b/src/event_model/documents/stream_resource.py index e675e5810..aca50f539 100644 --- a/src/event_model/documents/stream_resource.py +++ b/src/event_model/documents/stream_resource.py @@ -1,50 +1,41 @@ -from typing import Any, Dict +# ruff: noqa +# generated by datamodel-codegen: +# filename: stream_resource.json -from typing_extensions import Annotated, NotRequired, TypedDict +from __future__ import annotations -from .generate.type_wrapper import Field, add_extra_schema +from typing import Any, Dict, TypedDict -STREAM_RESOURCE_EXTRA_SCHEMA = {"additionalProperties": False} +from typing_extensions import NotRequired -@add_extra_schema(STREAM_RESOURCE_EXTRA_SCHEMA) class StreamResource(TypedDict): """ Document to reference a collection (e.g. file or group of files) of externally-stored data streams """ - data_key: Annotated[ - str, - Field( - description="A string to show which data_key of the " - "Descriptor are being streamed" - ), - ] - parameters: Annotated[ - Dict[str, Any], - Field( - description="Additional keyword arguments to pass to the Handler to read a " - "Stream Resource", - ), - ] - uri: Annotated[str, Field(description="URI for locating this resource")] - run_start: NotRequired[ - Annotated[ - str, - Field( - description="Globally unique ID to the run_start document " - "this Stream Resource is associated with.", - ), - ] - ] - mimetype: Annotated[ - str, - Field( - description="String identifying the format/type of this Stream Resource, " - "used to identify a compatible Handler", - ), - ] - uid: Annotated[ - str, Field(description="Globally unique identifier for this Stream Resource") - ] + data_key: str + """ + A string to show which data_key of the Descriptor are being streamed + """ + mimetype: str + """ + String identifying the format/type of this Stream Resource, used to identify a compatible Handler + """ + parameters: Dict[str, Any] + """ + Additional keyword arguments to pass to the Handler to read a Stream Resource + """ + run_start: NotRequired[str] + """ + Globally unique ID to the run_start document this Stream Resource is associated with. + """ + uid: str + """ + Globally unique identifier for this Stream Resource + """ + uri: str + """ + URI for locating this resource + """ diff --git a/src/event_model/tests/__init__.py b/src/event_model/generate/__init__.py similarity index 100% rename from src/event_model/tests/__init__.py rename to src/event_model/generate/__init__.py diff --git a/src/event_model/generate/__main__.py b/src/event_model/generate/__main__.py new file mode 100644 index 000000000..1aa720734 --- /dev/null +++ b/src/event_model/generate/__main__.py @@ -0,0 +1,4 @@ +from event_model.generate.create_documents import generate + +if __name__ == "__main__": + generate() diff --git a/src/event_model/generate/create_documents.py b/src/event_model/generate/create_documents.py new file mode 100644 index 000000000..6e921593d --- /dev/null +++ b/src/event_model/generate/create_documents.py @@ -0,0 +1,270 @@ +import json +from collections import OrderedDict +from pathlib import Path +from typing import Any, Dict, Type, cast + +import datamodel_code_generator +from pydantic import BaseModel +from pydantic.alias_generators import to_snake +from pydantic.json_schema import GenerateJsonSchema + +from event_model.basemodels import ALL_BASEMODELS + +JSONSCHEMA = Path(__file__).parent.parent / "schemas" +TYPEDDICTS = Path(__file__).parent.parent / "documents" +TEMPLATES = Path(__file__).parent / "templates" + + +class SnakeCaseTitleField(GenerateJsonSchema): + def generate(self, schema, mode="validation"): + jsonschema = super().generate(schema, mode=mode) + jsonschema["title"] = to_snake(jsonschema["title"]) + + for key, property in jsonschema.get("properties", {}).items(): + property["title"] = to_snake(key) + for definition in jsonschema.get("$defs", {}).values(): + definition["title"] = to_snake(definition["title"]) + for key, property in definition.get("properties", {}).items(): + property["title"] = to_snake(key) + return jsonschema + + +def snake_to_title(snake: str) -> str: + return snake.title().replace("_", "") + + +# Used to add user written schema to autogenerated schema. +def merge_dicts(dict1: dict, dict2: dict) -> dict: + return_dict = dict2.copy() + + for key in dict1: + if key not in dict2: + return_dict[key] = dict1[key] + + elif not isinstance(dict1[key], type(dict2[key])): + return_dict[key] = dict1[key] + + elif isinstance(dict1[key], dict): + return_dict[key] = merge_dicts(dict1[key], dict2[key]) + + elif isinstance(dict1[key], list): + return_dict[key] = dict1[key] + dict2[key] + + return return_dict + + +def sort_alphabetically(schema: Dict) -> Dict: + """Sorts the schema alphabetically by key name, exchanging the + properties dicts for OrderedDicts""" + schema = OrderedDict(sorted(schema.items(), key=lambda x: x[0])) + + return schema + + +SortOrder = { + "title": 0, + "description": 1, + "type": 2, + "allOf": 3, + "$defs": 4, + "properties": 5, + "required": 6, + "patternProperties": 7, + "additionalProperties": 8, + "unevaluatedProperties": 9, +} + + +def sort_schema(document_schema: Dict[str, Any]) -> Dict[str, Any]: + assert isinstance(document_schema, dict) + document_schema = OrderedDict( + sorted( + document_schema.items(), + key=lambda x: SortOrder.get(x[0], len(SortOrder)), + ) + ) + + for key in document_schema: + if key in ("$defs", "properties", "required"): + if isinstance(document_schema[key], dict): + document_schema[key] = sort_alphabetically(document_schema[key]) + for key2 in document_schema[key]: + if isinstance(document_schema[key][key2], dict): + document_schema[key][key2] = sort_schema( + document_schema[key][key2] + ) + elif isinstance(document_schema[key], list): + document_schema[key].sort() + + return document_schema + + +def dump_json(schema: Dict[str, Any], jsonschema_path: Path): + """Returns true if the basemodel had to change, false otherwise""" + sorted_schema = sort_schema(schema) + with jsonschema_path.open(mode="w") as f: + json.dump(sorted_schema, f, indent=4) + f.write("\n") + return True + + +def generate_typeddict(jsonschema_path: Path, documents_root=TYPEDDICTS) -> Path: + document_path = documents_root / f"{jsonschema_path.stem}.py" + datamodel_code_generator.generate( + input_=jsonschema_path, + input_file_type=datamodel_code_generator.InputFileType.JsonSchema, + output=document_path, + output_model_type=datamodel_code_generator.DataModelType.TypingTypedDict, + target_python_version=datamodel_code_generator.PythonVersion.PY_38, + use_schema_description=True, + use_field_description=True, + use_annotated=True, + field_constraints=True, + wrap_string_literal=True, + use_double_quotes=True, + disable_timestamp=True, + custom_template_dir=TEMPLATES, + ) + with document_path.open("r+") as f: + content = f.read() + f.seek(0, 0) + f.write("# ruff: noqa\n" + content) + return document_path + + +def get_jsonschema_path(jsonschema: Dict, root=JSONSCHEMA) -> Path: + return root / f"{to_snake(jsonschema['title'])}.json" + + +def _generate_jsonschema( + basemodel: Type[BaseModel], + jsonschema_root=JSONSCHEMA, + is_parent: bool = False, +) -> Dict[str, Any]: + refs = [] + + for parent in [parent for parent in basemodel.__bases__ if parent is not BaseModel]: + assert issubclass( + parent, BaseModel + ) # Parents of BaseModel's can only be other BaseModel + parent_jsonschema = _generate_jsonschema( + parent, + jsonschema_root=jsonschema_root, + is_parent=True, + ) + refs.append(parent_jsonschema) + + schema_extra: Dict[str, Any] = cast( + Dict[str, Any], basemodel.model_config.pop("json_schema_extra", {}) + ) + model_jsonschema = basemodel.model_json_schema(schema_generator=SnakeCaseTitleField) + model_jsonschema = merge_dicts(model_jsonschema, schema_extra) + + additional_properties = ( + model_jsonschema.pop("additionalProperties", None) + if is_parent or refs + else None + ) + + if refs: + if additional_properties is not None and not is_parent: + model_jsonschema["unevaluatedProperties"] = additional_properties + + for ref in refs: + for property in ref["properties"]: + model_jsonschema["properties"].pop(property, None) + if model_jsonschema.get("$defs", None) is None: + model_jsonschema["$defs"] = {} + model_jsonschema["$defs"][snake_to_title(ref["title"])] = ref + + if model_jsonschema.get("required", None) is not None: + for ref_property in ref["properties"]: + if ref_property in model_jsonschema["required"]: + model_jsonschema["required"].remove(ref_property) + + model_jsonschema["allOf"] = [ + {"$ref": f"#/$defs/{snake_to_title(ref['title'])}"} for ref in refs + ] + + return model_jsonschema + + +def generate_jsonschema( + basemodel: Type[BaseModel], + jsonschema_root=JSONSCHEMA, +) -> Path: + model_jsonschema = _generate_jsonschema(basemodel, jsonschema_root=jsonschema_root) + jsonschema_path = get_jsonschema_path(model_jsonschema, root=jsonschema_root) + dump_json(model_jsonschema, jsonschema_path) + + return jsonschema_path + + +GENERATED_INIT_PY = """# generated in `event_model/generate` + +from typing import Tuple, Type, Union + +{0} + +{1}Type = Union[ +{2} +] + +ALL_{3}: Tuple[{1}Type, ...] = ( +{4} +)""" + + +def generate_init_py(output_root: Path): + document_names = sorted( + [ + file.stem + for file in output_root.iterdir() + if file.stem != "__init__" and file.suffix == ".py" + ] + ) + + document_class_names = [ + f"{snake_to_title(document_name)}" for document_name in document_names + ] + + init_py_imports = "\n".join( + sorted( + [ + f"from .{document_name} import * # noqa: F403" + for document_name in document_names + ] + ) + ) + + document_types = "\n".join( + [ + f" Type[{class_name}], # noqa: F405," + for class_name in document_class_names + ] + ) + + all_documents = "\n".join( + [f" {class_name}, # noqa: F405" for class_name in document_class_names] + ) + + init_py = GENERATED_INIT_PY.format( + init_py_imports, + output_root.name.rstrip("s").title(), + document_types, + output_root.name.upper(), + all_documents, + ) + + with open(output_root / "__init__.py", "w") as f: + f.write(init_py + "\n") + + +def generate(jsonschema_root: Path = JSONSCHEMA, documents_root: Path = TYPEDDICTS): + for basemodel in ALL_BASEMODELS: + generate_jsonschema(basemodel, jsonschema_root=jsonschema_root) + for jsonschema_path in jsonschema_root.iterdir(): + if not jsonschema_path.name.startswith("bulk"): + generate_typeddict(jsonschema_path, documents_root=documents_root) + + generate_init_py(documents_root) diff --git a/src/event_model/generate/templates/root.jinja2 b/src/event_model/generate/templates/root.jinja2 new file mode 100644 index 000000000..35ee1f1aa --- /dev/null +++ b/src/event_model/generate/templates/root.jinja2 @@ -0,0 +1,8 @@ +{%- set field = fields[0] %} +{%- if class_name == 'DataType' %} +{{ class_name }} = Any +{%- elif field.annotated %} +{{ class_name }} = {{ field.annotated }} +{%- else %} +{{ class_name }} = {{ field.type_hint }} +{%- endif %} diff --git a/src/event_model/schemas/datum.json b/src/event_model/schemas/datum.json index a85c6cf80..987962454 100644 --- a/src/event_model/schemas/datum.json +++ b/src/event_model/schemas/datum.json @@ -4,17 +4,17 @@ "type": "object", "properties": { "datum_id": { - "title": "Datum Id", + "title": "datum_id", "description": "Globally unique identifier for this Datum (akin to 'uid' for other Document types), typically formatted as '/'", "type": "string" }, "datum_kwargs": { - "title": "Datum Kwargs", + "title": "datum_kwargs", "description": "Arguments to pass to the Handler to retrieve one quanta of data", "type": "object" }, "resource": { - "title": "Resource", + "title": "resource", "description": "The UID of the Resource to which this Datum belongs", "type": "string" } diff --git a/src/event_model/schemas/datum_page.json b/src/event_model/schemas/datum_page.json index 768adb26f..3148512fb 100644 --- a/src/event_model/schemas/datum_page.json +++ b/src/event_model/schemas/datum_page.json @@ -4,7 +4,7 @@ "type": "object", "$defs": { "DataFrameForDatumPage": { - "title": "DataFrameForDatumPage", + "title": "data_frame_for_datum_page", "type": "array", "items": { "type": "string" @@ -13,11 +13,12 @@ }, "properties": { "datum_id": { + "title": "datum_id", "description": "Array unique identifiers for each Datum (akin to 'uid' for other Document types), typically formatted as '/'", "$ref": "#/$defs/DataFrameForDatumPage" }, "datum_kwargs": { - "title": "Datum Kwargs", + "title": "datum_kwargs", "description": "Array of arguments to pass to the Handler to retrieve one quanta of data", "type": "object", "additionalProperties": { @@ -26,7 +27,7 @@ } }, "resource": { - "title": "Resource", + "title": "resource", "description": "The UID of the Resource to which all Datums in the page belong", "type": "string" } diff --git a/src/event_model/schemas/event.json b/src/event_model/schemas/event.json index af2f6eff4..22b64d4af 100644 --- a/src/event_model/schemas/event.json +++ b/src/event_model/schemas/event.json @@ -2,60 +2,75 @@ "title": "event", "description": "Document to record a quanta of collected data", "type": "object", + "allOf": [ + { + "$ref": "#/$defs/PartialEvent" + } + ], + "$defs": { + "PartialEvent": { + "title": "partial_event", + "type": "object", + "properties": { + "data": { + "title": "data", + "description": "The actual measurement data", + "type": "object" + }, + "filled": { + "title": "filled", + "description": "Mapping each of the keys of externally-stored data to the boolean False, indicating that the data has not been loaded, or to foreign keys (moved here from 'data' when the data was loaded)", + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "time": { + "title": "time", + "description": "The event time. This maybe different than the timestamps on each of the data entries.", + "type": "number" + }, + "timestamps": { + "title": "timestamps", + "description": "The timestamps of the individual measurement data", + "type": "object" + } + }, + "required": [ + "data", + "time", + "timestamps" + ] + } + }, "properties": { - "data": { - "title": "Data", - "description": "The actual measurement data", - "type": "object" - }, "descriptor": { - "title": "Descriptor", + "title": "descriptor", "description": "UID of the EventDescriptor to which this Event belongs", "type": "string" }, - "filled": { - "title": "Filled", - "description": "Mapping each of the keys of externally-stored data to the boolean False, indicating that the data has not been loaded, or to foreign keys (moved here from 'data' when the data was loaded)", - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ] - } - }, "seq_num": { - "title": "Seq Num", + "title": "seq_num", "description": "Sequence number to identify the location of this Event in the Event stream", "type": "integer" }, - "time": { - "title": "Time", - "description": "The event time. This maybe different than the timestamps on each of the data entries.", - "type": "number" - }, - "timestamps": { - "title": "Timestamps", - "description": "The timestamps of the individual measurement data", - "type": "object" - }, "uid": { - "title": "Uid", + "title": "uid", "description": "Globally unique identifier for this Event", "type": "string" } }, "required": [ - "data", "descriptor", "seq_num", - "time", - "timestamps", "uid" ], - "additionalProperties": false + "unevaluatedProperties": false } diff --git a/src/event_model/schemas/event_descriptor.json b/src/event_model/schemas/event_descriptor.json index 02338d088..1db10d7f1 100644 --- a/src/event_model/schemas/event_descriptor.json +++ b/src/event_model/schemas/event_descriptor.json @@ -1,67 +1,66 @@ { "title": "event_descriptor", - "description": "Document to describe the data captured in the associated event documents", + "description": "Document to describe the data captured in the associated event\ndocuments", "type": "object", "$defs": { "Configuration": { - "title": "Configuration", + "title": "configuration", "type": "object", "properties": { "data": { - "title": "Data", + "title": "data", "description": "The actual measurement data", - "type": "object" + "type": "object", + "default": {} }, "data_keys": { - "title": "Data Keys", + "title": "data_keys", "description": "This describes the data stored alongside it in this configuration object.", "type": "object", "additionalProperties": { "$ref": "#/$defs/DataKey" - } + }, + "default": {} }, "timestamps": { - "title": "Timestamps", + "title": "timestamps", "description": "The timestamps of the individual measurement data", - "type": "object" + "type": "object", + "default": {} } - } + }, + "additionalProperties": true }, "DataKey": { - "title": "DataKey", + "title": "data_key", "description": "Describes the objects in the data property of Event documents", "type": "object", "properties": { "choices": { - "title": "Choices", + "title": "choices", "description": "Choices of enum value.", "type": "array", + "default": [], "items": { "type": "string" } }, "dims": { - "title": "Dims", + "title": "dims", "description": "The names for dimensions of the data. Null or empty list if scalar data", "type": "array", + "default": [], "items": { "type": "string" } }, "dtype": { - "title": "Dtype", + "title": "dtype", "description": "The type of the data in the event, given as a broad JSON schema type.", - "type": "string", - "enum": [ - "string", - "number", - "array", - "boolean", - "integer" - ] + "$ref": "#/$defs/Dtype" }, "dtype_numpy": { - "title": "Dtype Numpy", + "title": "dtype_numpy", "description": "The type of the data in the event, given as a numpy dtype string (or, for structured dtypes, array).", "anyOf": [ { @@ -87,25 +86,29 @@ }, "type": "array" } - ] + ], + "default": "" }, "external": { - "title": "External", + "title": "external", "description": "Where the data is stored if it is stored external to the events", "type": "string", + "default": "", "pattern": "^[A-Z]+:?" }, "limits": { + "title": "limits", "description": "Epics limits.", "$ref": "#/$defs/Limits" }, "object_name": { - "title": "Object Name", + "title": "object_name", "description": "The name of the object this key was pulled from.", - "type": "string" + "type": "string", + "default": "" }, "precision": { - "title": "Precision", + "title": "precision", "description": "Number of digits after decimal place if a floating point number", "anyOf": [ { @@ -114,10 +117,11 @@ { "type": "null" } - ] + ], + "default": null }, "shape": { - "title": "Shape", + "title": "shape", "description": "The shape of the data. Empty list indicates scalar data.", "type": "array", "items": { @@ -125,67 +129,136 @@ } }, "source": { - "title": "Source", + "title": "source", "description": "The source (ex piece of hardware) of the data.", "type": "string" }, "units": { - "title": "Units", + "title": "units", "description": "Engineering units of the value", - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] + "type": "string", + "default": "" } }, "required": [ "dtype", "shape", "source" + ], + "additionalProperties": true + }, + "DataType": { + "title": "DataType", + "patternProperties": { + "^([^./]+)$": { + "$ref": "#/$defs/DataType" + } + }, + "additionalProperties": false + }, + "Dtype": { + "title": "dtype", + "type": "string", + "enum": [ + "string", + "number", + "array", + "boolean", + "integer" ] }, "Limits": { - "title": "Limits", - "description": "Epics and tango limits:\nsee 3.4.1 https://epics.anl.gov/base/R3-14/12-docs/AppDevGuide/node4.html\nand\nhttps://tango-controls.readthedocs.io/en/latest/development/device-api/attribute-alarms.html", + "title": "limits", + "description": "Epics limits:\nsee 3.4.1 https://epics.anl.gov/base/R3-14/12-docs/AppDevGuide/node4.html", "type": "object", "properties": { "alarm": { + "title": "alarm", "description": "Alarm limits.", - "$ref": "#/$defs/LimitsRange" + "anyOf": [ + { + "$ref": "#/$defs/LimitsRange" + }, + { + "type": "null" + } + ], + "default": null }, "control": { + "title": "control", "description": "Control limits.", - "$ref": "#/$defs/LimitsRange" + "anyOf": [ + { + "$ref": "#/$defs/LimitsRange" + }, + { + "type": "null" + } + ], + "default": null }, "display": { + "title": "display", "description": "Display limits.", - "$ref": "#/$defs/LimitsRange" + "anyOf": [ + { + "$ref": "#/$defs/LimitsRange" + }, + { + "type": "null" + } + ], + "default": null }, "hysteresis": { - "title": "Hysteresis", + "title": "hysteresis", "description": "Hysteresis.", - "type": "number" + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null }, "rds": { + "title": "rds", "description": "RDS parameters.", - "$ref": "#/$defs/RdsRange" + "anyOf": [ + { + "$ref": "#/$defs/RdsRange" + }, + { + "type": "null" + } + ], + "default": null }, "warning": { + "title": "warning", "description": "Warning limits.", - "$ref": "#/$defs/LimitsRange" + "anyOf": [ + { + "$ref": "#/$defs/LimitsRange" + }, + { + "type": "null" + } + ], + "default": null } - } + }, + "additionalProperties": false }, "LimitsRange": { - "title": "LimitsRange", + "title": "limits_range", "type": "object", "properties": { "high": { - "title": "High", + "title": "high", "anyOf": [ { "type": "number" @@ -196,7 +269,7 @@ ] }, "low": { - "title": "Low", + "title": "low", "anyOf": [ { "type": "number" @@ -210,67 +283,64 @@ "required": [ "high", "low" - ] + ], + "additionalProperties": false }, "PerObjectHint": { - "title": "PerObjectHint", + "title": "per_object_hint", "description": "The 'interesting' data keys for this device.", "type": "object", "properties": { "NX_class": { - "title": "Nx Class", + "title": "nx_class", "description": "The NeXus class definition for this device.", "type": "string", + "default": "", "pattern": "^NX[A-Za-z_]+$" }, "fields": { - "title": "Fields", + "title": "fields", "description": "The 'interesting' data keys for this device.", "type": "array", + "default": [], "items": { "type": "string" } } - } + }, + "additionalProperties": true }, "RdsRange": { - "title": "RdsRange", + "title": "rds_range", "description": "RDS (Read different than set) parameters range.\n\n\nhttps://tango-controls.readthedocs.io/en/latest/development/device-api/attribute-alarms.html#the-read-different-than-set-rds-alarm", "type": "object", "properties": { "time_difference": { - "title": "Time Difference", + "title": "time_difference", "description": "ms since last update to fail after if set point and read point are not within `value_difference` of each other.", "type": "number" }, "value_difference": { - "title": "Value Difference", + "title": "value_difference", "description": "Allowed difference in value between set point and read point after `time_difference`.", "type": "number" } }, "required": [ - "time_difference" + "time_difference", + "value_difference" ] - }, - "DataType": { - "title": "DataType", - "patternProperties": { - "^([^./]+)$": { - "$ref": "#/$defs/DataType" - } - }, - "additionalProperties": false } }, "properties": { "configuration": { - "title": "Configuration", + "title": "configuration", "description": "Readings of configurational fields necessary for interpreting data in the Events.", "type": "object", "additionalProperties": { "$ref": "#/$defs/Configuration" - } + }, + "default": {} }, "data_keys": { "title": "data_keys", @@ -281,25 +351,28 @@ } }, "hints": { + "title": "hints", "$ref": "#/$defs/PerObjectHint" }, "name": { - "title": "Name", + "title": "name", "description": "A human-friendly name for this data stream, such as 'primary' or 'baseline'.", - "type": "string" + "type": "string", + "default": "" }, "object_keys": { - "title": "Object Keys", + "title": "object_keys", "description": "Maps a Device/Signal name to the names of the entries it produces in data_keys.", - "type": "object" + "type": "object", + "default": {} }, "run_start": { - "title": "Run Start", + "title": "run_start", "description": "Globally unique ID of this run's 'start' document.", "type": "string" }, "time": { - "title": "Time", + "title": "time", "description": "Creation time of the document as unix epoch time.", "type": "number" }, diff --git a/src/event_model/schemas/event_page.json b/src/event_model/schemas/event_page.json index 0469c6074..4f384d7c0 100644 --- a/src/event_model/schemas/event_page.json +++ b/src/event_model/schemas/event_page.json @@ -2,69 +2,85 @@ "title": "event_page", "description": "Page of documents to record a quanta of collected data", "type": "object", + "allOf": [ + { + "$ref": "#/$defs/PartialEventPage" + } + ], "$defs": { - "DataFrameForEventPage": { - "title": "DataFrameForEventPage", - "type": "object", - "additionalProperties": { - "items": {}, - "type": "array" - } - }, - "DataFrameForFilled": { - "title": "DataFrameForFilled", + "PartialEventPage": { + "title": "partial_event_page", "type": "object", - "additionalProperties": { - "items": { - "anyOf": [ - { - "type": "boolean" + "properties": { + "data": { + "title": "data", + "description": "The actual measurement data", + "type": "object", + "additionalProperties": { + "items": {}, + "type": "array" + } + }, + "filled": { + "title": "filled", + "description": "Mapping each of the keys of externally-stored data to an array containing the boolean False, indicating that the data has not been loaded, or to foreign keys (moved here from 'data' when the data was loaded)", + "type": "object", + "additionalProperties": { + "items": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] }, - { - "type": "string" - } - ] + "type": "array" + }, + "default": {} }, - "type": "array" - } + "time": { + "title": "time", + "description": "Array of Event times. This maybe different than the timestamps on each of the data entries", + "type": "array", + "items": { + "type": "number" + } + }, + "timestamps": { + "title": "timestamps", + "description": "The timestamps of the individual measurement data", + "type": "object", + "additionalProperties": { + "items": {}, + "type": "array" + } + } + }, + "required": [ + "data", + "time", + "timestamps" + ] } }, "properties": { - "data": { - "description": "The actual measurement data", - "$ref": "#/$defs/DataFrameForEventPage" - }, "descriptor": { - "title": "Descriptor", + "title": "descriptor", "description": "The UID of the EventDescriptor to which all of the Events in this page belong", "type": "string" }, - "filled": { - "description": "Mapping each of the keys of externally-stored data to an array containing the boolean False, indicating that the data has not been loaded, or to foreign keys (moved here from 'data' when the data was loaded)", - "$ref": "#/$defs/DataFrameForFilled" - }, "seq_num": { - "title": "Seq Num", + "title": "seq_num", "description": "Array of sequence numbers to identify the location of each Event in the Event stream", "type": "array", "items": { "type": "integer" } }, - "time": { - "title": "Time", - "description": "Array of Event times. This maybe different than the timestamps on each of the data entries", - "type": "array", - "items": { - "type": "number" - } - }, - "timestamps": { - "description": "The timestamps of the individual measurement data", - "$ref": "#/$defs/DataFrameForEventPage" - }, "uid": { - "title": "Uid", + "title": "uid", "description": "Array of globally unique identifiers for each Event", "type": "array", "items": { @@ -73,12 +89,9 @@ } }, "required": [ - "data", "descriptor", "seq_num", - "time", - "timestamps", "uid" ], - "additionalProperties": false + "unevaluatedProperties": false } diff --git a/src/event_model/schemas/resource.json b/src/event_model/schemas/resource.json index 1a3b6d9d1..476ce10c4 100644 --- a/src/event_model/schemas/resource.json +++ b/src/event_model/schemas/resource.json @@ -1,54 +1,70 @@ { "title": "resource", - "description": "Document to reference a collection (e.g. file or group of files) of externally-stored data", + "description": "Document to reference a collection (e.g. file or group of files) of\nexternally-stored data", "type": "object", + "allOf": [ + { + "$ref": "#/$defs/PartialResource" + } + ], + "$defs": { + "PartialResource": { + "title": "partial_resource", + "type": "object", + "properties": { + "resource_kwargs": { + "title": "resource_kwargs", + "description": "Additional argument to pass to the Handler to read a Resource", + "type": "object" + }, + "resource_path": { + "title": "resource_path", + "description": "Filepath or URI for locating this resource", + "type": "string" + }, + "root": { + "title": "root", + "description": "Subset of resource_path that is a local detail, not semantic.", + "type": "string" + }, + "spec": { + "title": "spec", + "description": "String identifying the format/type of this Resource, used to identify a compatible Handler", + "type": "string" + }, + "uid": { + "title": "uid", + "description": "Globally unique identifier for this Resource", + "type": "string" + } + }, + "required": [ + "resource_kwargs", + "resource_path", + "root", + "spec", + "uid" + ] + } + }, "properties": { "path_semantics": { - "title": "Path Semantics", + "title": "path_semantics", "description": "Rules for joining paths", "type": "string", + "default": "posix", "enum": [ "posix", "windows" ] }, - "resource_kwargs": { - "title": "Resource Kwargs", - "description": "Additional argument to pass to the Handler to read a Resource", - "type": "object" - }, - "resource_path": { - "title": "Resource Path", - "description": "Filepath or URI for locating this resource", - "type": "string" - }, - "root": { - "title": "Root", - "description": "Subset of resource_path that is a local detail, not semantic.", - "type": "string" - }, "run_start": { - "title": "Run Start", + "title": "run_start", "description": "Globally unique ID to the run_start document this resource is associated with.", - "type": "string" - }, - "spec": { - "title": "Spec", - "description": "String identifying the format/type of this Resource, used to identify a compatible Handler", - "type": "string" - }, - "uid": { - "title": "Uid", - "description": "Globally unique identifier for this Resource", - "type": "string" + "type": "string", + "default": "" } }, - "required": [ - "resource_kwargs", - "resource_path", - "root", - "spec", - "uid" - ], - "additionalProperties": false + "required": [], + "unevaluatedProperties": false } diff --git a/src/event_model/schemas/run_start.json b/src/event_model/schemas/run_start.json index d698d5cc7..06db5532f 100644 --- a/src/event_model/schemas/run_start.json +++ b/src/event_model/schemas/run_start.json @@ -1,50 +1,135 @@ { "title": "run_start", - "description": "Document created at the start of run. Provides a seach target and later documents link to it", + "description": "Document created at the start of run. Provides a seach target and\nlater documents link to it", "type": "object", "$defs": { + "CalculatedEventProjection": { + "title": "calculated_event_projection", + "type": "object", + "properties": { + "calculation": { + "title": "calculation", + "description": "required fields if type is calculated", + "$ref": "#/$defs/Calculation" + }, + "field": { + "title": "field", + "type": "string" + }, + "location": { + "title": "location", + "description": "Projection comes and event", + "type": "string", + "const": "event" + }, + "stream": { + "title": "stream", + "type": "string" + }, + "type": { + "title": "type", + "description": "Projection is of type calculated, a value that requires calculation.", + "type": "string", + "const": "calculated" + } + }, + "required": [ + "calculation", + "field", + "location", + "stream", + "type" + ] + }, "Calculation": { - "title": "Calculation", + "title": "calculation", "type": "object", "properties": { "args": { - "title": "Args", + "title": "args", "type": "array", + "default": [], "items": {} }, "callable": { - "title": "Callable", + "title": "callable", "description": "callable function to perform calculation", "type": "string" }, "kwargs": { - "title": "Kwargs", + "title": "kwargs", "description": "kwargs for calcalation callable", - "type": "object" + "type": "object", + "default": {} } }, "required": [ "callable" + ], + "additionalProperties": true + }, + "ConfigurationProjection": { + "title": "configuration_projection", + "type": "object", + "properties": { + "config_device": { + "title": "config_device", + "type": "string" + }, + "config_index": { + "title": "config_index", + "type": "integer" + }, + "field": { + "title": "field", + "type": "string" + }, + "location": { + "title": "location", + "description": "Projection comes from configuration fields in the event_descriptor document", + "type": "string", + "const": "configuration" + }, + "stream": { + "title": "stream", + "type": "string" + }, + "type": { + "title": "type", + "description": "Projection is of type linked, a value linked from the data set.", + "type": "string", + "const": "linked" + } + }, + "required": [ + "config_device", + "config_index", + "field", + "location", + "stream", + "type" ] }, "DataType": { - "title": "DataType", + "title": "data_type", "patternProperties": { "^([^./]+)$": { "$ref": "#/$defs/DataType" } }, - "additionalProperties": false + "additionalProperties": false, + "default": null }, "Hints": { - "title": "Hints", + "title": "hints", "description": "Start-level hints", "type": "object", "properties": { "dimensions": { - "title": "Dimensions", + "title": "dimensions", "description": "The independent axes of the experiment. Ordered slow to fast", "type": "array", + "default": [], "items": { "items": { "anyOf": [ @@ -62,202 +147,80 @@ "type": "array" } } - } + }, + "additionalProperties": true }, - "Projection": { - "title": "Projection", - "description": "Where to get the data from", + "LinkedEventProjection": { + "title": "linked_event_projection", "type": "object", "properties": { - "calculation": { - "title": "calculation properties", - "description": "required fields if type is calculated", - "$ref": "#/$defs/Calculation" - }, - "config_device": { - "title": "Config Device", - "type": "string" - }, - "config_index": { - "title": "Config Index", - "type": "integer" - }, "field": { - "title": "Field", + "title": "field", "type": "string" }, "location": { - "title": "Location", - "description": "start comes from metadata fields in the start document, event comes from event, configuration comes from configuration fields in the event_descriptor document", + "title": "location", + "description": "Projection comes and event", "type": "string", - "enum": [ - "start", - "event", - "configuration" - ] + "const": "event" }, "stream": { - "title": "Stream", + "title": "stream", "type": "string" }, "type": { - "title": "Type", - "description": "linked: a value linked from the data set, calculated: a value that requires calculation, static: a value defined here in the projection ", + "title": "type", + "description": "Projection is of type linked, a value linked from the data set.", "type": "string", - "enum": [ - "linked", - "calculated", - "static" - ] - }, - "value": { - "title": "Value", - "description": "value explicitely defined in the projection when type==static." + "const": "linked" } }, - "allOf": [ - { - "if": { - "allOf": [ - { - "properties": { - "location": { - "enum": [ - "configuration" - ] - } - } - }, - { - "properties": { - "type": { - "enum": [ - "linked" - ] - } - } - } - ] - }, - "then": { - "required": [ - "type", - "location", - "config_index", - "config_device", - "field", - "stream" - ] - } - }, - { - "if": { - "allOf": [ - { - "properties": { - "location": { - "enum": [ - "event" - ] - } - } - }, - { - "properties": { - "type": { - "enum": [ - "linked" - ] - } - } - } - ] - }, - "then": { - "required": [ - "type", - "location", - "field", - "stream" - ] - } - }, - { - "if": { - "allOf": [ - { - "properties": { - "location": { - "enum": [ - "event" - ] - } - } - }, - { - "properties": { - "type": { - "enum": [ - "calculated" - ] - } - } - } - ] - }, - "then": { - "required": [ - "type", - "field", - "stream", - "calculation" - ] - } - }, - { - "if": { - "properties": { - "type": { - "enum": [ - "static" - ] - } - } - }, - "then": { - "required": [ - "type", - "value" - ] - } - } + "required": [ + "field", + "location", + "stream", + "type" ] }, "Projections": { - "title": "Projections", + "title": "projections", "description": "Describe how to interperet this run as the given projection", "type": "object", "properties": { "configuration": { - "title": "Configuration", + "title": "configuration", "description": "Static information about projection", "type": "object" }, "name": { - "title": "Name", + "title": "name", "description": "The name of the projection", - "type": "string" + "type": "string", + "default": "" }, "projection": { - "title": "Projection", + "title": "projection", "description": "", "type": "object", "additionalProperties": { - "$ref": "#/$defs/Projection" + "anyOf": [ + { + "$ref": "#/$defs/ConfigurationProjection" + }, + { + "$ref": "#/$defs/LinkedEventProjection" + }, + { + "$ref": "#/$defs/CalculatedEventProjection" + }, + { + "$ref": "#/$defs/StaticProjection" + } + ] } }, "version": { - "title": "Version", + "title": "version", "description": "The version of the projection spec. Can specify the version of an external specification.", "type": "string" } @@ -267,60 +230,88 @@ "projection", "version" ] + }, + "StaticProjection": { + "title": "static_projection", + "type": "object", + "properties": { + "type": { + "title": "type", + "description": "Projection is of type static, a value defined here in the projection", + "type": "string", + "const": "static" + }, + "value": { + "title": "value", + "description": "value explicitely defined in the static projection" + } + }, + "required": [ + "type", + "value" + ] } }, "properties": { "data_groups": { - "title": "Data Groups", + "title": "data_groups", "description": "An optional list of data access groups that have meaning to some external system. Examples might include facility, beamline, end stations, proposal, safety form.", "type": "array", + "default": [], "items": { "type": "string" } }, "data_session": { - "title": "Data Session", + "title": "data_session", "description": "An optional field for grouping runs. The meaning is not mandated, but this is a data management grouping and not a scientific grouping. It is intended to group runs in a visit or set of trials.", - "type": "string" + "type": "string", + "default": "" }, "data_type": { + "title": "data_type", "description": "", "$ref": "#/$defs/DataType" }, "group": { - "title": "Group", + "title": "group", "description": "Unix group to associate this data with", - "type": "string" + "type": "string", + "default": "" }, "hints": { - "$ref": "#/$defs/Hints", - "additionalProperties": false, + "title": "hints", "patternProperties": { "^([^.]+)$": { "$ref": "#/$defs/DataType" } - } + }, + "additionalProperties": false, + "$ref": "#/$defs/Hints" }, "owner": { - "title": "Owner", + "title": "owner", "description": "Unix owner to associate this data with", - "type": "string" + "type": "string", + "default": "" }, "project": { - "title": "Project", + "title": "project", "description": "Name of project that this run is part of", - "type": "string" + "type": "string", + "default": "" }, "projections": { - "title": "Projections", + "title": "projections", "description": "", "type": "array", + "default": [], "items": { "$ref": "#/$defs/Projections" } }, "sample": { - "title": "Sample", + "title": "sample", "description": "Information about the sample, may be a UID to another collection", "anyOf": [ { @@ -329,20 +320,22 @@ { "type": "string" } - ] + ], + "default": "" }, "scan_id": { - "title": "Scan Id", + "title": "scan_id", "description": "Scan ID number, not globally unique", - "type": "integer" + "type": "integer", + "default": 0 }, "time": { - "title": "Time", + "title": "time", "description": "Time the run started. Unix epoch time", "type": "number" }, "uid": { - "title": "Uid", + "title": "uid", "description": "Globally unique ID for this run", "type": "string" } diff --git a/src/event_model/schemas/run_stop.json b/src/event_model/schemas/run_stop.json index f401bcea3..59525dd23 100644 --- a/src/event_model/schemas/run_stop.json +++ b/src/event_model/schemas/run_stop.json @@ -1,19 +1,21 @@ { "title": "run_stop", - "description": "Document for the end of a run indicating the success/fail state of the run and the end time", + "description": "Document for the end of a run indicating the success/fail state of the\nrun and the end time", "type": "object", "$defs": { "DataType": { - "title": "DataType" + "title": "data_type", + "default": null } }, "properties": { "data_type": { + "title": "data_type", "description": "data_type", "$ref": "#/$defs/DataType" }, "exit_status": { - "title": "Exit Status", + "title": "exit_status", "description": "State of the run when it ended", "type": "string", "enum": [ @@ -23,30 +25,32 @@ ] }, "num_events": { - "title": "Num Events", + "title": "num_events", "description": "Number of Events per named stream", "type": "object", "additionalProperties": { "type": "integer" - } + }, + "default": {} }, "reason": { - "title": "Reason", + "title": "reason", "description": "Long-form description of why the run ended", - "type": "string" + "type": "string", + "default": "" }, "run_start": { - "title": "Run Start", + "title": "run_start", "description": "Reference back to the run_start document that this document is paired with.", "type": "string" }, "time": { - "title": "Time", + "title": "time", "description": "The time the run ended. Unix epoch", "type": "number" }, "uid": { - "title": "Uid", + "title": "uid", "description": "Globally unique ID for this document", "type": "string" } diff --git a/src/event_model/schemas/stream_datum.json b/src/event_model/schemas/stream_datum.json index 35204f6a6..b36770dd2 100644 --- a/src/event_model/schemas/stream_datum.json +++ b/src/event_model/schemas/stream_datum.json @@ -4,17 +4,17 @@ "type": "object", "$defs": { "StreamRange": { - "title": "StreamRange", + "title": "stream_range", "description": "The parameters required to describe a sequence of incrementing integers", "type": "object", "properties": { "start": { - "title": "Start", + "title": "start", "description": "First number in the range", "type": "integer" }, "stop": { - "title": "Stop", + "title": "stop", "description": "Last number in the range is less than this number", "type": "integer" } @@ -27,25 +27,27 @@ }, "properties": { "descriptor": { - "title": "Descriptor", + "title": "descriptor", "description": "UID of the EventDescriptor to which this Datum belongs", "type": "string" }, "indices": { + "title": "indices", "description": "A slice object passed to the StreamResource handler so it can hand back data and timestamps", "$ref": "#/$defs/StreamRange" }, "seq_nums": { + "title": "seq_nums", "description": "A slice object showing the Event numbers the resource corresponds to", "$ref": "#/$defs/StreamRange" }, "stream_resource": { - "title": "Stream Resource", + "title": "stream_resource", "description": "The UID of the Stream Resource to which this Datum belongs.", "type": "string" }, "uid": { - "title": "Uid", + "title": "uid", "description": "Globally unique identifier for this Datum. A suggested formatting being '//", "type": "string" } @@ -57,5 +59,5 @@ "stream_resource", "uid" ], - "additionalProperties": false + "additionalProperties": true } diff --git a/src/event_model/schemas/stream_resource.json b/src/event_model/schemas/stream_resource.json index 440768c30..0a622d31c 100644 --- a/src/event_model/schemas/stream_resource.json +++ b/src/event_model/schemas/stream_resource.json @@ -1,35 +1,36 @@ { "title": "stream_resource", - "description": "Document to reference a collection (e.g. file or group of files) of externally-stored data streams", + "description": "Document to reference a collection (e.g. file or group of files) of\nexternally-stored data streams", "type": "object", "properties": { "data_key": { - "title": "Data Key", + "title": "data_key", "description": "A string to show which data_key of the Descriptor are being streamed", "type": "string" }, "mimetype": { - "title": "Mimetype", + "title": "mimetype", "description": "String identifying the format/type of this Stream Resource, used to identify a compatible Handler", "type": "string" }, "parameters": { - "title": "Parameters", + "title": "parameters", "description": "Additional keyword arguments to pass to the Handler to read a Stream Resource", "type": "object" }, "run_start": { - "title": "Run Start", + "title": "run_start", "description": "Globally unique ID to the run_start document this Stream Resource is associated with.", - "type": "string" + "type": "string", + "default": "" }, "uid": { - "title": "Uid", + "title": "uid", "description": "Globally unique identifier for this Stream Resource", "type": "string" }, "uri": { - "title": "Uri", + "title": "uri", "description": "URI for locating this resource", "type": "string" } @@ -41,5 +42,5 @@ "uid", "uri" ], - "additionalProperties": false + "additionalProperties": true } diff --git a/src/event_model/tests/test_schema_generation.py b/src/event_model/tests/test_schema_generation.py index 45f6a7664..325805525 100644 --- a/src/event_model/tests/test_schema_generation.py +++ b/src/event_model/tests/test_schema_generation.py @@ -1,32 +1,44 @@ # type: ignore -# Test schema generation -import json -import os +from pathlib import Path import pytest +from pydantic.warnings import PydanticDeprecatedSince20 -import event_model -from event_model.documents import ALL_DOCUMENTS -from event_model.documents.generate.typeddict_to_schema import typeddict_to_schema +from event_model.generate.create_documents import JSONSCHEMA, TYPEDDICTS, generate -SCHEMA_PATH = event_model.__path__[0] + "/schemas/" +def test_generated_json_matches_typed_dict(tmpdir: Path): + tmp_documents = Path(tmpdir) / "documents" + tmp_documents.mkdir() + tmp_jsonschema = Path(tmpdir) / "jsonschema" + tmp_jsonschema.mkdir() -@pytest.mark.parametrize("typed_dict_class", ALL_DOCUMENTS) -def test_generated_json_matches_typed_dict(typed_dict_class, tmpdir): - typeddict_to_schema(typed_dict_class, schema_dir=tmpdir) - file_name = os.listdir(tmpdir)[0] - generated_file_path = os.path.join(tmpdir, file_name) - old_file_path = os.path.join(SCHEMA_PATH, file_name) + with pytest.warns(PydanticDeprecatedSince20): + generate(jsonschema_root=tmp_jsonschema, documents_root=tmp_documents) - with open(generated_file_path) as generated_file, open(old_file_path) as old_file: - try: - assert json.load(generated_file) == json.load(old_file) - except AssertionError as error: + for new_jsonschema_path in tmp_jsonschema.iterdir(): + old_jsonschema_path = JSONSCHEMA / new_jsonschema_path.name + + if ( + not old_jsonschema_path.exists() + or new_jsonschema_path.read_text() != old_jsonschema_path.read_text() + ): + raise Exception( + f"{str(old_jsonschema_path)} does not match " + f"{str(new_jsonschema_path)}. Did you forget to run " + "`python -m event_model.generate` after changes?" + ) + + for new_document_path in tmp_documents.iterdir(): + old_document_path = TYPEDDICTS / new_document_path.name + + if ( + not old_document_path.exists() + or new_document_path.read_text() != old_document_path.read_text() + ): raise Exception( - f"`{typed_dict_class.__name__}` can generate a json schema, but " - f"it doesn't match the schema in `{SCHEMA_PATH}`. Did you forget " - "to run `python event_model/documents/generate` after changes " - f"to `{typed_dict_class.__name__}`?" - ) from error + f"{str(old_document_path)} does not match " + f"{str(new_document_path)}. Did you forget to run " + "`python -m event_model.generate` after changes?" + )