diff --git a/packages/zarr-metadata/src/zarr_metadata/__init__.py b/packages/zarr-metadata/src/zarr_metadata/__init__.py index 953bb2e897..3b48e855f5 100644 --- a/packages/zarr-metadata/src/zarr_metadata/__init__.py +++ b/packages/zarr-metadata/src/zarr_metadata/__init__.py @@ -4,10 +4,12 @@ ArrayMetadataV2, ArrayOrderV2, DataTypeMetadataV2, + ZArrayMetadata, ) +from zarr_metadata.v2.attributes import ZAttrsMetadata from zarr_metadata.v2.codec import CodecMetadataV2 from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2 -from zarr_metadata.v2.group import GroupMetadataV2 +from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata from zarr_metadata.v3._common import MetadataFieldV3 from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3 from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3 @@ -32,5 +34,8 @@ "GroupMetadataV3", "MetadataFieldV3", "NamedConfig", + "ZArrayMetadata", + "ZAttrsMetadata", + "ZGroupMetadata", "__version__", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py index 06892df48e..4e9a76125b 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py +++ b/packages/zarr-metadata/src/zarr_metadata/v2/__init__.py @@ -5,10 +5,12 @@ ArrayMetadataV2, ArrayOrderV2, DataTypeMetadataV2, + ZArrayMetadata, ) +from zarr_metadata.v2.attributes import ZAttrsMetadata from zarr_metadata.v2.codec import CodecMetadataV2 from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2 -from zarr_metadata.v2.group import GroupMetadataV2 +from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata __all__ = [ "ArrayDimensionSeparatorV2", @@ -18,4 +20,7 @@ "ConsolidatedMetadataV2", "DataTypeMetadataV2", "GroupMetadataV2", + "ZArrayMetadata", + "ZAttrsMetadata", + "ZGroupMetadata", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/array.py b/packages/zarr-metadata/src/zarr_metadata/v2/array.py index 4f9f946a12..9043fd1893 
class ZArrayMetadata(TypedDict):
    """
    On-disk `.zarray` file content.

    Strict shape of the JSON document persisted in the `.zarray` file of
    a v2 array. User attributes live in a sibling `.zattrs` file and are
    NOT part of this type; see `ZAttrsMetadata`.

    Every key declared here is required except `dimension_separator`,
    which is marked `NotRequired` and may therefore be absent.

    See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
    """

    # Format discriminator: the only value this type admits is the literal 2.
    zarr_format: Literal[2]
    # `shape` and `chunks` are both variable-length tuples of ints.
    shape: tuple[int, ...]
    chunks: tuple[int, ...]
    dtype: DataTypeMetadataV2
    # The key is required, but its value may be `None`.
    compressor: CodecMetadataV2 | None
    # Typed as `object`: this type places no constraint on the fill value.
    fill_value: object
    order: ArrayOrderV2
    # Required key; either a tuple of codec documents or `None`.
    filters: tuple[CodecMetadataV2, ...] | None
    # The only optional key of the document.
    dimension_separator: NotRequired[ArrayDimensionSeparatorV2]
@@ -74,4 +97,5 @@ class ArrayMetadataV2(TypedDict): "ArrayMetadataV2", "ArrayOrderV2", "DataTypeMetadataV2", + "ZArrayMetadata", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/attributes.py b/packages/zarr-metadata/src/zarr_metadata/v2/attributes.py new file mode 100644 index 0000000000..f260537b80 --- /dev/null +++ b/packages/zarr-metadata/src/zarr_metadata/v2/attributes.py @@ -0,0 +1,20 @@ +"""Zarr v2 user-attributes file content. + +See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html +""" + +from collections.abc import Mapping + +ZAttrsMetadata = Mapping[str, object] +"""On-disk `.zattrs` file content. + +A JSON object holding user-defined attributes for a v2 array or group. +Spec-defined keys for arrays / groups live in sibling `.zarray` / `.zgroup` +files (modeled by `ZArrayMetadata` / `ZGroupMetadata`). This type does not +constrain the keys or values of the attributes mapping. +""" + + +__all__ = [ + "ZAttrsMetadata", +] diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py b/packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py index 6fb75740bb..61a5527085 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py +++ b/packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py @@ -10,8 +10,9 @@ from typing_extensions import TypedDict -from zarr_metadata.v2.array import ArrayMetadataV2 -from zarr_metadata.v2.group import GroupMetadataV2 +from zarr_metadata.v2.array import ZArrayMetadata +from zarr_metadata.v2.attributes import ZAttrsMetadata +from zarr_metadata.v2.group import ZGroupMetadata class ConsolidatedMetadataV2(TypedDict): @@ -20,11 +21,20 @@ class ConsolidatedMetadataV2(TypedDict): The `metadata` map uses flat path keys (`"foo/bar/.zarray"`, `"foo/.zattrs"`, etc.) pointing to the JSON contents of the file at - that path. The keys include the filename suffix, not just the node path. + that path. 
class ZGroupMetadata(TypedDict):
    """
    On-disk `.zgroup` file content.

    Strict shape of the JSON document persisted in the `.zgroup` file of
    a v2 group. The spec defines exactly one field. User attributes live
    in a sibling `.zattrs` file and are NOT part of this type; see
    `ZAttrsMetadata`.

    See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
    """

    # The single declared key; only the literal value 2 type-checks.
    zarr_format: Literal[2]
Consumers that read + or write the real on-disk files should use `ZGroupMetadata` (strict + `.zgroup`) plus `ZAttrsMetadata` directly. See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html """ zarr_format: Literal[2] + attributes: NotRequired[Mapping[str, object]] __all__ = [ "GroupMetadataV2", + "ZGroupMetadata", ] diff --git a/packages/zarr-metadata/tests/v2/array/blosc_compressor_with_filters.json b/packages/zarr-metadata/tests/v2/array/blosc_compressor_with_filters.json index f78add1a0b..d7c01563e0 100644 --- a/packages/zarr-metadata/tests/v2/array/blosc_compressor_with_filters.json +++ b/packages/zarr-metadata/tests/v2/array/blosc_compressor_with_filters.json @@ -15,6 +15,5 @@ "filters": [ {"id": "delta", "dtype": "