Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion packages/zarr-metadata/src/zarr_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
ArrayMetadataV2,
ArrayOrderV2,
DataTypeMetadataV2,
ZArrayMetadata,
)
from zarr_metadata.v2.attributes import ZAttrsMetadata
from zarr_metadata.v2.codec import CodecMetadataV2
from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2
from zarr_metadata.v2.group import GroupMetadataV2
from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata
from zarr_metadata.v3._common import MetadataFieldV3
from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3
from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3
Expand All @@ -32,5 +34,8 @@
"GroupMetadataV3",
"MetadataFieldV3",
"NamedConfig",
"ZArrayMetadata",
"ZAttrsMetadata",
"ZGroupMetadata",
"__version__",
]
7 changes: 6 additions & 1 deletion packages/zarr-metadata/src/zarr_metadata/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
ArrayMetadataV2,
ArrayOrderV2,
DataTypeMetadataV2,
ZArrayMetadata,
)
from zarr_metadata.v2.attributes import ZAttrsMetadata
from zarr_metadata.v2.codec import CodecMetadataV2
from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2
from zarr_metadata.v2.group import GroupMetadataV2
from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata

__all__ = [
"ArrayDimensionSeparatorV2",
Expand All @@ -18,4 +20,7 @@
"ConsolidatedMetadataV2",
"DataTypeMetadataV2",
"GroupMetadataV2",
"ZArrayMetadata",
"ZAttrsMetadata",
"ZGroupMetadata",
]
30 changes: 27 additions & 3 deletions packages/zarr-metadata/src/zarr_metadata/v2/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,39 @@
"""


class ZArrayMetadata(TypedDict):
    """
    Strict on-disk shape of a Zarr v2 `.zarray` document.

    Describes exactly the JSON object stored at `<path>/.zarray` for a
    v2 array node. User attributes are persisted separately, in the
    sibling `.zattrs` file, and are deliberately absent from this type;
    they are modeled by `ZAttrsMetadata`.

    See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
    """

    # Format discriminator; only the literal value 2 is accepted.
    zarr_format: Literal[2]
    shape: tuple[int, ...]
    chunks: tuple[int, ...]
    dtype: DataTypeMetadataV2
    # May be None (JSON `null`) instead of a codec description.
    compressor: CodecMetadataV2 | None
    # Typed as `object`: this type places no constraint on the value.
    fill_value: object
    order: ArrayOrderV2
    # May be None (JSON `null`) instead of a sequence of codecs.
    filters: tuple[CodecMetadataV2, ...] | None
    # Optional key: the document may omit it entirely.
    dimension_separator: NotRequired[ArrayDimensionSeparatorV2]


class ArrayMetadataV2(TypedDict):
"""
Zarr v2 array metadata document.
Zarr v2 array metadata document, in-memory merged form.

Models the union of `.zarray` (the spec-defined fields) and `.zattrs`
(user attributes). On disk, attributes live in a sibling `.zattrs` file
and are not part of `.zarray`; this type folds them in as the
`attributes` field so a single TypedDict represents the complete
in-memory state of a v2 array node. Consumers that read or write a
real `.zarray` file should split / merge `attributes` accordingly.
real `.zarray` file should split / merge `attributes` accordingly,
or use `ZArrayMetadata` (strict on-disk) plus `ZAttrsMetadata` directly.

See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
"""
Expand All @@ -62,7 +85,7 @@ class ArrayMetadataV2(TypedDict):
order: ArrayOrderV2
filters: tuple[CodecMetadataV2, ...] | None
dimension_separator: NotRequired[ArrayDimensionSeparatorV2]
attributes: Mapping[str, object]
attributes: NotRequired[Mapping[str, object]]
Comment thread
ilan-gold marked this conversation as resolved.
"""User attributes from the sibling `.zattrs` file (not part of `.zarray`).

See the class docstring for the rationale behind the merged representation.
Expand All @@ -74,4 +97,5 @@ class ArrayMetadataV2(TypedDict):
"ArrayMetadataV2",
"ArrayOrderV2",
"DataTypeMetadataV2",
"ZArrayMetadata",
]
20 changes: 20 additions & 0 deletions packages/zarr-metadata/src/zarr_metadata/v2/attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Zarr v2 user-attributes file content.

See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
"""

from collections.abc import Mapping

ZAttrsMetadata = Mapping[str, object]
"""Content of an on-disk `.zattrs` file.

The file holds a single JSON object of user-defined attributes attached
to a v2 array or group. The spec-defined keys of the node itself are
stored in the sibling `.zarray` / `.zgroup` file and are modeled by
`ZArrayMetadata` / `ZGroupMetadata`, not here. Neither the keys nor the
values of the attributes mapping are constrained by this type.
"""


__all__ = [
"ZAttrsMetadata",
]
18 changes: 14 additions & 4 deletions packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

from typing_extensions import TypedDict

from zarr_metadata.v2.array import ArrayMetadataV2
from zarr_metadata.v2.group import GroupMetadataV2
from zarr_metadata.v2.array import ZArrayMetadata
from zarr_metadata.v2.attributes import ZAttrsMetadata
from zarr_metadata.v2.group import ZGroupMetadata


class ConsolidatedMetadataV2(TypedDict):
Expand All @@ -20,11 +21,20 @@ class ConsolidatedMetadataV2(TypedDict):

The `metadata` map uses flat path keys (`"foo/bar/.zarray"`,
`"foo/.zattrs"`, etc.) pointing to the JSON contents of the file at
that path. The keys include the filename suffix, not just the node path.
that path. The keys include the filename suffix, not just the node
path; the value's shape is determined by which file the key points at:

- `<path>/.zarray` -> `ZArrayMetadata`
- `<path>/.zgroup` -> `ZGroupMetadata`
- `<path>/.zattrs` -> `ZAttrsMetadata`

The TypedDict cannot discriminate the value shape on the key suffix
at the type level; consumers should narrow at runtime by inspecting
`key.endswith(".zarray")` etc.
"""

zarr_consolidated_format: int
metadata: Mapping[str, GroupMetadataV2 | ArrayMetadataV2]
metadata: Mapping[str, ZArrayMetadata | ZGroupMetadata | ZAttrsMetadata]


__all__ = [
Expand Down
30 changes: 26 additions & 4 deletions packages/zarr-metadata/src/zarr_metadata/v2/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,46 @@
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
"""

from typing import Literal
from collections.abc import Mapping
from typing import Literal, NotRequired

from typing_extensions import TypedDict


class ZGroupMetadata(TypedDict):
    """
    Strict on-disk shape of a Zarr v2 `.zgroup` document.

    Describes exactly the JSON object stored at `<path>/.zgroup` for a
    v2 group node, which per the spec carries a single field. User
    attributes are persisted separately, in the sibling `.zattrs` file,
    and are deliberately absent from this type; they are modeled by
    `ZAttrsMetadata`.

    See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
    """

    # Format discriminator; only the literal value 2 is accepted.
    zarr_format: Literal[2]


class GroupMetadataV2(TypedDict):
"""
Zarr v2 group metadata document (the `.zgroup` content).
Zarr v2 group metadata document, in-memory merged form.

Attributes live in a sibling `.zattrs` file, so they are not part
of this dict.
Models the union of `.zgroup` (the spec-defined `zarr_format` field)
and `.zattrs` (user attributes). On disk these are persisted as two
separate files; this type folds them so a single TypedDict represents
the complete in-memory state of a v2 group node. Consumers that read
or write the real on-disk files should use `ZGroupMetadata` (strict
`.zgroup`) plus `ZAttrsMetadata` directly.

See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
"""

zarr_format: Literal[2]
attributes: NotRequired[Mapping[str, object]]
Comment thread
ilan-gold marked this conversation as resolved.


__all__ = [
"GroupMetadataV2",
"ZGroupMetadata",
]
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@
"filters": [
{"id": "delta", "dtype": "<f8"}
],
"dimension_separator": ".",
"attributes": {"name": "demo"}
"dimension_separator": "."
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@
"compressor": {"id": "gzip", "level": 1},
"fill_value": 0,
"order": "C",
"filters": [],
"attributes": {}
"filters": []
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,5 @@
"compressor": null,
"fill_value": 0,
"order": "C",
"filters": null,
"attributes": {}
"filters": null
}
3 changes: 1 addition & 2 deletions packages/zarr-metadata/tests/v2/array/structured_dtype.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,5 @@
"fill_value": 0,
"order": "F",
"filters": null,
"dimension_separator": "/",
"attributes": {}
"dimension_separator": "/"
}
9 changes: 6 additions & 3 deletions packages/zarr-metadata/tests/v2/array/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Decode v2 array metadata fixtures via pydantic.

Each `*.json` file in this directory is a representative on-disk
`.zarray` that should validate cleanly as `ArrayMetadataV2`.
`.zarray` that should validate cleanly as `ZArrayMetadata` (the strict
on-disk shape). User attributes live in sibling `.zattrs` files and are
not part of these fixtures.

Fixtures cover simple/structured dtypes, presence/absence of compressor
and filters, and both row/column orderings.
"""
Expand All @@ -14,11 +17,11 @@
import pytest
from pydantic import TypeAdapter

from zarr_metadata.v2.array import ArrayMetadataV2
from zarr_metadata.v2.array import ZArrayMetadata

FIXTURES_DIR = Path(__file__).parent
FIXTURES = sorted(FIXTURES_DIR.glob("*.json"))
ADAPTER = TypeAdapter(ArrayMetadataV2)
ADAPTER = TypeAdapter(ZArrayMetadata)


@pytest.mark.parametrize("fixture", FIXTURES, ids=lambda p: p.stem)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"zarr_consolidated_format": 1,
"metadata": {
".zgroup": {"zarr_format": 2},
".zattrs": {"description": "root group attrs"},
"data/.zarray": {
"zarr_format": 2,
"shape": [100],
Expand All @@ -10,8 +11,8 @@
"compressor": null,
"fill_value": 0,
"order": "C",
"filters": null,
"attributes": {}
}
"filters": null
},
"data/.zattrs": {"units": "meters"}
Comment thread
ilan-gold marked this conversation as resolved.
}
}
12 changes: 9 additions & 3 deletions packages/zarr-metadata/tests/v2/group/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Decode v2 group metadata fixtures via pydantic."""
"""Decode v2 group metadata fixtures via pydantic.

Each `*.json` file in this directory is a representative on-disk
`.zgroup` that should validate cleanly as `ZGroupMetadata` (the strict
on-disk shape). User attributes live in sibling `.zattrs` files and are
not part of these fixtures.
"""

from __future__ import annotations

Expand All @@ -8,11 +14,11 @@
import pytest
from pydantic import TypeAdapter

from zarr_metadata.v2.group import GroupMetadataV2
from zarr_metadata.v2.group import ZGroupMetadata

FIXTURES_DIR = Path(__file__).parent
FIXTURES = sorted(FIXTURES_DIR.glob("*.json"))
ADAPTER = TypeAdapter(GroupMetadataV2)
ADAPTER = TypeAdapter(ZGroupMetadata)


@pytest.mark.parametrize("fixture", FIXTURES, ids=lambda p: p.stem)
Expand Down
Loading
Loading