Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ repos:
- typing_extensions
- universal-pathlib
- obstore>=0.5.1
- zarr-metadata>=0.1.1
# Tests
- pytest
- hypothesis
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
'google-crc32c>=1.5',
'typing_extensions>=4.13',
'donfig>=0.8',
'zarr-metadata>=0.1.1',
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So development of these packages will proceed entirely independently, and one is not developed against the other? Or is there a secret setting somewhere that enables this to be linked?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what you mean? zarr-metadata is on PyPI, which is how we expect people who install zarr-python to get that package. zarr-metadata will be developed with the goal of primarily supporting zarr-python, and when new versions of zarr-metadata are released, we will update zarr-python to depend on them.

]

dynamic = [
Expand Down
40 changes: 22 additions & 18 deletions src/zarr/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,22 @@
import numcodecs
from numcodecs.blosc import Blosc
from packaging.version import Version
from zarr_metadata.v3.codec.blosc import (
BloscCName as _BloscCName,
)
from zarr_metadata.v3.codec.blosc import (
BloscCodecConfiguration as _BloscCodecConfiguration,
)
from zarr_metadata.v3.codec.blosc import (
BloscCodecObject as _BloscCodecObject,
)
from zarr_metadata.v3.codec.blosc import (
BloscShuffle as _BloscShuffle,
)

from zarr.abc.codec import BytesBytesCodec
from zarr.core.buffer.cpu import as_numpy_array_wrapper
from zarr.core.common import JSON, NamedRequiredConfig, parse_named_configuration
from zarr.core.common import JSON, parse_named_configuration
from zarr.core.dtype.common import HasItemSize

if TYPE_CHECKING:
Expand All @@ -22,19 +34,23 @@
from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import Buffer

BloscShuffleLiteral = Literal["noshuffle", "shuffle", "bitshuffle"]
# Re-export under zarr-python's historical names; canonical definitions
# live in `zarr_metadata.v3.codec.blosc`.
BloscShuffleLiteral = _BloscShuffle
"""The shuffle values permitted for the blosc codec"""

BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")

BloscCnameLiteral = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"]
BloscCnameLiteral = _BloscCName
"""The codec identifiers used in the blosc codec"""

BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd")


class BloscConfigV2(TypedDict):
"""Configuration for the V2 Blosc codec"""
"""Configuration for the V2 Blosc codec.

v2 codec shapes predate zarr-metadata, which models only v3 codecs."""

cname: BloscCnameLiteral
clevel: int
Expand All @@ -43,20 +59,8 @@ class BloscConfigV2(TypedDict):
typesize: NotRequired[int]


class BloscConfigV3(TypedDict):
"""Configuration for the V3 Blosc codec"""

cname: BloscCnameLiteral
clevel: int
shuffle: BloscShuffleLiteral
blocksize: int
typesize: int


class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
"""
The JSON form of the Blosc codec in Zarr V3.
"""
BloscConfigV3 = _BloscCodecConfiguration
BloscJSON_V3 = _BloscCodecObject


class _DeprecatedStrEnumMeta(type):
Expand Down
15 changes: 3 additions & 12 deletions src/zarr/codecs/cast_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from collections.abc import Mapping
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING, Final, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Final, TypedDict, cast

import numpy as np

Expand All @@ -23,6 +23,8 @@
if TYPE_CHECKING:
from typing import NotRequired, Self

from zarr_metadata.v3.codec.cast_value import OutOfRangeMode, RoundingMode

from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import NDBuffer
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
Expand All @@ -33,17 +35,6 @@ class ScalarMapJSON(TypedDict):
decode: NotRequired[list[tuple[object, object]]]


RoundingMode = Literal[
"nearest-even",
"towards-zero",
"towards-positive",
"towards-negative",
"nearest-away",
]

OutOfRangeMode = Literal["clamp", "wrap"]


class ScalarMap(TypedDict, total=False):
"""
The normalized, in-memory form of a scalar map.
Expand Down
16 changes: 5 additions & 11 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import warnings
from collections.abc import Iterable, Sequence
from functools import cached_property
from typing import TYPE_CHECKING, Any, TypedDict, cast
from typing import TYPE_CHECKING, Any, cast

from zarr.abc.metadata import Metadata
from zarr.abc.numcodec import Numcodec, _is_numcodec
Expand All @@ -29,8 +29,11 @@
from dataclasses import dataclass, field, fields, replace

import numpy as np
from zarr_metadata.v2.array import ArrayMetadataV2 as _ArrayMetadataV2

from zarr.core.array_spec import ArrayConfig, ArraySpec

# Re-export the v2 array metadata JSON shape under zarr-python's historical name.
from zarr.core.chunk_key_encodings import parse_separator
from zarr.core.common import (
JSON,
Expand All @@ -42,18 +45,9 @@
from zarr.core.config import config, parse_indexing_order
from zarr.core.metadata.common import parse_attributes


class ArrayV2MetadataDict(TypedDict):
"""
A typed dictionary model for Zarr format 2 metadata.
"""

zarr_format: Literal[2]
attributes: dict[str, JSON]


# Union of acceptable types for v2 compressors
type CompressorLikev2 = dict[str, JSON] | Numcodec | None
ArrayV2MetadataDict = _ArrayMetadataV2


@dataclass(frozen=True, kw_only=True)
Expand Down
44 changes: 18 additions & 26 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import json
from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass, field, replace
from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast
from typing import TYPE_CHECKING, Any, Final, Literal, TypeGuard, cast

from typing_extensions import TypedDict
from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3

from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
from zarr.abc.metadata import Metadata
Expand Down Expand Up @@ -139,14 +140,12 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
)


class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg]
"""
This class models allowed extra fields in array metadata.
They must have ``must_understand`` set to ``False``, and may contain
arbitrary additional JSON data.
"""
AllowedExtraField = ExtensionFieldV3
"""Alias for `zarr_metadata.v3.array.ExtensionFieldV3`.

must_understand: Literal[False]
`must_understand` is typed as `bool` to match the spec (extension authors that
*understand* a field may produce `True`); the runtime guard
`check_allowed_extra_field` enforces that zarr-python only accepts `False`."""


def check_allowed_extra_field(data: object) -> TypeGuard[AllowedExtraField]:
Expand Down Expand Up @@ -421,25 +420,12 @@ def parse_chunk_grid(
raise ValueError(f"Unknown chunk grid name: {name!r}")


class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg]
"""
A typed dictionary model for zarr v3 array metadata.

Extra keys are permitted if they conform to ``AllowedExtraField``
(i.e. they are mappings with ``must_understand: false``).
"""
ArrayMetadataJSON_V3 = ArrayMetadataV3
"""Alias for `zarr_metadata.v3.array.ArrayMetadataV3`.

zarr_format: Literal[3]
node_type: Literal["array"]
data_type: str | NamedConfig[str, Mapping[str, JSON]]
shape: tuple[int, ...]
chunk_grid: str | NamedConfig[str, Mapping[str, JSON]]
chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]]
fill_value: JSON
codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]
attributes: NotRequired[Mapping[str, JSON]]
storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]]
dimension_names: NotRequired[tuple[str | None, ...]]
The TypedDict from the metadata package is the canonical model of the v3
array metadata document; this alias preserves the historical zarr-python
name. Extra keys are permitted if they conform to `ExtensionFieldV3`."""


"""
Expand Down Expand Up @@ -671,6 +657,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
)

def to_dict(self) -> dict[str, JSON]:
"""Serialize as a JSON-shaped dict matching `ArrayMetadataV3`.

Return type is `dict[str, JSON]` rather than `ArrayMetadataV3` so the
result composes with other zarr-python metadata serialisation paths
that traffic in `dict[str, JSON]` (notably consolidated metadata).
"""
out_dict = super().to_dict()
extra_fields = out_dict.pop("extra_fields")
out_dict = out_dict | extra_fields # type: ignore[operator]
Expand Down
Loading
Loading