diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index a6c7202c5b..90a317f79d 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -63,9 +63,119 @@ jobs: ls ls dist + # --------------------------------------------------------------------------- + # Pre-publish gate: confirm zarr-metadata's required floor is on PyPI. + # + # zarr-python and zarr-metadata co-develop in this monorepo. During local + # development zarr-metadata is resolved from packages/zarr-metadata/ via the + # uv workspace (see [tool.uv.sources] in pyproject.toml). The wheel we are + # about to publish, however, only carries a version-range requirement + # (e.g. `zarr-metadata>=0.1.1,<0.2`); end users will resolve that against + # PyPI. + # + # The failure mode this job catches: a zarr-python PR added code that + # depends on a zarr-metadata feature that has been merged into + # packages/zarr-metadata/ but not yet released to PyPI. CI passed because + # the workspace override resolved to the in-tree copy, but a user installing + # the resulting zarr-python wheel would get a published zarr-metadata that + # lacks the feature, and zarr-python would fail at import or first use. + # + # The mitigation here is a presence check on PyPI: extract the floor of + # zarr-python's zarr-metadata requirement from the wheel's METADATA file, + # and refuse to upload if that exact version is not yet on PyPI. This is + # analogous to what `cargo publish` does automatically against crates.io, + # but expressed as a CI step because twine has no built-in equivalent. + # + # When you bump zarr-metadata to a new version that zarr-python depends on, + # the required release order is: + # 1. release zarr-metadata to PyPI; + # 2. bump the floor in zarr-python's [project.dependencies]; + # 3. release zarr-python. + # This job will fail at step 3 if step 1 was skipped. 
+ # --------------------------------------------------------------------------- + verify_pypi_dependency: + name: Verify zarr-metadata floor is on PyPI + needs: [build_artifacts] + runs-on: ubuntu-latest + # Run only on actual releases. Pull-request and push-to-main runs go + # through CI without this gate, since their wheels are never uploaded. + if: github.event_name == 'release' + steps: + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: releases + path: dist + + - name: Check zarr-metadata floor is published on PyPI + run: | + # The wheel's METADATA file lives at zarr-*.dist-info/METADATA inside + # the wheel. `unzip -p` writes a file's contents to stdout without + # extracting; the glob matches whichever dist-info dir is inside. + metadata="$(unzip -p dist/zarr-*.whl '*.dist-info/METADATA')" + + # Pick the Requires-Dist line for zarr-metadata. The wheel may have + # several Requires-Dist lines for different extras; we want the one + # that applies unconditionally (no `; extra == "..."` marker). + # Match `Requires-Dist: zarr-metadata` followed by anything that + # ends a project name in PEP 508: a version operator (<, >, =, !, + # ~), whitespace, `[` (extras), `;` (markers), `(` (legacy + # parenthesized version), or end-of-line. The character class + # excludes letters/digits/underscore/hyphen, so a hypothetical + # `zarr-metadata-ext` dep would not match. + req_line="$(printf '%s' "$metadata" \ + | grep -E '^Requires-Dist: zarr-metadata([^A-Za-z0-9_-]|$)' \ + | grep -v 'extra ==' \ + || true)" + + if [ -z "$req_line" ]; then + echo "::error::Could not find an unconditional Requires-Dist line for zarr-metadata in the built wheel." + echo "Wheel METADATA Requires-Dist lines:" + printf '%s' "$metadata" | grep '^Requires-Dist:' || true + exit 1 + fi + echo "Requires-Dist line: $req_line" + + # Extract the floor: the version after `>=`. Version specifiers in + # PEP 440 are comma-separated (e.g. 
`>=0.1.1, <0.2`); the floor is + # the bound after the first `>=`. `grep -oE '>=[^,]+'` captures + # `>=0.1.1` (everything up to the comma), then we strip the + # operator and surrounding whitespace. + floor="$(printf '%s' "$req_line" \ + | grep -oE '>=[[:space:]]*[^,]+' \ + | sed 's/^>=[[:space:]]*//; s/[[:space:]]*$//' \ + | head -1)" + + if [ -z "$floor" ]; then + echo "::error::Could not extract a '>=' floor from: $req_line" + echo "zarr-python's zarr-metadata requirement must include a '>=' bound so this gate has something to check." + exit 1 + fi + echo "zarr-metadata floor: $floor" + + # PyPI's JSON API returns 200 if the named version exists and 404 + # if it doesn't. -s silences progress output; -o /dev/null discards + # the body; -w '%{http_code}' prints just the status. Any non-200 + # response means the floor has not been published yet. + status="$(curl -s -o /dev/null -w '%{http_code}' \ + "https://pypi.org/pypi/zarr-metadata/${floor}/json")" + + if [ "$status" != "200" ]; then + echo "::error::zarr-metadata ${floor} is not available on PyPI (HTTP ${status})." + echo "" + echo "The wheel about to be uploaded declares it requires zarr-metadata ${floor} or later," + echo "but no such release exists on PyPI. Publish zarr-metadata ${floor} first, then" + echo "re-run this release workflow." + exit 1 + fi + echo "OK: zarr-metadata ${floor} is on PyPI; safe to upload zarr-python." + upload_pypi: name: Upload to PyPI - needs: [build_artifacts, test_dist_pypi] + # Depend on the new gate so the upload step does not run if the floor + # is missing from PyPI. The gate runs only on releases (see its `if:` + # condition); on PR / push runs it is skipped, and skipped jobs in a + # `needs:` list are treated as satisfied by GitHub Actions. 
+ needs: [build_artifacts, test_dist_pypi, verify_pypi_dependency] runs-on: ubuntu-latest if: github.event_name == 'release' environment: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c9c0e61668..232bf877fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,6 +42,7 @@ repos: - typing_extensions - universal-pathlib - obstore>=0.5.1 + - zarr-metadata>=0.1.1 # Tests - pytest - hypothesis @@ -63,6 +64,14 @@ repos: entry: "\\.(lstrip|rstrip)\\([\"'][^\"']{2,}[\"']\\)" types: [python] files: ^(src|tests)/ + - id: check-min-deps-floor + name: check min_deps zarr-metadata pin matches the project floor + language: system + entry: python ci/check_min_deps_floor.py + # Run whenever pyproject.toml changes; pass_filenames is False because + # the script reads the file directly rather than processing argv. + pass_filenames: false + files: ^pyproject\.toml$ - repo: https://github.com/zizmorcore/zizmor-pre-commit rev: v1.24.1 hooks: diff --git a/changes/3961.feature.md b/changes/3961.feature.md new file mode 100644 index 0000000000..3dc6879e7a --- /dev/null +++ b/changes/3961.feature.md @@ -0,0 +1,3 @@ +``zarr-python`` now depends on the [``zarr-metadata``](https://pypi.org/project/zarr-metadata/) package, which provides spec-defined TypedDicts and literal types for Zarr v2 and v3 metadata documents. Several internal types previously defined in ``zarr-python`` are now aliases that re-export their canonical definitions from ``zarr-metadata``: ``zarr.codecs.blosc.BloscShuffleLiteral``, ``zarr.codecs.blosc.BloscCnameLiteral``, ``zarr.codecs.blosc.BloscConfigV3``, ``zarr.codecs.blosc.BloscJSON_V3``, ``zarr.codecs.cast_value.RoundingMode``, ``zarr.codecs.cast_value.OutOfRangeMode``, ``zarr.core.metadata.v2.ArrayV2MetadataDict``, ``zarr.core.metadata.v3.AllowedExtraField``, and ``zarr.core.metadata.v3.ArrayMetadataJSON_V3``. 
+ +The version requirement (``zarr-metadata>=0.1.1,<0.2``) caps the major version so a future breaking change in ``zarr-metadata`` cannot silently break installed ``zarr-python``. During local development, ``zarr-metadata`` is resolved from the in-tree copy under ``packages/zarr-metadata/`` via a uv workspace; see [the contributing guide](https://zarr.readthedocs.io/en/stable/contributing.html) for details. diff --git a/ci/check_min_deps_floor.py b/ci/check_min_deps_floor.py new file mode 100644 index 0000000000..461e1d0e47 --- /dev/null +++ b/ci/check_min_deps_floor.py @@ -0,0 +1,111 @@ +""" +Enforce the invariant: `min_deps` pins zarr-metadata to the floor of +zarr-python's declared zarr-metadata range. + +zarr-python declares `zarr-metadata>=X.Y.Z,<...>` in `[project.dependencies]`. +The `min_deps` hatch env tests against the *minimum* supported deps, so it +must pin zarr-metadata to exactly that floor (e.g. `zarr-metadata==X.Y.Z`). +Without this script the two declarations can drift silently — the project's +floor could rise without `min_deps` noticing, and `min_deps` would no longer +verify what its name claims. + +Run: + python ci/check_min_deps_floor.py + +Exits 0 if floors agree; non-zero with a clear message if not. +""" + +from __future__ import annotations + +import re +import sys +import tomllib +from pathlib import Path + +ROOT = Path(__file__).parent.parent.resolve() +PYPROJECT = ROOT / "pyproject.toml" + +# Match `>=X.Y.Z` (with or without surrounding whitespace) inside a PEP 440 +# version specifier set. Captures just the version number. +_FLOOR_RE = re.compile(r">=\s*([^,\s]+)") +# Match `==X.Y.Z` likewise. Captures the version number. +_PIN_RE = re.compile(r"==\s*([^,\s]+)") + + +def find_zarr_metadata_floor(deps: list[str]) -> str: + """Return the >= floor of zarr-metadata declared in `deps`. + + `deps` is a list of PEP 508 strings, e.g. as found in + `[project.dependencies]`. 
Raises if zarr-metadata is not present, or + if its specifier set has no `>=` bound. + """ + for dep in deps: + # Project name is everything up to the first non-name character. + # Quick split: package name terminates at the first occurrence of a + # version operator, whitespace, `[`, `;`, or `(`. + name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip() + if name == "zarr-metadata": + match = _FLOOR_RE.search(dep) + if not match: + raise SystemExit( + f"zarr-metadata dependency has no `>=` floor: {dep!r}\n" + "Floor verification requires an explicit lower bound." + ) + return match.group(1) + raise SystemExit( + "zarr-metadata not found in [project.dependencies]. " + "This script assumes zarr-python depends on zarr-metadata." + ) + + +def find_zarr_metadata_pin(deps: list[str]) -> str: + """Return the `==` pin of zarr-metadata declared in `deps`. + + `deps` is a list of PEP 508 strings, e.g. as found in + `[tool.hatch.envs.min_deps.extra-dependencies]`. Raises if + zarr-metadata is not present, or if its specifier is not a `==` pin. + """ + for dep in deps: + name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip() + if name == "zarr-metadata": + match = _PIN_RE.search(dep) + if not match: + raise SystemExit( + f"min_deps zarr-metadata entry is not an `==` pin: {dep!r}\n" + "The min_deps env must pin zarr-metadata exactly to the floor." + ) + return match.group(1) + raise SystemExit( + "zarr-metadata not found in [tool.hatch.envs.min_deps.extra-dependencies].\n" + "Add `'zarr-metadata=='` to keep min_deps testing the declared floor." 
+ ) + + +def main() -> int: + data = tomllib.loads(PYPROJECT.read_text()) + + project_deps = data["project"]["dependencies"] + floor = find_zarr_metadata_floor(project_deps) + + min_deps_extra = data["tool"]["hatch"]["envs"]["min_deps"]["extra-dependencies"] + pin = find_zarr_metadata_pin(min_deps_extra) + + if floor != pin: + print( + f"floor / min_deps pin mismatch for zarr-metadata:\n" + f" [project.dependencies] floor: >={floor}\n" + f" [tool.hatch.envs.min_deps] pin: =={pin}\n" + f"\n" + f"These must agree. Either update the floor in " + f"[project.dependencies] or the pin in min_deps so both name " + f"the same zarr-metadata version.", + file=sys.stderr, + ) + return 1 + + print(f"OK: zarr-metadata floor {floor} matches min_deps pin {pin}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/contributing.md b/docs/contributing.md index b9c7aa1aa2..5e61d01a3f 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -93,6 +93,44 @@ To verify that your development environment is working, you can run the unit tes hatch env run --env test.py3.12-optional run ``` +#### The zarr-metadata package and the workspace + +zarr-python depends on [`zarr-metadata`](https://pypi.org/project/zarr-metadata/), a small package of TypedDicts and literals describing the JSON shape of Zarr v2 and v3 metadata documents. Both packages live in this repository: + +- zarr-python: the project root. +- zarr-metadata: [`packages/zarr-metadata/`](https://github.com/zarr-developers/zarr-python/tree/main/packages/zarr-metadata) — its own `pyproject.toml`, source tree, and tests. + +This is configured as a workspace in two places, because the project supports both [`uv`](https://docs.astral.sh/uv/) and [`hatch`](https://hatch.pypa.io/) as front-ends. 
+ +**uv workspace declaration** (consumed by `uv sync`, `uv run`, and anything reading uv's project metadata): + +```toml +[tool.uv.workspace] +members = ["packages/zarr-metadata"] + +[tool.uv.sources] +zarr-metadata = { workspace = true } +``` + +**Hatch workspace declaration** (consumed by `hatch env run`, including the CI test matrix in `test.yml`): + +```toml +[tool.hatch.envs.test] +workspace.members = ["packages/zarr-metadata"] +``` + +Both mechanisms point at the same in-tree path. They have to be declared separately because uv and hatch don't share configuration. The `dev` env, the `test` matrix, the inherited `gputest` and `upstream` envs all use the in-tree source. The `min_deps` env explicitly opts out (`workspace.members = []`) so it tests against the minimum supported zarr-metadata from PyPI — the floor of the version range in `[project.dependencies]`. + +What this means in practice: + +- **During local development** (whether you invoke `uv run pytest` or `hatch env run --env test.py3.12-optional run`), zarr-python resolves `zarr-metadata` from the in-tree source under `packages/zarr-metadata/`. Changes you make there are immediately visible to zarr-python without reinstalling. +- **In the published wheel**, only the `[project.dependencies]` version requirement (`zarr-metadata>=0.1.1,<0.2`) is carried. The workspace declarations are development-only configuration. Users installing zarr-python from PyPI get the published zarr-metadata wheel. +- **In CI**, the primary test matrix (`test.yml`) runs `hatch env run` against the in-tree zarr-metadata. A change in `packages/zarr-metadata/` that breaks zarr-python surfaces immediately, before zarr-metadata is released to PyPI. The `min_deps` job additionally exercises the published floor on every PR, so a change in zarr-python that *requires* an unreleased zarr-metadata feature also gets caught. + +If you change zarr-metadata, also run zarr-python's test suite. 
The workspace setup makes this transparent — your usual `uv run pytest` or `hatch env run` picks up the in-tree source automatically. + +When releasing a new zarr-metadata version that contains a breaking change, also bump zarr-python's version cap on zarr-metadata (currently `<0.2`) in the same release cycle. See [Releasing zarr-python when zarr-metadata has changed](#releasing-zarr-python-when-zarr-metadata-has-changed) below for the full procedure. + ### Creating a branch Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add. @@ -349,6 +387,17 @@ Releases are prepared using the ["Prepare release notes"](https://github.com/zar 4. The release PR is automatically labeled `run-downstream`, which triggers the [downstream test workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/downstream.yml) to run Xarray and numcodecs integration tests against the release branch. 5. Review the rendered changelog in `docs/release-notes.md` and verify downstream tests pass before merging. +### Releasing zarr-python when zarr-metadata has changed + +zarr-python depends on the [`zarr-metadata`](https://pypi.org/project/zarr-metadata/) package, which is developed in the same monorepo (see [The zarr-metadata package and the workspace](#the-zarr-metadata-package-and-the-workspace) above). When a zarr-python release depends on a zarr-metadata change that has not yet been published to PyPI, the release must follow this order: + +1. **Bump zarr-metadata's version** in `packages/zarr-metadata/pyproject.toml` and `packages/zarr-metadata/src/zarr_metadata/__init__.py` (the version literal). Use semver: bump the minor for breaking type changes, the patch for additive changes. +2. **Release zarr-metadata to PyPI.** Tag and publish from `packages/zarr-metadata/`. +3. **Bump zarr-python's floor** on zarr-metadata in `[project.dependencies]` (e.g. 
`zarr-metadata>=0.1.1,<0.2` → `zarr-metadata>=0.2.0,<0.3`). Update `[tool.uv.workspace]` and `[tool.uv.sources]` only if necessary. +4. **Release zarr-python.** + +If steps 1 and 2 are skipped (or step 3's bumped floor names a version that does not yet exist on PyPI), the `verify_pypi_dependency` job in [`releases.yml`](https://github.com/zarr-developers/zarr-python/blob/main/.github/workflows/releases.yml) will fail before the upload step runs. This gate exists because the wheel ships only a version-range requirement; pip resolves that against PyPI on the user's machine, and there is no built-in equivalent of `cargo publish`'s automatic check that the declared dependency is actually available in the registry. + ## Benchmarks Zarr uses [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/) for running diff --git a/docs/zarr-metadata-followups.md b/docs/zarr-metadata-followups.md new file mode 100644 index 0000000000..4aa2b564bc --- /dev/null +++ b/docs/zarr-metadata-followups.md @@ -0,0 +1,353 @@ +# zarr-metadata Follow-Up Recommendations + +This document catalogs type-surface gaps and missing runtime constants in zarr-metadata 0.1.1 that prevent deeper integration with zarr-python. Each section identifies a concrete gap, shows what zarr-python requires, and proposes what zarr-metadata should add. + +## Working principle + +When zarr-metadata and zarr-python disagree on a type or shape, neither side automatically wins. Each disagreement is a prompt to think: + +- Is zarr-python doing the right thing and zarr-metadata is wrong? +- Is zarr-metadata doing the right thing and zarr-python is wrong (carrying a quirk that should be fixed)? +- Are both wrong, and a third design fits better? 
+ +Examples already known on this branch: zarr-python's `GroupMetadata.to_dict()` emits `node_type` even for v2 (a spec violation that pre-dates strict typing); zarr-python's `RectilinearDimSpecJSON` uses `list` while zarr-metadata uses `tuple` (different concerns — JSON-deserialized form vs canonical form — both valid in their domain). + +The recommendations below are *prompts for discussion*, not decrees. Each entry should be evaluated on its own terms before any change lands in zarr-metadata. + +## 1. Current Public API Inventory + +**zarr-metadata 0.1.1** exports: + +### Top-level (`zarr_metadata/__init__.py`) +- `NamedConfig` — externally-tagged union for metadata fields +- `ArrayMetadataV2`, `ArrayMetadataV3`, `GroupMetadataV2`, `GroupMetadataV3` +- `ConsolidatedMetadataV2`, `ConsolidatedMetadataV3` +- `ExtensionFieldV3`, `MetadataFieldV3` — validator types +- `ArrayDimensionSeparatorV2`, `ArrayOrderV2`, `DataTypeMetadataV2`, `CodecMetadataV2` + +### Codec v3 (`v3/codec/__init__.py`) +Nine codec-metadata aliases: `BloscCodecMetadata`, `BytesCodecMetadata`, `CastValueCodecMetadata`, `Crc32cCodecMetadata`, `GzipCodecMetadata`, `ScaleOffsetCodecMetadata`, `ShardingIndexedCodecMetadata`, `TransposeCodecMetadata`, `ZstdCodecMetadata` + +### Data types v3 (`v3/data_type/__init__.py`) +Twenty dtype names + fill value types (e.g., `Float32DataTypeName`, `Float32FillValue`); plus three envelope types: `NumpyDatetime64`, `NumpyTimedelta64`, `Struct` + +### Chunk key encodings v3 (`v3/chunk_key_encoding/`) +- `DefaultChunkKeyEncodingMetadata`, `DefaultChunkKeyEncodingConfiguration` +- `V2ChunkKeyEncodingMetadata` (via `v2.py`) + +--- + +## 2. Missing Runtime Constants (Final Tuples) + +**Gap:** zarr-metadata defines literal types (e.g., `BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]`) but doesn't export the corresponding runtime tuple constants zarr-python uses. 
+ +**zarr-python has** (`src/zarr/core/dtype/common.py` lines 19–30): +```python +ENDIANNESS_STR: Final = "little", "big" +SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") +OBJECT_CODEC_IDS: Final = ("vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2") +``` + +**zarr-python has** (`src/zarr/codecs/blosc.py` line 42): +```python +SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle") +``` + +**zarr-python has** (`src/zarr/core/common.py` line 48): +```python +ANY_ACCESS_MODE: Final = "r", "r+", "a", "w", "w-" +``` + +**Proposal:** For each `Literal[...]` type in zarr-metadata (especially those reused across modules), export a sibling `Final` tuple. Examples: +- `BloscShuffle = Literal[...]` → add `BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")` +- `Endian = Literal["little", "big"]` (`v3/codec/bytes.py` line 17) → add `ENDIAN: Final = ("little", "big")` +- `DefaultChunkKeyEncodingSeparator = Literal["/", "."]` → add `DEFAULT_CHUNK_KEY_ENCODING_SEPARATORS: Final = ("/", ".")` +- `ObjectCodecID = Literal[...]` (if added; see below) → add `OBJECT_CODEC_IDS: Final = (...)` + +**Status:** Additive; no breaking changes. + +--- + +## 3. Missing Type Aliases: dtype Name and Fill-Value Constants + +**Gap:** zarr-python defines string constants and special-value sentinels for each dtype; zarr-metadata lacks a cross-dtype registry or naming convention for them. + +**Examples zarr-python has:** +- `src/zarr/core/dtype/npy/float.py` (line 24): per-float dtype v2 names as class variables +- `src/zarr/core/dtype/common.py` (line 28): `ObjectCodecID = Literal["vlen-utf8", "vlen-bytes", ...]` +- `src/zarr/core/dtype/common.py` (lines 19–23): `EndiannessStr`, `SpecialFloatStrings`, `JSONFloatV2`, `JSONFloatV3` +- `src/zarr/core/dtype/npy/common.py` (lines 1–3): `DATETIME_UNIT`, `NUMPY_ENDIANNESS_STR` + +**zarr-metadata has:** +- Per-dtype modules (`v3/data_type/float32.py`, etc.) 
with `Float32DataTypeName = Literal["float32"]`, `Float32SpecialFillValue`, `Float32FillValue` +- No cross-dtype registry of names or fill-value patterns + +**Proposal:** +1. Add `ObjectCodecID = Literal["vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2"]` to `v2/data_type/` or `_common.py` +2. Add `OBJECT_CODEC_IDS: Final = (...)` as the runtime constant +3. Optionally add a `v3/data_type/object.py` for object-codec dtype shapes if v3 supports them +4. Create `v2/data_type/common.py` for v2-shared constants (v2 dtype names, endianness, special floats) + +**Status:** Additive. + +--- + +## 4. Missing Per-Codec V2 Configuration TypedDicts + +**Gap:** zarr-metadata has only `CodecMetadataV2 = {"id": str, ...}` (generic). zarr-python defines per-codec v2 configs for specific behaviors. + +**zarr-python has:** +- `src/zarr/codecs/blosc.py` (line 45): `BloscConfigV2 = TypedDict` with fields `cname`, `clevel`, `shuffle`, `blocksize`, `typesize` + +**zarr-metadata has:** +- `v2/codec.py` (line 7): only generic `CodecMetadataV2` + +**Proposal:** Add `CodecMetadataV2Blosc` (or `BloscCodecConfigurationV2`?) TypedDict to `v2/codec.py` or a new `v2/codec/blosc.py`, matching zarr-metadata's per-codec pattern for v3: +```python +class BloscCodecConfigurationV2(TypedDict): + cname: str # Blosc compressor ID + clevel: int + shuffle: int + blocksize: int + typesize: NotRequired[int] +``` + +Future codecs may need similar treatment. + +**Status:** Additive; may become breaking if zarr-python standardizes on these types for v2 codec dispatch. + +--- + +## 5. Chunk Key Encoding: Missing Types + +**Gap:** zarr-python's `ChunkKeyEncodingParams` TypedDict doesn't match zarr-metadata's per-encoding types. 
+ +**zarr-python has** (`src/zarr/core/chunk_key_encodings.py` line 27): +```python +class ChunkKeyEncodingParams(TypedDict): + name: Literal["v2", "default"] + separator: NotRequired[SeparatorLiteral] +``` + +**zarr-metadata has:** +- `DefaultChunkKeyEncodingConfiguration`, `DefaultChunkKeyEncodingMetadata` (with optional short-hand form) +- `V2ChunkKeyEncodingMetadata` (with optional short-hand form) +- No `ChunkKeyEncodingParams` union covering both + +**Proposal:** Add to `v3/chunk_key_encoding/__init__.py`: +```python +ChunkKeyEncodingParams = DefaultChunkKeyEncodingObject | V2ChunkKeyEncodingObject +``` +Or add a generic TypedDict analogous to `NamedConfig` to capture the `{name, configuration}` pattern for all chunk key encodings. + +**Status:** Additive. + +--- + +## 6. ConsolidatedMetadataV2 has the wrong value type + +**Gap:** `ConsolidatedMetadataV2.metadata` ([packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py:27](packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py#L27)) types values as `GroupMetadataV2 | ArrayMetadataV2`. That's wrong on two counts: + +1. The map's keys are file paths including the file suffix — `"foo/.zarray"`, `"foo/.zgroup"`, `"foo/.zattrs"`, etc. (the docstring acknowledges this). `.zattrs` entries have no representation in the union. +2. `ArrayMetadataV2` is the *merged* in-memory shape that folds `.zattrs` into an `attributes` field. The on-disk `.zarray` file has no `attributes` key — that data lives in the sibling `.zattrs` file. So `.zarray` content does not satisfy `ArrayMetadataV2`. + +**What exists today (zarr-metadata):** +- `ArrayMetadataV2` ([packages/zarr-metadata/src/zarr_metadata/v2/array.py:42](packages/zarr-metadata/src/zarr_metadata/v2/array.py#L42)) — the merged in-memory form, includes `attributes`. +- `GroupMetadataV2` ([packages/zarr-metadata/src/zarr_metadata/v2/group.py:11](packages/zarr-metadata/src/zarr_metadata/v2/group.py#L11)) — the on-disk `.zgroup` form, no `attributes`. 
(See section 7 for the symmetric question on the group side.) +- No type for `.zarray` on-disk content. +- No type for `.zattrs` on-disk content. + +**Proposal:** Introduce on-disk file types separate from in-memory shapes: + +```python +# .zarray content — no attributes field (those live in sibling .zattrs) +class ArrayFileMetadataV2(TypedDict): + zarr_format: Literal[2] + shape: tuple[int, ...] + chunks: tuple[int, ...] + dtype: DataTypeMetadataV2 + compressor: CodecMetadataV2 | None + fill_value: object + order: ArrayOrderV2 + filters: tuple[CodecMetadataV2, ...] | None + dimension_separator: NotRequired[ArrayDimensionSeparatorV2] + +# .zgroup content — already matches existing GroupMetadataV2 +GroupFileMetadataV2 = GroupMetadataV2 # or rename current GroupMetadataV2 + +# .zattrs content — arbitrary user attributes +ZAttrsV2 = Mapping[str, object] +``` + +Then: + +```python +class ConsolidatedMetadataV2(TypedDict): + zarr_consolidated_format: int + metadata: Mapping[str, ArrayFileMetadataV2 | GroupFileMetadataV2 | ZAttrsV2] +``` + +The value-type union can't discriminate on the key suffix at the type level — consumers will narrow at runtime based on `key.endswith(".zarray")` etc. — but at least every legal value shape is representable. + +**Knock-on:** This change requires a deliberate decision on what to name "the in-memory merged shape" vs "the on-disk file shape" for v2 arrays and groups. Options: + +- Keep `ArrayMetadataV2` as the merged in-memory shape (most useful as the principal type in zarr-metadata's public API), introduce `ArrayFileMetadataV2` as a separate on-disk type used inside `ConsolidatedMetadataV2`. +- Or invert: `ArrayMetadataV2` becomes the on-disk shape (matching the v2 spec literally), and add `ArrayInMemoryV2` (or similar) for the merged shape. This is more spec-faithful but breaks existing consumers using the merged form. + +Same question applies to `GroupMetadataV2` — see section 7. + +**Status:** Breaking. 
Consumers using `ConsolidatedMetadataV2.metadata` values today are using a wrong type, but their code may have been working by accident (since the runtime values are dicts and TypedDicts don't enforce at runtime). A correct type catches misuse but rejects existing patterns that may have been silently incorrect. + +--- + +## 7. GroupMetadataV2: Attributes Inconsistency + +**Gap:** zarr-metadata's v2 group and array metadata treat attributes differently. + +**zarr-metadata:** +- `v2/array.py`: `ArrayMetadataV2` includes `attributes: Mapping[str, object]` field +- `v2/group.py` (line 11): `GroupMetadataV2` only has `zarr_format: Literal[2]`; attributes omitted + +**zarr-python** (`src/zarr/core/group.py` line 146): `ConsolidatedMetadata` stores `GroupMetadata` objects with an `attributes` field. + +**Issue:** GroupMetadataV2 doesn't match the in-memory shape. The v2 spec stores `.zgroup` (just `zarr_format`) separately from `.zattrs`, so the on-disk shape is correct. But zarr-python tests pass consolidated metadata dictionaries with attributes included. + +**Proposal:** Either: +1. Split `GroupMetadataV2` into on-disk (`GroupMetadataV2File`) and in-memory (`GroupMetadataV2Consolidated`) variants +2. Add an optional `attributes` field to `GroupMetadataV2` to match `ArrayMetadataV2` (less spec-correct but more practical) +3. Document the asymmetry clearly and suggest zarr-python use `GroupMetadataV2 | dict[str, object]` for consolidated shapes + +**Status:** Clarification or minor extension; possibly breaking if zarr-metadata maintainers reject relaxing the spec. + +--- + +## 8. Partial/Update TypedDicts + +**Gap:** zarr-python's `replace()` and update operations benefit from `total=False` variants of metadata TypedDicts. + +**Context:** zarr-python uses `dataclasses.replace()` on `ArrayV3Metadata`, `GroupMetadata`, etc., which requires partial update shapes. 
+ +**Proposal:** Add `Partial` TypedDicts (with `total=False`) to zarr-metadata for commonly updated types: +```python +# In v3/array.py +class ArrayMetadataV3Partial(TypedDict, total=False): + """Partial/update shape of ArrayMetadataV3 for use in replace operations.""" + zarr_format: Literal[3] + node_type: Literal["array"] + attributes: Mapping[str, object] + data_type: MetadataFieldV3 + fill_value: object + codecs: tuple[MetadataFieldV3, ...] + chunk_grid: MetadataFieldV3 + chunk_key_encoding: MetadataFieldV3 +``` + +This would clarify which fields zarr-python is allowed to modify in-place. + +**Status:** Additive; helpful but not blocking. + +--- + +## 9. PEP 728 extra_items Support (mypy Blocker) + +**Gap:** mypy v1.20's `extra_items=` support is incomplete. Annotating fields with `ExtensionFieldV3` produces false positives. + +**Example:** `src/zarr/core/metadata/v3.py` defines arrays/groups with extension fields; annotating extra keys as `ExtensionFieldV3` fails mypy validation. + +**Technical detail:** PEP 728 is not fully honored by mypy v1.20; the type checker doesn't enforce `extra_items=` constraints on TypedDict assignments. + +**Proposal:** Document this as an upstream blocker. When mypy gains full support, zarr-python can tighten its type annotations for extension-field validation. + +**Status:** Upstream issue; zarr-metadata cannot fix this unilaterally. + +--- + +## 10. Dtype JSON Envelopes and Special Floats + +**Gap:** zarr-python's dtype modules define JSON-specific types that zarr-metadata has not factored out. 
+ +**zarr-python has:** +- `src/zarr/core/dtype/npy/float.py`: per-float-dtype JSON shape (includes special-value handling) +- `src/zarr/core/dtype/common.py` (line 25–26): `JSONFloatV2`, `JSONFloatV3` (unions of numeric, special strings, hex) + +**zarr-metadata has:** +- Per-dtype `FillValue` types (e.g., `Float32FillValue`), which capture some of this +- `SpecialFloatStrings = Literal[...]` in some modules (implicitly via float-specific types) +- No centralized `JSONFloatV2`, `JSONFloatV3` aliases + +**Proposal:** Add to `v3/data_type/common.py` (new file): +```python +SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] +SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") + +JSONFloatV2 = float | SpecialFloatStrings +JSONFloatV3 = float | SpecialFloatStrings | str # includes hex forms +``` + +Export these alongside per-dtype fill values. + +**Status:** Additive. + +--- + +## 11. ConsolidatedMetadataV3 Ambiguity with ArrayMetadataV3.to_dict() + +**Gap:** zarr-python's `ConsolidatedMetadata.to_dict()` and `GroupMetadata.to_dict()` return shapes that don't cleanly map to zarr-metadata TypedDicts. + +**zarr-python:** +- `src/zarr/core/group.py` line 150: `ConsolidatedMetadata.to_dict()` returns `{kind, must_understand, metadata: {k: v.to_dict() for ...}}` +- `src/zarr/core/group.py` line 435: `GroupMetadata.to_dict()` returns `dict[str, Any]` (uses `asdict`), which includes `node_type="group"` on v2 groups (spec violation) + +**zarr-metadata:** +- `v3/consolidated.py`: `ConsolidatedMetadataV3` has `metadata: Mapping[str, ArrayMetadataV3 | GroupMetadataV3]` +- Cannot annotate `to_dict()` return as `ConsolidatedMetadataV3` because the `.to_dict()` cascade through dict-comp breaks TypedDict narrowing + +**Proposal:** Document the gap and consider adding a runtime validator/assertion to catch v2 entries in v3 consolidated metadata. 
zarr-python maintainers should ensure `to_dict()` output is spec-compliant (no v2 entries in v3 consolidated metadata, no `node_type` on v2 group serialization).
+
+**Status:** Requires coordination between zarr-python and zarr-metadata maintainers; may need spec clarification.
+
+---
+
+## 12. v2 Chunk Key Encoding and Format Detection
+
+**Gap:** zarr-metadata lacks a clearly named, exported type for zarr v2's chunk key encoding, and zarr-python uses implicit logic.
+
+**Context:** Zarr v2 uses a fixed `.` or `/` separator; zarr-metadata models this under v3 chunk key encodings but doesn't explicitly name the v2 variant.
+
+**zarr-metadata has:**
+- `v3/chunk_key_encoding/v2.py`: `V2ChunkKeyEncodingMetadata` (describes v2-style encoding used in v3)
+
+**Proposal:** Ensure v2-specific chunk key encoding is documented and exported clearly; consider adding `v2_chunk_key_encoding` or similar to `__init__.py` if not already there.
+
+**Status:** Clarification/documentation.
+
+---
+
+## Summary of Recommendations
+
+| Category | Priority | Effort | Breaking? | Notes |
+|----------|----------|--------|-----------|-------|
+| Runtime Final tuples (SHUFFLE, ENDIANNESS_STR, etc.) | High | Low | No | Unlocks direct use of zarr-metadata constants in zarr-python |
+| ObjectCodecID and v2 dtype constants | Medium | Low | No | Completes dtype metadata coverage |
+| Per-codec v2 configs (BloscConfigV2, etc.) | Medium | Medium | No | Matches v3 pattern; future-proofs v2 codec dispatch |
+| GroupMetadataV2 attributes field | Low | Low | Maybe | Clarifies in-memory vs on-disk distinction |
+| ChunkKeyEncodingParams union | Low | Low | No | Simplifies chunk key encoding union type |
+| Partial TypedDicts (total=False) | Low | Low | No | Documents update-safe fields |
+
+---
+
+## Implementation Priority
+
+1. **Phase 1 (High-value, Low-risk):**
+   - Add runtime Final tuple constants (SHUFFLE, ENDIANNESS_STR, SPECIAL_FLOAT_STRINGS, etc.) 
+ - Export ObjectCodecID + OBJECT_CODEC_IDS + - Add JSONFloatV2 and JSONFloatV3 aliases + +2. **Phase 2 (Medium-risk, Medium-gain):** + - Add per-codec v2 configuration TypedDicts + - Resolve GroupMetadataV2 attributes asymmetry (document or extend) + +3. **Phase 3 (Coordination-dependent):** + - Work with mypy team on PEP 728 extra_items support + - Clarify consolidation metadata spec alignment with zarr-python + - Add Partial TypedDict variants (documentation benefit) diff --git a/pyproject.toml b/pyproject.toml index cd7d32c286..f2d3512648 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ 'google-crc32c>=1.5', 'typing_extensions>=4.13', 'donfig>=0.8', + 'zarr-metadata>=0.1.1,<0.2', ] dynamic = [ @@ -147,6 +148,15 @@ omit = [ "src/zarr/testing/conftest.py", # only for downstream projects ] +# When developing zarr-python locally, resolve zarr-metadata from the in-tree +# package under packages/zarr-metadata/. The `[project.dependencies]` version +# requirement is what propagates to consumers installing from PyPI. +[tool.uv.workspace] +members = ["packages/zarr-metadata"] + +[tool.uv.sources] +zarr-metadata = { workspace = true } + [tool.hatch] version.source = "vcs" @@ -155,9 +165,18 @@ hooks.vcs.version-file = "src/zarr/_version.py" [tool.hatch.envs.dev] dependency-groups = ["dev"] +# Resolve zarr-metadata from the in-tree workspace member, not PyPI. See +# `[tool.uv.sources]` above for the equivalent for `uv run` invocations. +workspace.members = ["packages/zarr-metadata"] [tool.hatch.envs.test] dependency-groups = ["test"] +# Resolve zarr-metadata from the in-tree workspace member, not PyPI, so CI +# in `test.yml` exercises the integration between the two packages on every +# PR. Envs that inherit via `template = "test"` (gputest, upstream) pick +# this up automatically; min_deps overrides it (see below) to test against +# the published floor. 
+workspace.members = ["packages/zarr-metadata"] [tool.hatch.envs.test.env-vars] @@ -240,6 +259,11 @@ template = "test" python = "3.12" features = ["remote"] dependency-groups = ["remote-tests"] +# Override the inherited workspace.members so this env tests against the +# minimum supported zarr-metadata from PyPI (the floor of the version range +# declared in [project.dependencies]) instead of the in-tree source. This +# keeps the "minimum supported deps" guarantee honest. +workspace.members = [] extra-dependencies = [ 'packaging==22.*', 'numpy==2.0.*', @@ -250,6 +274,10 @@ extra-dependencies = [ 'typing_extensions==4.13.*', 'donfig==0.8.*', 'obstore==0.5.*', + # Pin to the floor of zarr-python's declared zarr-metadata range. Must + # match the >= bound in [project.dependencies] above; the + # `check_min_deps_floor.py` pre-commit hook enforces this invariant. + 'zarr-metadata==0.1.1', ] [tool.hatch.envs.defaults] diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 8a20282060..0ab01e8c78 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -10,10 +10,22 @@ import numcodecs from numcodecs.blosc import Blosc from packaging.version import Version +from zarr_metadata.v3.codec.blosc import ( + BloscCName as _BloscCName, +) +from zarr_metadata.v3.codec.blosc import ( + BloscCodecConfiguration as _BloscCodecConfiguration, +) +from zarr_metadata.v3.codec.blosc import ( + BloscCodecObject as _BloscCodecObject, +) +from zarr_metadata.v3.codec.blosc import ( + BloscShuffle as _BloscShuffle, +) from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper -from zarr.core.common import JSON, NamedRequiredConfig, parse_named_configuration +from zarr.core.common import JSON, parse_named_configuration from zarr.core.dtype.common import HasItemSize if TYPE_CHECKING: @@ -22,19 +34,23 @@ from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer -BloscShuffleLiteral = Literal["noshuffle", 
"shuffle", "bitshuffle"] +# Re-export under zarr-python's historical names; canonical definitions +# live in `zarr_metadata.v3.codec.blosc`. +BloscShuffleLiteral = _BloscShuffle """The shuffle values permitted for the blosc codec""" BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle") -BloscCnameLiteral = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"] +BloscCnameLiteral = _BloscCName """The codec identifiers used in the blosc codec""" BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd") class BloscConfigV2(TypedDict): - """Configuration for the V2 Blosc codec""" + """Configuration for the V2 Blosc codec. + + v2 codec shapes predate zarr-metadata, which models only v3 codecs.""" cname: BloscCnameLiteral clevel: int @@ -43,20 +59,8 @@ class BloscConfigV2(TypedDict): typesize: NotRequired[int] -class BloscConfigV3(TypedDict): - """Configuration for the V3 Blosc codec""" - - cname: BloscCnameLiteral - clevel: int - shuffle: BloscShuffleLiteral - blocksize: int - typesize: int - - -class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]): - """ - The JSON form of the Blosc codec in Zarr V3. 
- """ +BloscConfigV3 = _BloscCodecConfiguration +BloscJSON_V3 = _BloscCodecObject class _DeprecatedStrEnumMeta(type): diff --git a/src/zarr/codecs/cast_value.py b/src/zarr/codecs/cast_value.py index adf4886104..2de5097746 100644 --- a/src/zarr/codecs/cast_value.py +++ b/src/zarr/codecs/cast_value.py @@ -12,7 +12,7 @@ from collections.abc import Mapping from dataclasses import dataclass, replace -from typing import TYPE_CHECKING, Final, Literal, TypedDict, cast +from typing import TYPE_CHECKING, Final, TypedDict, cast import numpy as np @@ -23,6 +23,8 @@ if TYPE_CHECKING: from typing import NotRequired, Self + from zarr_metadata.v3.codec.cast_value import OutOfRangeMode, RoundingMode + from zarr.core.array_spec import ArraySpec from zarr.core.buffer import NDBuffer from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -33,17 +35,6 @@ class ScalarMapJSON(TypedDict): decode: NotRequired[list[tuple[object, object]]] -RoundingMode = Literal[ - "nearest-even", - "towards-zero", - "towards-positive", - "towards-negative", - "nearest-away", -] - -OutOfRangeMode = Literal["clamp", "wrap"] - - class ScalarMap(TypedDict, total=False): """ The normalized, in-memory form of a scalar map. 
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 8626d480a7..ff8af0d9f2 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -3,7 +3,7 @@
 import warnings
 from collections.abc import Iterable, Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, TypedDict, cast
+from typing import TYPE_CHECKING, Any, cast
 
 from zarr.abc.metadata import Metadata
 from zarr.abc.numcodec import Numcodec, _is_numcodec
@@ -29,8 +29,11 @@
 from dataclasses import dataclass, field, fields, replace
 
 import numpy as np
+from zarr_metadata.v2.array import ArrayMetadataV2 as _ArrayMetadataV2
 
 from zarr.core.array_spec import ArrayConfig, ArraySpec
+
+# `ArrayV2MetadataDict` (defined after the import block) re-exports this v2 array metadata JSON shape under zarr-python's historical name.
 from zarr.core.chunk_key_encodings import parse_separator
 from zarr.core.common import (
     JSON,
@@ -42,18 +45,9 @@
 from zarr.core.config import config, parse_indexing_order
 from zarr.core.metadata.common import parse_attributes
 
-
-class ArrayV2MetadataDict(TypedDict):
-    """
-    A typed dictionary model for Zarr format 2 metadata. 
- """ - - zarr_format: Literal[2] - attributes: dict[str, JSON] - - # Union of acceptable types for v2 compressors type CompressorLikev2 = dict[str, JSON] | Numcodec | None +ArrayV2MetadataDict = _ArrayMetadataV2 @dataclass(frozen=True, kw_only=True) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index c794ee2e87..deb378c4a8 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -3,9 +3,10 @@ import json from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass, field, replace -from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast +from typing import TYPE_CHECKING, Any, Final, Literal, TypeGuard, cast from typing_extensions import TypedDict +from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.metadata import Metadata @@ -139,14 +140,12 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: ) -class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg] - """ - This class models allowed extra fields in array metadata. - They must have ``must_understand`` set to ``False``, and may contain - arbitrary additional JSON data. - """ +AllowedExtraField = ExtensionFieldV3 +"""Alias for `zarr_metadata.v3.array.ExtensionFieldV3`. - must_understand: Literal[False] +`must_understand` is typed as `bool` to match the spec (extension authors that +*understand* a field may produce `True`); the runtime guard +`check_allowed_extra_field` enforces that zarr-python only accepts `False`.""" def check_allowed_extra_field(data: object) -> TypeGuard[AllowedExtraField]: @@ -421,25 +420,12 @@ def parse_chunk_grid( raise ValueError(f"Unknown chunk grid name: {name!r}") -class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg] - """ - A typed dictionary model for zarr v3 array metadata. 
- - Extra keys are permitted if they conform to ``AllowedExtraField`` - (i.e. they are mappings with ``must_understand: false``). - """ +ArrayMetadataJSON_V3 = ArrayMetadataV3 +"""Alias for `zarr_metadata.v3.array.ArrayMetadataV3`. - zarr_format: Literal[3] - node_type: Literal["array"] - data_type: str | NamedConfig[str, Mapping[str, JSON]] - shape: tuple[int, ...] - chunk_grid: str | NamedConfig[str, Mapping[str, JSON]] - chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]] - fill_value: JSON - codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...] - attributes: NotRequired[Mapping[str, JSON]] - storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]] - dimension_names: NotRequired[tuple[str | None, ...]] +The TypedDict from the metadata package is the canonical model of the v3 +array metadata document; this alias preserves the historical zarr-python +name. Extra keys are permitted if they conform to `ExtensionFieldV3`.""" """ @@ -671,6 +657,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: ) def to_dict(self) -> dict[str, JSON]: + """Serialize as a JSON-shaped dict matching `ArrayMetadataV3`. + + Return type is `dict[str, JSON]` rather than `ArrayMetadataV3` so the + result composes with other zarr-python metadata serialisation paths + that traffic in `dict[str, JSON]` (notably consolidated metadata). + """ out_dict = super().to_dict() extra_fields = out_dict.pop("extra_fields") out_dict = out_dict | extra_fields # type: ignore[operator]