From 74deacca5122667b030d5aeaaaff78ee8492e72e Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 19 Jun 2026 19:20:57 +0200 Subject: [PATCH 1/4] Add downloadable cells dataset via scverse-misc Expose spatialdata.datasets.cells() alongside blobs/raccoon. It downloads the cells example dataset and loads it as a SpatialData object, reusing the scverse-misc datasets infrastructure (parse_registry + fetch with the built-in spatialdata loader) rather than reimplementing a downloader. - ship src/spatialdata/datasets.yaml registry (base_url + cells.zip sha256) - add scverse-misc[datasets]>=0.0.10 dependency - note: scverse-misc requires Python >=3.12; the matching requires-python/ruff target bump to 3.12 and the CI matrix update 3.11 -> 3.12 are not part of this patch (see the diffstat below) - docs + network-free registry test and a slow download test Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/api/datasets.md | 1 + pyproject.toml | 1 + src/spatialdata/datasets.py | 33 ++++++++++++++++++++++++++++++++- src/spatialdata/datasets.yaml | 15 +++++++++++++++ tests/datasets/test_datasets.py | 30 +++++++++++++++++++++++++++++- 5 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 src/spatialdata/datasets.yaml diff --git a/docs/api/datasets.md b/docs/api/datasets.md index 7bf6d5a61..d0c43b56c 100644 --- a/docs/api/datasets.md +++ b/docs/api/datasets.md @@ -7,5 +7,6 @@ Convenience small datasets .. autofunction:: blobs .. autofunction:: blobs_annotating_element +.. autofunction:: cells ..
autofunction:: raccoon ``` diff --git a/pyproject.toml b/pyproject.toml index 03181eadb..34592a8e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "spatial_image>=1.2.3", "scikit-image", "scipy!=1.17.0", + "scverse-misc[datasets]>=0.0.10", "typing_extensions>=4.8.0", "universal_pathlib>=0.2.6", "xarray>=2024.10.0", diff --git a/src/spatialdata/datasets.py b/src/spatialdata/datasets.py index 37f529c72..be27bf988 100644 --- a/src/spatialdata/datasets.py +++ b/src/spatialdata/datasets.py @@ -31,7 +31,7 @@ ) from spatialdata.transformations import Identity -__all__ = ["blobs", "raccoon"] +__all__ = ["blobs", "cells", "raccoon"] def blobs( @@ -79,6 +79,37 @@ def raccoon() -> SpatialData: return RaccoonDataset().raccoon() +def cells(path: str | None = None) -> SpatialData: + """ + Cells dataset. + + Download the ``cells`` example dataset and load it as a :class:`~spatialdata.SpatialData` + object. The download is hash-verified and cached, so repeated calls reuse the local copy + instead of downloading again. + + Parameters + ---------- + path + Directory in which to cache the downloaded data. If `None`, the default OS cache + location is used (:func:`pooch.os_cache` for ``"spatialdata"``). + + Returns + ------- + SpatialData object with the cells dataset. 
+ """ + import importlib.resources + from pathlib import Path + + import pooch + from scverse_misc.datasets import fetch, parse_registry + + cache_dir = Path(path) if path is not None else Path(pooch.os_cache("spatialdata")) + registry = importlib.resources.files("spatialdata").joinpath("datasets.yaml") + with importlib.resources.as_file(registry) as registry_path: + base_url, datasets = parse_registry(registry_path) + return fetch(datasets["cells"], cache_dir, base_url=base_url) + + class RaccoonDataset: """Raccoon dataset.""" diff --git a/src/spatialdata/datasets.yaml b/src/spatialdata/datasets.yaml new file mode 100644 index 000000000..1811f090c --- /dev/null +++ b/src/spatialdata/datasets.yaml @@ -0,0 +1,15 @@ +# Registry of downloadable example datasets for ``spatialdata.datasets``. +# +# Parsed by ``scverse_misc.datasets.parse_registry`` and fetched (downloaded, +# hash-verified, cached and loaded) via ``scverse_misc.datasets.fetch``. +# +# type: spatialdata -> a .zip that extracts to a single .zarr store +base_url: https://exampledata.scverse.org/spatialdata/ +datasets: + cells: + type: spatialdata + doc_header: Cells dataset as a SpatialData object. 
+ files: + - name: cells.zip + s3_key: cells.zip + sha256: dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index 2237e253c..10337915f 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -1,6 +1,11 @@ from __future__ import annotations -from spatialdata.datasets import blobs, raccoon +import importlib.resources + +import pytest + +from spatialdata import SpatialData +from spatialdata.datasets import blobs, cells, raccoon def test_datasets() -> None: @@ -26,3 +31,26 @@ def test_datasets() -> None: assert sdata_raccoon.images["raccoon"].shape == (3, 768, 1024) assert sdata_raccoon.labels["segmentation"].shape == (768, 1024) _ = str(sdata_raccoon) + + +def test_cells_registry() -> None: + # Network-free: the shipped registry parses and exposes the cells dataset. + from scverse_misc.datasets import parse_registry + + registry = importlib.resources.files("spatialdata").joinpath("datasets.yaml") + with importlib.resources.as_file(registry) as registry_path: + base_url, datasets = parse_registry(registry_path) + + assert base_url == "https://exampledata.scverse.org/spatialdata/" + entry = datasets["cells"] + assert entry.type == "spatialdata" + file = entry.file(name="cells.zip") + assert file.sha256 == "dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb" + assert file.resolve_url(base_url) == "https://exampledata.scverse.org/spatialdata/cells.zip" + + +@pytest.mark.slow +def test_cells_download(tmp_path) -> None: + # Downloads ~3 MB from the scverse example data bucket; opt out with `-m "not slow"`. 
+ sdata = cells(path=str(tmp_path)) + assert isinstance(sdata, SpatialData) From c453909ef793bc8c750d20acd8a4138b30669e1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:22:38 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/datasets.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/spatialdata/datasets.yaml b/src/spatialdata/datasets.yaml index 1811f090c..9f5d235a1 100644 --- a/src/spatialdata/datasets.yaml +++ b/src/spatialdata/datasets.yaml @@ -6,10 +6,10 @@ # type: spatialdata -> a .zip that extracts to a single .zarr store base_url: https://exampledata.scverse.org/spatialdata/ datasets: - cells: - type: spatialdata - doc_header: Cells dataset as a SpatialData object. - files: - - name: cells.zip - s3_key: cells.zip - sha256: dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb + cells: + type: spatialdata + doc_header: Cells dataset as a SpatialData object. 
+ files: + - name: cells.zip + s3_key: cells.zip + sha256: dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb From 49d0309a6bb71847621ca179d6056df40efe6fb8 Mon Sep 17 00:00:00 2001 From: anon Date: Mon, 22 Jun 2026 12:17:35 +0200 Subject: [PATCH 3/4] Fix mypy no-any-return in cells() Co-Authored-By: Claude Opus 4.8 (1M context) --- src/spatialdata/datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/datasets.py b/src/spatialdata/datasets.py index be27bf988..8b0233f68 100644 --- a/src/spatialdata/datasets.py +++ b/src/spatialdata/datasets.py @@ -107,7 +107,8 @@ def cells(path: str | None = None) -> SpatialData: registry = importlib.resources.files("spatialdata").joinpath("datasets.yaml") with importlib.resources.as_file(registry) as registry_path: base_url, datasets = parse_registry(registry_path) - return fetch(datasets["cells"], cache_dir, base_url=base_url) + sdata: SpatialData = fetch(datasets["cells"], cache_dir, base_url=base_url) + return sdata class RaccoonDataset: From a48e66de58d5222d4f03b1777f33744bb21b6090 Mon Sep 17 00:00:00 2001 From: anon Date: Mon, 22 Jun 2026 12:20:49 +0200 Subject: [PATCH 4/4] Bump scverse-misc pin to >=0.1.0 (first released datasets version) Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 34592a8e5..3fe577a50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ dependencies = [ "spatial_image>=1.2.3", "scikit-image", "scipy!=1.17.0", - "scverse-misc[datasets]>=0.0.10", + "scverse-misc[datasets]>=0.1.0", "typing_extensions>=4.8.0", "universal_pathlib>=0.2.6", "xarray>=2024.10.0",