From 8212984185f7f4c2d63d4628c83f476d15c74cf8 Mon Sep 17 00:00:00 2001 From: Max Hargreaves Date: Tue, 29 Jul 2025 13:08:32 -0700 Subject: [PATCH 1/4] Change: fixed the read_zarr function for zarr.Group objects --- src/spatialdata/_io/io_zarr.py | 44 +++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py index 224ef1129..1497089b7 100644 --- a/src/spatialdata/_io/io_zarr.py +++ b/src/spatialdata/_io/io_zarr.py @@ -11,7 +11,11 @@ from zarr.errors import ArrayNotFoundError, MetadataError from spatialdata._core.spatialdata import SpatialData -from spatialdata._io._utils import BadFileHandleMethod, handle_read_errors, ome_zarr_logger +from spatialdata._io._utils import ( + BadFileHandleMethod, + handle_read_errors, + ome_zarr_logger, +) from spatialdata._io.io_points import _read_points from spatialdata._io.io_raster import _read_multiscale from spatialdata._io.io_shapes import _read_shapes @@ -36,14 +40,20 @@ def _open_zarr_store(store: str | Path | zarr.Group) -> tuple[zarr.Group, str]: # workaround: .zmetadata is being written as zmetadata (https://github.com/zarr-developers/zarr-python/issues/1121) if isinstance(store, str | Path) and str(store).startswith("http") and len(f) == 0: f = zarr.open_consolidated(store, mode="r", metadata_key="zmetadata") - f_store_path = f.store.store.path if isinstance(f.store, zarr.storage.ConsolidatedMetadataStore) else f.store.path + f_store_path = ( + f.store.store.path + if isinstance(f.store, zarr.storage.ConsolidatedMetadataStore) + else f.store.path + ) return f, f_store_path def read_zarr( store: str | Path | zarr.Group, selection: None | tuple[str] = None, - on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN] = BadFileHandleMethod.ERROR, + on_bad_files: Literal[ + BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN + ] = BadFileHandleMethod.ERROR, ) -> SpatialData: """ Read a SpatialData dataset from a zarr store (on-disk or remote). @@ -80,7 +90,11 @@ def read_zarr( shapes = {} # TODO: remove table once deprecated. - selector = {"images", "labels", "points", "shapes", "tables", "table"} if not selection else set(selection or []) + selector = ( + {"images", "labels", "points", "shapes", "tables", "table"} + if not selection + else set(selection or []) + ) logger.debug(f"Reading selection {selector}") # read multiscale images @@ -133,9 +147,17 @@ def read_zarr( with handle_read_errors( on_bad_files, location=f"{group.path}/{subgroup_name}", - exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError, TypeError), + exc_types=( + JSONDecodeError, + KeyError, + ValueError, + ArrayNotFoundError, + TypeError, + ), ): - labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels") + labels[subgroup_name] = _read_multiscale( + f_elem_store, raster_type="labels" + ) count += 1 logger.debug(f"Found {count} elements in {group}") @@ -197,7 +219,9 @@ def read_zarr( exc_types=(JSONDecodeError, MetadataError), ): group = f["tables"] - tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files) + tables = _read_table( + f_store_path, f, group, tables, on_bad_files=on_bad_files + ) if "table" in selector and "table" in f: warnings.warn( @@ -213,7 +237,9 @@ def read_zarr( exc_types=(JSONDecodeError, MetadataError), ): group = f[subgroup_name] - tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files) + tables = _read_table( + f_store_path, f, group, tables, on_bad_files=on_bad_files + ) logger.debug(f"Found {count} elements in {group}") @@ -234,5 +260,5 @@ def read_zarr( tables=tables, attrs=attrs, ) - sdata.path = Path(store) + sdata.path = Path(store.path) if isinstance(store, zarr.Group) else Path(store) return sdata From 2274632051c23bbf8502d26d58c9089d920e1217 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:11:36 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_io/io_zarr.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py index 1497089b7..9c62125b5 100644 --- a/src/spatialdata/_io/io_zarr.py +++ b/src/spatialdata/_io/io_zarr.py @@ -40,20 +40,14 @@ def _open_zarr_store(store: str | Path | zarr.Group) -> tuple[zarr.Group, str]: # workaround: .zmetadata is being written as zmetadata (https://github.com/zarr-developers/zarr-python/issues/1121) if isinstance(store, str | Path) and str(store).startswith("http") and len(f) == 0: f = zarr.open_consolidated(store, mode="r", metadata_key="zmetadata") - f_store_path = ( - f.store.store.path - if isinstance(f.store, zarr.storage.ConsolidatedMetadataStore) - else f.store.path - ) + f_store_path = f.store.store.path if isinstance(f.store, zarr.storage.ConsolidatedMetadataStore) else f.store.path return f, f_store_path def read_zarr( store: str | Path | zarr.Group, selection: None | tuple[str] = None, - on_bad_files: Literal[ - BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN - ] = BadFileHandleMethod.ERROR, + on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN] = BadFileHandleMethod.ERROR, ) -> SpatialData: """ Read a SpatialData dataset from a zarr store (on-disk or remote). @@ -90,11 +84,7 @@ def read_zarr( shapes = {} # TODO: remove table once deprecated. - selector = ( - {"images", "labels", "points", "shapes", "tables", "table"} - if not selection - else set(selection or []) - ) + selector = {"images", "labels", "points", "shapes", "tables", "table"} if not selection else set(selection or []) logger.debug(f"Reading selection {selector}") # read multiscale images @@ -155,9 +145,7 @@ def read_zarr( TypeError, ), ): - labels[subgroup_name] = _read_multiscale( - f_elem_store, raster_type="labels" - ) + labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels") count += 1 logger.debug(f"Found {count} elements in {group}") @@ -219,9 +207,7 @@ def read_zarr( exc_types=(JSONDecodeError, MetadataError), ): group = f["tables"] - tables = _read_table( - f_store_path, f, group, tables, on_bad_files=on_bad_files - ) + tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files) if "table" in selector and "table" in f: warnings.warn( @@ -237,9 +223,7 @@ def read_zarr( exc_types=(JSONDecodeError, MetadataError), ): group = f[subgroup_name] - tables = _read_table( - f_store_path, f, group, tables, on_bad_files=on_bad_files - ) + tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files) logger.debug(f"Found {count} elements in {group}") From b22de3f1ab3bc823c0c769f341676602af4304df Mon Sep 17 00:00:00 2001 From: Max Hargreaves Date: Tue, 29 Jul 2025 13:18:04 -0700 Subject: [PATCH 3/4] Change: remove errant formatting --- src/spatialdata/_io/io_zarr.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py index 9c62125b5..425e8925b 100644 --- a/src/spatialdata/_io/io_zarr.py +++ b/src/spatialdata/_io/io_zarr.py @@ -11,11 +11,7 @@ from zarr.errors import ArrayNotFoundError, MetadataError from spatialdata._core.spatialdata import SpatialData -from spatialdata._io._utils import ( - BadFileHandleMethod, - handle_read_errors, - ome_zarr_logger, -) +from spatialdata._io._utils import BadFileHandleMethod, handle_read_errors, ome_zarr_logger from spatialdata._io.io_points import _read_points from spatialdata._io.io_raster import _read_multiscale from spatialdata._io.io_shapes import _read_shapes @@ -137,13 +133,7 @@ def read_zarr( with handle_read_errors( on_bad_files, location=f"{group.path}/{subgroup_name}", - exc_types=( - JSONDecodeError, - KeyError, - ValueError, - ArrayNotFoundError, - TypeError, - ), + exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError, TypeError), ): labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels") count += 1 From 6d9ac0581f5f5f44f4aeda766806751298b469ba Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 23 Dec 2025 14:33:01 +0100 Subject: [PATCH 4/4] support zarr.Group when reading sdata; better tests --- src/spatialdata/_core/spatialdata.py | 7 +++++-- src/spatialdata/_io/_utils.py | 4 ++-- src/spatialdata/_io/io_zarr.py | 7 ++++--- tests/io/test_readwrite.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/spatialdata/_core/spatialdata.py b/src/spatialdata/_core/spatialdata.py index f92bc9f54..21bd6c5b8 100644 --- a/src/spatialdata/_core/spatialdata.py +++ b/src/spatialdata/_core/spatialdata.py @@ -16,6 +16,7 @@ from dask.dataframe import Scalar, read_parquet from geopandas import GeoDataFrame from shapely import MultiPolygon, Polygon +from upath import UPath from xarray import DataArray, DataTree from zarr.errors import GroupNotFoundError @@ -1810,7 +1811,9 @@ def tables(self, tables: dict[str, AnnData]) -> None: @staticmethod def read( - file_path: Path | str, selection: tuple[str] | None = None, reconsolidate_metadata: bool = False + file_path: str | Path | UPath | zarr.Group, + selection: tuple[str] | None = None, + reconsolidate_metadata: bool = False, ) -> SpatialData: """ Read a SpatialData object from a Zarr storage (on-disk or remote). @@ -1818,7 +1821,7 @@ def read( Parameters ---------- file_path - The path or URL to the Zarr storage. + The path, URL, or zarr.Group to the Zarr storage. selection The elements to read (images, labels, points, shapes, table). If None, all elements are read. reconsolidate_metadata diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py index a8e194a7b..b58d67445 100644 --- a/src/spatialdata/_io/_utils.py +++ b/src/spatialdata/_io/_utils.py @@ -470,8 +470,8 @@ def _resolve_zarr_store( if isinstance(path, zarr.Group): # if the input is a zarr.Group, wrap it with a store if isinstance(path.store, LocalStore): - # create a simple FSStore if the store is a LocalStore with just the path - return FsspecStore(os.path.join(path.store.path, path.path), **kwargs) + store_path = UPath(path.store.root) / path.path + return LocalStore(store_path.path) if isinstance(path.store, FsspecStore): # if the store within the zarr.Group is an FSStore, return it # but extend the path of the store with that of the zarr.Group diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py index d19ca40ad..98919d613 100644 --- a/src/spatialdata/_io/io_zarr.py +++ b/src/spatialdata/_io/io_zarr.py @@ -11,6 +11,7 @@ from geopandas import GeoDataFrame from ome_zarr.format import Format from pyarrow import ArrowInvalid +from upath import UPath from zarr.errors import ArrayNotFoundError from spatialdata._core.spatialdata import SpatialData @@ -120,7 +121,7 @@ def get_raster_format_for_read( def read_zarr( - store: str | Path, + store: str | Path | UPath | zarr.Group, selection: None | tuple[str] = None, on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN] = BadFileHandleMethod.ERROR, ) -> SpatialData: @@ -130,7 +131,7 @@ def read_zarr( Parameters ---------- store - Path to the zarr store (on-disk or remote). + Path, URL, or zarr.Group to the zarr store (on-disk or remote). selection List of elements to read from the zarr store (images, labels, points, shapes, table). If None, all elements are @@ -228,7 +229,7 @@ def read_zarr( tables=tables, attrs=attrs, ) - sdata.path = Path(store.path) if isinstance(store, zarr.Group) else Path(store) + sdata.path = resolved_store.root return sdata diff --git a/tests/io/test_readwrite.py b/tests/io/test_readwrite.py index 6e948f519..11855a222 100644 --- a/tests/io/test_readwrite.py +++ b/tests/io/test_readwrite.py @@ -11,6 +11,7 @@ import zarr from anndata import AnnData from numpy.random import default_rng +from upath import UPath from zarr.errors import GroupNotFoundError from spatialdata import SpatialData, deepcopy, read_zarr @@ -963,3 +964,30 @@ def test_can_read_sdata_with_reconsolidation(full_sdata, sdata_container_format: new_sdata = SpatialData.read(path, reconsolidate_metadata=True) assert_spatial_data_objects_are_identical(full_sdata, new_sdata) + + +def test_read_sdata(tmp_path: Path, points: SpatialData) -> None: + sdata_path = tmp_path / "sdata.zarr" + points.write(sdata_path) + + # path as Path + sdata_from_path = SpatialData.read(sdata_path) + assert sdata_from_path.path == sdata_path + + # path as str + sdata_from_str = SpatialData.read(str(sdata_path)) + assert sdata_from_str.path == sdata_path + + # path as UPath + sdata_from_upath = SpatialData.read(UPath(sdata_path)) + assert sdata_from_upath.path == sdata_path + + # path as zarr Group + zarr_group = zarr.open_group(sdata_path, mode="r") + sdata_from_zarr_group = SpatialData.read(zarr_group) + assert sdata_from_zarr_group.path == sdata_path + + # Assert all read methods produce identical SpatialData objects + assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_str) + assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_upath) + assert_spatial_data_objects_are_identical(sdata_from_path, sdata_from_zarr_group)