From 68f6a82dc18ac256279b7b8cd2b88a2642c90bee Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 11 Jun 2026 22:57:55 +0200 Subject: [PATCH 01/41] docs: design spec for zarrs-backed low-level functional API Co-Authored-By: Claude Fable 5 --- .../2026-06-11-zarrs-functional-api-design.md | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md new file mode 100644 index 0000000000..ad28c987f8 --- /dev/null +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -0,0 +1,180 @@ +# zarrs-backed low-level functional API for zarr-python + +Date: 2026-06-11 +Status: approved +Branch: `zarrs-bindings` + +## Goal + +Give zarr-python a low-level, functional API for zarr hierarchy CRUD whose +implementation delegates to the Rust [`zarrs`](https://docs.rs/zarrs) crate via +new PyO3 bindings. Every array routine takes a metadata document as an explicit +parameter, so callers can operate on read-only or virtual views of arrays +(e.g. decode a chunk with metadata the store never saw, or read a chunk as raw +bytes without decoding). + +Non-goals for this work: rewiring zarr-python's `Array`/`Group` classes or the +codec-pipeline registry through this API (possible later), fancy +(non-slice) indexing, and use of zarrs's experimental async feature. + +## Background + +- zarr-python is pure Python (hatchling). Its `Store` ABC + (`src/zarr/abc/store.py`) is async; metadata classes live under + `src/zarr/core/metadata/`. +- The Rust `zarrs` crate (~0.23) supports exactly the metadata-driven shape we + need: `Array::new_with_metadata(storage, path, metadata)` and + `Group::new_with_metadata(...)` construct nodes from a metadata document + without touching the store; `store_metadata()` persists separately. 
Chunk and + region I/O: `retrieve_chunk`, `retrieve_encoded_chunk` (raw bytes), + `retrieve_array_subset`, `partial_decoder` (sharding-aware), and the + corresponding `store_*` methods. `ArrayMetadata`/`GroupMetadata` parse + directly from JSON strings (v2 or v3; v2 converts internally). +- The existing `zarrs` PyPI package (github.com/zarrs/zarrs-python) exposes only + a codec pipeline (`CodecPipelineImpl`) and supports only a fixed set of + native stores. It cannot provide the API designed here, but its build setup + (maturin, PyO3 abi3, tokio/rayon) is the reference for ours. + +## Architecture + +Two distributions in this repo, hard boundary between them: + +1. **Rust crate `zarrs-bindings`** at the repo root (`zarrs-bindings/`), + built with maturin (PyO3, `abi3-py311`), publishing wheel `zarrs-bindings` + with native module `_zarrs_bindings`. It is a thin, mechanical binding over + `zarrs`: functions/pyclasses take metadata as a **JSON string**, a + store-config object, a node path, and return bytes / numpy arrays. It knows + nothing about zarr-python except the store sniffing described below. +2. **Python subpackage `zarr.zarrs`** in zarr-python: the public functional + API. Owns conversion between zarr-python types (`dict` metadata documents, + `zarr.abc.store.Store`, numpy arrays) and the binding layer, plus + validation, ergonomics, and error translation. Imports `_zarrs_bindings` + lazily and raises a helpful `ImportError` naming the `zarr[zarrs]` extra if + it is missing. + +zarr-python's own wheel remains pure Python; `zarrs-bindings` becomes an +optional dependency (`zarr[zarrs]`). + +## Public API (`zarr.zarrs`) + +All functions are `async def`. Parameters: + +- `metadata`: `dict[str, JSON]` — the literal metadata document (`zarr.json`, + or v2 `.zarray`/`.zgroup` equivalents). Never read from the store by the + array routines. +- `store`: `zarr.abc.store.Store`. +- `path`: node path within the store (str, `""` = root). 
+- `chunk_coords`: `tuple[int, ...]` grid coordinates. +- `selection`: tuple of `slice`/`int` only (v1 restriction). +- `options`: every function also accepts keyword-only + `options: ZarrsOptions | None = None` (omitted from the signatures below for + brevity) — a dataclass holding concurrency limits and checksum validation + flags. Defaults are applied when omitted; in Phase 1 the dataclass exists + but carries only defaults (fields become meaningful in Phase 3). + +```python +# node lifecycle +async def create_new_group(metadata, store, path) -> None # error if node exists +async def create_overwrite_group(metadata, store, path) -> None +async def create_new_array(metadata, store, path) -> None +async def create_overwrite_array(metadata, store, path) -> None +async def read_metadata(store, path) -> dict[str, JSON] # array or group doc +async def delete_node(store, path) -> None +async def list_children(store, path) -> list[tuple[str, dict]] # (path, metadata) + +# chunk-level I/O +async def decode_chunk(metadata, store, path, chunk_coords, *, selection=None) -> np.ndarray +async def read_encoded_chunk(metadata, store, path, chunk_coords) -> bytes | None +async def encode_chunk(metadata, store, path, chunk_coords, value) -> None +async def erase_chunk(metadata, store, path, chunk_coords) -> None + +# region-level I/O (selection in array coordinates, may span chunks) +async def decode_region(metadata, store, path, selection) -> np.ndarray +async def encode_region(metadata, store, path, selection, value) -> None +``` + +Mapping to zarrs primitives: + +| API function | zarrs primitive | +|---|---| +| `create_new_group` / `create_overwrite_group` | `Group::new_with_metadata` + `store_metadata` (existence check first for `new`) | +| `create_new_array` / `create_overwrite_array` | `Array::new_with_metadata` + `store_metadata` | +| `read_metadata` | `Array::open` / `Group::open` metadata retrieval | +| `delete_node` | `erase_metadata` + chunk erasure / prefix delete | 
+| `list_children` | `Group::children` / `traverse` | +| `decode_chunk` (no selection) | `retrieve_chunk` | +| `decode_chunk` (selection) | `partial_decoder(chunk).partial_decode` (sharding-aware) | +| `read_encoded_chunk` | `retrieve_encoded_chunk` | +| `encode_chunk` | `store_chunk` | +| `erase_chunk` | `erase_chunk` | +| `decode_region` | `retrieve_array_subset` | +| `encode_region` | `store_array_subset` | + +## Store bridge + +A Rust-side `StoreConfig` resolver, tried in priority order: + +1. `zarr.storage.LocalStore` → native `zarrs_filesystem` store. +2. obstore-backed `ObjectStore` → `zarrs_object_store` (Phase 3). +3. **Anything else** → generic `PyStore`: a Rust struct implementing + `ReadableStorageTraits` / `WritableStorageTraits` / + `ListableStorageTraits` over a Python callback object. + +The callback path: the async API function wraps the user's `Store` in a small +sync Python shim whose methods submit coroutines to zarr-python's existing +sync event-loop thread (`zarr.core.sync`, +`asyncio.run_coroutine_threadsafe(...)` + blocking result). Rust calls the +shim while holding no locks of its own. This makes any conformant `Store` +(Memory, Zip, Logging, Wrapper, user-defined) work without Rust knowing its +type. Deadlock safety relies on the existing invariant that code running on +the zarr sync loop never blocks on these Rust entry points. + +## Sync/async seam + +The public API is async to match zarr-python conventions. Internally each +function calls a blocking Rust entry point via `asyncio.to_thread`; the Rust +side releases the GIL during I/O and compute (reacquiring it only inside +`PyStore` callbacks). zarrs's experimental async feature is not used. + +## Error handling + +The binding layer raises a small set of typed exceptions defined in one place: +`NodeExistsError`, `NodeNotFoundError`, and `ValueError` subclasses for +metadata-parse and decode failures. 
`zarr.zarrs` translates to zarr-python +native exception types where an obvious equivalent exists (e.g. +`zarr.errors.ContainsArrayError`). Store-callback exceptions from Python +propagate through Rust unchanged. + +## Testing + +`tests/zarrs/`, module-level skip when `_zarrs_bindings` is not importable. + +- **Differential tests** are the core: every operation checked against + zarr-python's own implementation on the same store — write with zarr-python, + read with zarrs, and vice versa; metadata documents produced by both must + round-trip. +- Parametrized over: `MemoryStore` (exercises generic bridge) and `LocalStore` + (native path); zarr formats v2 and v3; a codec matrix including + `sharding_indexed`. +- Read-only-view tests: decode a chunk using a metadata dict not present in + the store; `read_encoded_chunk` returns bytes identical to `store.get`. +- A CI job builds the crate with `maturin develop` and runs `tests/zarrs/`. + Existing CI jobs are untouched (the suite skips without the extension). + +## Phasing + +1. **Phase 1**: crate scaffolding (maturin, CI build), store bridge (native + LocalStore + generic PyStore), node lifecycle functions, whole-chunk + `decode_chunk` / `read_encoded_chunk` / `encode_chunk` / `erase_chunk`. +2. **Phase 2**: `decode_region` / `encode_region`, chunk-subset `selection` + via partial decoders. +3. **Phase 3**: `ZarrsOptions` surface (concurrency, checksum validation, + direct IO), obstore native path, benchmarks vs. the pure-Python pipeline. + +## Naming decisions + +- Python API: `zarr.zarrs`. +- Rust crate / PyPI distribution: `zarrs-bindings` (PyPI name `zarrs` is taken + by the existing project); native module `_zarrs_bindings`. +- Function names follow the requested `create_new_*` / `create_overwrite_*` + pattern; reads are `decode_*` / `read_*`, writes `encode_*`. 
From 79c0bce73db09d73acbde329cbdd766e7fb4cdbf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 08:16:03 +0200 Subject: [PATCH 02/41] docs: implementation plan for zarrs functional API phase 1 Co-Authored-By: Claude Fable 5 --- .../2026-06-12-zarrs-functional-api-phase1.md | 1796 +++++++++++++++++ 1 file changed, 1796 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md diff --git a/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md b/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md new file mode 100644 index 0000000000..364de7cebc --- /dev/null +++ b/docs/superpowers/plans/2026-06-12-zarrs-functional-api-phase1.md @@ -0,0 +1,1796 @@ +# zarrs functional API (Phase 1) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** A new in-repo PyO3 crate `zarrs-bindings` plus a `zarr.zarrs` subpackage exposing an async functional API (node lifecycle + whole-chunk I/O) that delegates to the Rust `zarrs` crate, working against any zarr-python `Store`. + +**Architecture:** Two layers. The Rust crate (`zarrs-bindings/`, maturin/PyO3 abi3-py312, native module `_zarrs_bindings`) is a thin binding over `zarrs` ≈0.23: functions take metadata as JSON strings, a store object, and a node path. The Python subpackage `src/zarr/zarrs/` owns the public API: dict metadata documents, `Store` adaptation (native `LocalStore` fast path + a generic sync-shim callback bridge), numpy conversion, and error translation. Spec: `docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md`. + +**Tech Stack:** Rust 1.91+ (1.96 installed), zarrs 0.23 (default features), pyo3 0.28 (abi3-py312), maturin build backend driven by uv (no maturin CLI needed), pytest with `asyncio_mode = "auto"`. 
+ +--- + +## Environment notes (read first) + +- **Python/pytest/mypy always via `uv run`** (user preference). +- **Build/refresh the extension:** `uv sync --group zarrs --reinstall-package zarrs-bindings`. Plain `uv run --group zarrs ...` does NOT reliably rebuild after Rust edits — always re-sync with `--reinstall-package zarrs-bindings` after touching `zarrs-bindings/`. +- **Fast Rust feedback:** `cargo check --manifest-path zarrs-bindings/Cargo.toml` (compiles without packaging a wheel). +- Builds need network access (crates.io for cargo, PyPI for maturin). The Claude Code sandbox on this host fails at bwrap init, so run build commands with the sandbox disabled. +- Pre-commit hooks (ruff format/check, mypy, codespell) run on `git commit`. If a hook modifies files, `git add` the changes and commit again. +- The Rust snippets below were written against verified zarrs 0.23.13 / zarrs_storage 0.4.3 signatures. If `cargo check` reports a mismatch (most likely candidates: the exact signature of `zarrs::node::node_exists`, the re-export path of `store_set_partial_many`, or `TryInto<StorePrefix> for &NodePath`), check https://docs.rs/zarrs/latest — the primitives all exist; only spelling may need adjustment. +- Docstrings use **markdown** (mkdocs), single backticks — not RST. 
+ +## File structure + +``` +zarrs-bindings/ # new Rust crate (own wheel: zarrs-bindings / _zarrs_bindings) + Cargo.toml + pyproject.toml # maturin backend + src/lib.rs # pymodule, exceptions, shared error helpers + src/store.rs # PyStore bridge + store resolution + src/node.rs # group/array creation, read_metadata, delete_node, list_children + src/chunk.rs # retrieve/store/erase chunk, retrieve_encoded_chunk +src/zarr/zarrs/ # new Python subpackage (public API) + __init__.py # import guard + re-exports + _bridge.py # StoreShim (sync adapter over async Store), resolve_store + _api.py # async functional API, numpy/JSON conversion, error translation +tests/zarrs/ # new test directory (skips when bindings missing) + __init__.py + conftest.py # store fixtures, array_metadata helper + test_bridge.py + test_node.py + test_chunk.py +.github/workflows/zarrs.yml # new CI job +pyproject.toml # modified: zarrs dependency group, uv source, sdist exclude +.gitignore # modified: zarrs-bindings/target/ +changes/+zarrs-bindings.feature.md +``` + +--- + +### Task 1: Rust crate scaffolding + uv wiring + +**Files:** +- Create: `zarrs-bindings/Cargo.toml` +- Create: `zarrs-bindings/pyproject.toml` +- Create: `zarrs-bindings/src/lib.rs` +- Modify: `pyproject.toml` (root) +- Modify: `.gitignore` + +- [ ] **Step 1: Create `zarrs-bindings/Cargo.toml`** + +```toml +[package] +name = "zarrs-bindings" +version = "0.1.0" +edition = "2024" +rust-version = "1.91" +publish = false + +[lib] +name = "_zarrs_bindings" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.28", features = ["abi3-py312"] } +serde_json = "1" +zarrs = "0.23" + +[profile.release] +lto = "thin" +``` + +- [ ] **Step 2: Create `zarrs-bindings/pyproject.toml`** + +```toml +[build-system] +requires = ["maturin>=1.7,<2"] +build-backend = "maturin" + +[project] +name = "zarrs-bindings" +version = "0.1.0" +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" +requires-python = ">=3.12" 
+license = "MIT" + +[tool.maturin] +module-name = "_zarrs_bindings" +strip = true +``` + +- [ ] **Step 3: Create `zarrs-bindings/src/lib.rs`** (exceptions + version only for now) + +```rust +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::*; + +pyo3::create_exception!( + _zarrs_bindings, + NodeExistsError, + PyValueError, + "A node already exists at the given path." +); +pyo3::create_exception!( + _zarrs_bindings, + NodeNotFoundError, + PyValueError, + "No node was found at the given path." +); + +pub(crate) fn runtime_err(err: impl std::fmt::Display) -> PyErr { + PyRuntimeError::new_err(err.to_string()) +} + +pub(crate) fn value_err(err: impl std::fmt::Display) -> PyErr { + PyValueError::new_err(err.to_string()) +} + +#[pyfunction] +fn version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + +#[pymodule] +fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("NodeExistsError", m.py().get_type::<NodeExistsError>())?; + m.add("NodeNotFoundError", m.py().get_type::<NodeNotFoundError>())?; + m.add_function(wrap_pyfunction!(version, m)?)?; + Ok(()) +} +``` + +- [ ] **Step 4: Wire into the root `pyproject.toml`** + +Add to the `[dependency-groups]` table (after the `dev` group): + +```toml +zarrs = [ + {include-group = "test"}, + "zarrs-bindings", +] +``` + +Add a new section at the end of the file: + +```toml +[tool.uv.sources] +zarrs-bindings = { path = "zarrs-bindings" } +``` + +Add `"/zarrs-bindings",` to the `exclude` list under `[tool.hatch.build.targets.sdist]`. + +- [ ] **Step 5: Add `zarrs-bindings/target/` to `.gitignore`** + +- [ ] **Step 6: Lock, build, smoke-test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` +Expected: compiles clean (first run downloads the full zarrs dependency tree). + +Run: `uv lock && uv sync --group zarrs` +Expected: lockfile updated; `zarrs-bindings` builds via maturin and installs. 
+ +Run: `uv run --group zarrs python -c "import _zarrs_bindings as z; print(z.version())"` +Expected: `0.1.0` + +- [ ] **Step 7: Commit** (include `zarrs-bindings/Cargo.lock`, which the build created) + +```bash +git add zarrs-bindings .gitignore pyproject.toml uv.lock +git commit -m "feat: scaffold zarrs-bindings PyO3 crate" +``` + +--- + +### Task 2: `zarr.zarrs` package skeleton + test scaffolding + +**Files:** +- Create: `src/zarr/zarrs/__init__.py` +- Create: `tests/zarrs/__init__.py` (empty) +- Create: `tests/zarrs/conftest.py` +- Test: `tests/zarrs/test_api.py` + +- [ ] **Step 1: Write the failing test** — `tests/zarrs/test_api.py` + +```python +from __future__ import annotations + + +def test_import() -> None: + import zarr.zarrs + + assert isinstance(zarr.zarrs.__version__, str) +``` + +- [ ] **Step 2: Create `tests/zarrs/__init__.py`** (empty file) **and `tests/zarrs/conftest.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from pathlib import Path + + from zarr.abc.store import Store + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> Store: + """A writable store: MemoryStore exercises the generic Python-callback bridge, + LocalStore exercises the native zarrs filesystem store.""" + if request.param == "memory": + return await MemoryStore.open() + return await LocalStore.open(root=tmp_path / "store") + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """Build an array metadata document using zarr-python itself, so the + documents fed to zarrs always match what zarr-python would write.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = 
zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return doc +``` + +- [ ] **Step 3: Run the test to verify it fails** + +Run: `uv run --group zarrs pytest tests/zarrs -v` +Expected: FAIL with `ModuleNotFoundError: No module named 'zarr.zarrs'` + +- [ ] **Step 4: Create `src/zarr/zarrs/__init__.py`** + +```python +""" +Low-level functional API for zarr hierarchies, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +This subpackage is experimental. It requires the `zarrs-bindings` package +(in-repo Rust crate; install for development with `uv sync --group zarrs`). + +All array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading metadata from the store, +which makes read-only and virtual views possible. +""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." + ) from e + +__version__: str = _zarrs_bindings.version() + +__all__ = ["__version__"] +``` + +- [ ] **Step 5: Run the test to verify it passes** + +Run: `uv run --group zarrs pytest tests/zarrs -v` +Expected: 1 passed. Also verify the skip path works in the default env: `uv run pytest tests/zarrs -v` → all skipped/deselected with "zarrs-bindings is not installed" (the default group lacks the bindings). 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/zarr/zarrs tests/zarrs +git commit -m "feat: add zarr.zarrs package skeleton and test scaffolding" +``` + +--- + +### Task 3: StoreShim — sync bridge over async stores (pure Python, TDD) + +**Files:** +- Create: `src/zarr/zarrs/_bridge.py` +- Test: `tests/zarrs/test_bridge.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_bridge.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.storage import LocalStore, MemoryStore +from zarr.zarrs._bridge import StoreShim, resolve_store + +if TYPE_CHECKING: + from pathlib import Path + + +def test_shim_get_set_delete() -> None: + shim = StoreShim(MemoryStore()) + assert shim.get("a/b") is None + shim.set("a/b", b"xyz") + assert shim.get("a/b") == b"xyz" + assert shim.get_range("a/b", 1, 1) == b"y" + assert shim.get_range("a/b", 1, None) == b"yz" + assert shim.get_suffix("a/b", 2) == b"yz" + assert shim.getsize("a/b") == 3 + assert shim.getsize("missing") is None + shim.delete("a/b") + assert shim.get("a/b") is None + + +def test_shim_listing() -> None: + shim = StoreShim(MemoryStore()) + shim.set("zarr.json", b"{}") + shim.set("a/zarr.json", b"{}") + shim.set("a/c/0/0", b"\x00") + assert shim.list() == ["a/c/0/0", "a/zarr.json", "zarr.json"] + assert shim.list_prefix("a/") == ["a/c/0/0", "a/zarr.json"] + assert shim.list_dir("a/") == (["a/zarr.json"], ["a/c/"]) + assert shim.list_dir("") == (["zarr.json"], ["a/"]) + assert shim.getsize_prefix("a/") == 3 + shim.delete_prefix("a/") + assert shim.list() == ["zarr.json"] + + +def test_resolve_store(tmp_path: Path) -> None: + local = LocalStore(tmp_path) + assert resolve_store(local) == {"filesystem": str(tmp_path)} + # read-only LocalStore must go through the shim so writes are rejected in Python + assert isinstance(resolve_store(LocalStore(tmp_path, read_only=True)), StoreShim) + assert isinstance(resolve_store(MemoryStore()), StoreShim) +``` + +- [ ] **Step 2: 
Run tests to verify they fail** + +Run: `uv run --group zarrs pytest tests/zarrs/test_bridge.py -v` +Expected: FAIL with `ModuleNotFoundError: No module named 'zarr.zarrs._bridge'` + +- [ ] **Step 3: Create `src/zarr/zarrs/_bridge.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.sync import _collect_aiterator, sync +from zarr.storage import LocalStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +class StoreShim: + """ + Synchronous adapter over an async `Store`, called from Rust worker threads. + + Each method blocks the calling thread by submitting a coroutine to the zarr + event-loop thread (`zarr.core.sync`). Methods must never be called from the + zarr event-loop thread itself; the Rust bindings only call them from + `asyncio.to_thread` worker threads. + """ + + def __init__(self, store: Store) -> None: + self._store = store + self._prototype = default_buffer_prototype() + + def get(self, key: str) -> bytes | None: + buf = sync(self._store.get(key, prototype=self._prototype)) + return None if buf is None else buf.to_bytes() + + def get_range(self, key: str, offset: int, length: int | None) -> bytes | None: + byte_range = ( + RangeByteRequest(offset, offset + length) + if length is not None + else OffsetByteRequest(offset) + ) + buf = sync(self._store.get(key, prototype=self._prototype, byte_range=byte_range)) + return None if buf is None else buf.to_bytes() + + def get_suffix(self, key: str, suffix: int) -> bytes | None: + buf = sync( + self._store.get(key, prototype=self._prototype, byte_range=SuffixByteRequest(suffix)) + ) + return None if buf is None else buf.to_bytes() + + def set(self, key: str, value: bytes) -> None: + sync(self._store.set(key, self._prototype.buffer.from_bytes(value))) + + def delete(self, key: str) -> None: + 
sync(self._store.delete(key)) + + def delete_prefix(self, prefix: str) -> None: + sync(self._store.delete_dir(prefix.rstrip("/"))) + + def getsize(self, key: str) -> int | None: + try: + return sync(self._store.getsize(key)) + except FileNotFoundError: + return None + + def getsize_prefix(self, prefix: str) -> int: + return sync(self._store.getsize_prefix(prefix.rstrip("/"))) + + def list(self) -> list[str]: + return sorted(sync(_collect_aiterator(self._store.list()))) + + def list_prefix(self, prefix: str) -> list[str]: + return sorted(sync(_collect_aiterator(self._store.list_prefix(prefix)))) + + def list_dir(self, prefix: str) -> tuple[list[str], list[str]]: + """Return `(keys, prefixes)` directly under `prefix`, as zarrs expects: + full keys, and child prefixes ending in `/`.""" + stripped = prefix.rstrip("/") + children = sorted(sync(_collect_aiterator(self._store.list_dir(stripped)))) + keys: list[str] = [] + prefixes: list[str] = [] + for child in children: + full = f"{stripped}/{child}" if stripped else child + if sync(self._store.exists(full)): + keys.append(full) + else: + prefixes.append(full + "/") + return keys, prefixes + + +def resolve_store(store: Store) -> StoreShim | dict[str, str]: + """ + Convert a zarr `Store` into the representation `_zarrs_bindings` expects: + a config dict for stores with a native Rust implementation, otherwise a + `StoreShim` that Rust calls back into. + """ + if isinstance(store, LocalStore) and not store.read_only: + return {"filesystem": str(store.root)} + return StoreShim(store) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run --group zarrs pytest tests/zarrs/test_bridge.py -v` +Expected: 3 passed. (If `list_dir`/`delete_dir`/`getsize_prefix` choke on the stripped prefix, check the `Store` ABC docstrings in `src/zarr/abc/store.py:348-501` — these methods take prefixes without trailing slashes.) 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/zarr/zarrs/_bridge.py tests/zarrs/test_bridge.py +git commit -m "feat: sync store bridge for zarrs bindings" +``` + +--- + +### Task 4: Rust store bridge + group creation, end to end + +**Files:** +- Create: `zarrs-bindings/src/store.rs` +- Create: `zarrs-bindings/src/node.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Create: `src/zarr/zarrs/_api.py` +- Modify: `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_node.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +import zarr +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import NodeExistsError, create_new_group, create_overwrite_group + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = { + "zarr_format": 3, + "node_type": "group", + "attributes": {"answer": 42}, +} + + +async def test_create_new_group(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + group = zarr.open_group(store=store, path="foo", mode="r") + assert dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_at_root(store: Store) -> None: + await create_new_group(GROUP_META, store, "") + group = zarr.open_group(store=store, mode="r") + assert dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_existing_node(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo") + + +async def test_create_overwrite_group(store: Store) -> None: + # an array and its chunks previously occupied the path; overwrite removes both + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + assert await store.exists("foo/c/0") + await create_overwrite_group(GROUP_META, store, "foo") + group = 
zarr.open_group(store=store, path="foo", mode="r") + assert dict(group.attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + assert await store.get("foo/zarr.json", prototype=default_buffer_prototype()) is not None +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: FAIL with `ImportError: cannot import name 'NodeExistsError' from 'zarr.zarrs'` + +- [ ] **Step 3: Create `zarrs-bindings/src/store.rs`** + +```rust +use std::sync::Arc; + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; +use zarrs::filesystem::FilesystemStore; +use zarrs::storage::{ + Bytes, ByteRange, ByteRangeIterator, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, + OffsetBytesIterator, ReadableStorageTraits, ReadableWritableListableStorage, StorageError, + StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; + +/// A zarrs store backed by a Python `zarr.zarrs._bridge.StoreShim`. +/// +/// Every method attaches to the Python interpreter and calls the shim, which +/// blocks on the zarr event loop. Blocking waits in Python release the GIL, so +/// the loop thread can make progress while a Rust worker waits here. 
+pub(crate) struct PyStore(Py<PyAny>); + +fn py_err(err: PyErr) -> StorageError { + StorageError::Other(err.to_string()) +} + +fn invalid(err: impl std::fmt::Display) -> StorageError { + StorageError::Other(err.to_string()) +} + +impl PyStore { + fn get_with_range( + &self, + key: &StoreKey, + range: Option<&ByteRange>, + ) -> Result<MaybeBytes, StorageError> { + Python::attach(|py| { + let shim = self.0.bind(py); + let result = match range { + None => shim.call_method1("get", (key.as_str(),)), + Some(ByteRange::FromStart(offset, length)) => { + shim.call_method1("get_range", (key.as_str(), *offset, *length)) + } + Some(ByteRange::Suffix(suffix)) => { + shim.call_method1("get_suffix", (key.as_str(), *suffix)) + } + } + .map_err(py_err)?; + if result.is_none() { + Ok(None) + } else { + let bytes: Vec<u8> = result.extract().map_err(py_err)?; + Ok(Some(Bytes::from(bytes))) + } + }) + } +} + +impl ReadableStorageTraits for PyStore { + fn get(&self, key: &StoreKey) -> Result<MaybeBytes, StorageError> { + self.get_with_range(key, None) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result<MaybeBytesIterator<'a>, StorageError> { + let mut out = Vec::new(); + for byte_range in byte_ranges { + match self.get_with_range(key, Some(&byte_range))? { + Some(bytes) => out.push(Ok(bytes)), + None => return Ok(None), + } + } + Ok(Some(Box::new(out.into_iter()))) + } + + fn size_key(&self, key: &StoreKey) -> Result<Option<u64>, StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize", (key.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err) + }) + } + + fn supports_get_partial(&self) -> bool { + true + } +} + +impl WritableStorageTraits for PyStore { + fn set(&self, key: &StoreKey, value: Bytes) -> Result<(), StorageError> { + Python::attach(|py| { + let data = PyBytes::new(py, &value); + self.0 + .bind(py) + .call_method1("set", (key.as_str(), data)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn set_partial_many( + &self, + key: &StoreKey, + offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + // read-modify-write fallback provided by zarrs + zarrs::storage::store_set_partial_many(self, key, offset_values) + } + + fn supports_set_partial(&self) -> bool { + false + } + + fn erase(&self, key: &StoreKey) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete", (key.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn erase_prefix(&self, prefix: &StorePrefix) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete_prefix", (prefix.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } +} + +impl ListableStorageTraits for PyStore { + fn list(&self) -> Result<StoreKeys, StorageError> { + Python::attach(|py| { + let keys: Vec<String> = self + .0 + .bind(py) + .call_method0("list") + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result<StoreKeys, StorageError> { + Python::attach(|py| { + let keys: Vec<String> = self + .0 + .bind(py) + .call_method1("list_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result<StoreKeysPrefixes, StorageError> { + Python::attach(|py| { + let (keys, prefixes): (Vec<String>, Vec<String>) = self + .0 + .bind(py) + .call_method1("list_dir", (prefix.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err)?; + let keys = keys + .into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect::, StorageError>>()?; + let prefixes = prefixes + .into_iter() + .map(|p| StorePrefix::new(p).map_err(invalid)) + .collect::, StorageError>>()?; + Ok(StoreKeysPrefixes::new(keys, prefixes)) + }) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err) + }) + } +} + +/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` +/// output) into a zarrs storage handle. +pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult { + if let Ok(config) = obj.downcast::() { + if let Some(root) = config.get_item("filesystem")? { + let root: String = root.extract()?; + let store = + FilesystemStore::new(root).map_err(|e| PyValueError::new_err(e.to_string()))?; + return Ok(Arc::new(store)); + } + return Err(PyValueError::new_err("unrecognized store configuration")); + } + Ok(Arc::new(PyStore(obj.clone().unbind()))) +} +``` + +- [ ] **Step 4: Create `zarrs-bindings/src/node.rs`** (group functions only; later tasks extend this file) + +```rust +use pyo3::prelude::*; +use zarrs::group::Group; +use zarrs::metadata::GroupMetadata; +use zarrs::node::{node_exists, NodePath}; +use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err, NodeExistsError}; + +pub(crate) fn parse_node_path(path: &str) -> PyResult { + NodePath::new(path).map_err(value_err) +} + +/// When a node exists at `node_path`: erase it (and everything under it) if +/// `overwrite`, otherwise raise `NodeExistsError`. +pub(crate) fn prepare_target( + storage: &ReadableWritableListableStorage, + node_path: &NodePath, + overwrite: bool, +) -> PyResult<()> { + if node_exists(storage, node_path).map_err(runtime_err)? 
{ + if !overwrite { + return Err(NodeExistsError::new_err(format!( + "a node already exists at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = node_path.try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err)?; + } + Ok(()) +} + +#[pyfunction] +pub(crate) fn create_group( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = GroupMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let group = Group::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + group.store_metadata().map_err(runtime_err) + }) +} +``` + +- [ ] **Step 5: Register in `zarrs-bindings/src/lib.rs`** + +Add after the `use` lines: + +```rust +mod node; +mod store; +``` + +Add to the `#[pymodule]` body before `Ok(())`: + +```rust + m.add_function(wrap_pyfunction!(node::create_group, m)?)?; +``` + +- [ ] **Step 6: Compile** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` +Expected: success. If `node_exists` or `try_into::<StorePrefix>()` signatures mismatch, fix per https://docs.rs/zarrs/latest/zarrs/node/ (the helpers exist; argument form may differ, e.g. `node_exists(&storage, &node_path)` vs a `&Arc` receiver). 
+ +- [ ] **Step 7: Create `src/zarr/zarrs/_api.py`** + +```python +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import _zarrs_bindings as _zb + +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from zarr.abc.store import Store + from zarr.core.common import JSON + +NodeExistsError = _zb.NodeExistsError +"""Raised by `create_new_*` when a node already exists at the target path.""" + + +@dataclass(frozen=True, slots=True) +class ZarrsOptions: + """Options for zarrs-backed operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +def _node_path(path: str) -> str: + """Convert a zarr-python node path (`""`, `"foo/bar"`) to a zarrs node path + (`"/"`, `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path` from a group metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. 
+ """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path`, deleting any existing node (and its children) first.""" + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) +``` + +- [ ] **Step 8: Re-export from `src/zarr/zarrs/__init__.py`** + +Replace the `__version__`/`__all__` lines at the end with: + +```python +__version__: str = _zarrs_bindings.version() + +from zarr.zarrs._api import ( + NodeExistsError, + ZarrsOptions, + create_new_group, + create_overwrite_group, +) + +__all__ = [ + "NodeExistsError", + "ZarrsOptions", + "__version__", + "create_new_group", + "create_overwrite_group", +] +``` + +- [ ] **Step 9: Rebuild and run the tests** + +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: 8 passed (4 tests × 2 store params). The MemoryStore param proves the full Rust→Python callback bridge; LocalStore proves the native path. 
+ +- [ ] **Step 10: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs/test_node.py +git commit -m "feat: zarrs store bridge and group creation" +``` + +--- + +### Task 5: Array creation + read_metadata + +**Files:** +- Modify: `zarrs-bindings/src/node.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Add failing tests to `tests/zarrs/test_node.py`** + +Extend the imports: + +```python +import json + +import numpy as np + +from tests.zarrs.conftest import array_metadata +from zarr.errors import NodeNotFoundError +from zarr.zarrs import create_new_array, create_overwrite_array, read_metadata +``` + +(If `from tests.zarrs.conftest import ...` fails at collection, use a relative import `from .conftest import array_metadata` — `tests` is a package.) + +Add tests: + +```python +async def test_create_new_array(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + assert arr.chunks == (4, 4) + assert arr.dtype == np.dtype("uint16") + + +async def test_create_new_array_existing_node(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + with pytest.raises(NodeExistsError): + await create_new_array(array_metadata(), store, "arr") + + +async def test_create_overwrite_array(store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + + +async def test_read_metadata_matches_stored_document(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + observed = await read_metadata(store, "arr") + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == 
json.loads(raw.to_bytes()) + + +async def test_read_metadata_zarr_python_group(store: Store) -> None: + zarr.create_group(store=store, path="g", attributes={"a": 1}) + observed = await read_metadata(store, "g") + assert observed["node_type"] == "group" + assert observed["attributes"] == {"a": 1} + + +async def test_read_metadata_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope") +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: FAIL with `ImportError: cannot import name 'create_new_array'` + +- [ ] **Step 3: Add Rust functions to `zarrs-bindings/src/node.rs`** + +Extend the `use` block: + +```rust +use zarrs::array::Array; +use zarrs::metadata::ArrayMetadata; +use zarrs::node::Node; + +use crate::NodeNotFoundError; +``` + +Append: + +```rust +#[pyfunction] +pub(crate) fn create_array( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = ArrayMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let array = Array::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + array.store_metadata().map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn read_metadata( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult { + let storage = resolve_store(store)?; + py.detach(move || { + let node = Node::open(&storage, &path) + .map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + serde_json::to_string(node.metadata()).map_err(runtime_err) + }) +} +``` + +Register both in `lib.rs`: + +```rust + m.add_function(wrap_pyfunction!(node::create_array, m)?)?; + m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; +``` + +- [ ] **Step 4: Add Python 
wrappers to `src/zarr/zarrs/_api.py`** + +```python +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path` from a v2 or v3 array metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. + """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path`, deleting any existing node (and its children) first.""" + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) + + +async def read_metadata( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. + """ + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + result: dict[str, JSON] = json.loads(raw) + return result +``` + +Add `create_new_array`, `create_overwrite_array`, `read_metadata` to the `__init__.py` import and `__all__`. + +- [ ] **Step 5: Rebuild and test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` → success +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: all pass (20 = 10 tests × 2 stores). Note: `test_read_metadata_matches_stored_document` asserts zarrs round-trips the document zarrs itself wrote; if zarrs normalizes a field zarr-python emits differently (e.g. 
drops a `null` `dimension_names`), adjust the *fixture* (`array_metadata`) to drop the field, not the assertion. + +- [ ] **Step 6: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs +git commit -m "feat: zarrs-backed array creation and metadata reads" +``` + +--- + +### Task 6: delete_node + list_children + +**Files:** +- Modify: `zarrs-bindings/src/node.rs`, `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_node.py` + +- [ ] **Step 1: Add failing tests to `tests/zarrs/test_node.py`** + +```python +from zarr.zarrs import delete_node, list_children + + +async def test_delete_node(store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed") + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope") + + +async def test_list_children(store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + children = await list_children(store, "") + by_path = dict(children) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + + +async def test_list_children_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await list_children(store, "nope") +``` + +- [ ] **Step 2: Run to verify failure** — `uv run --group zarrs pytest tests/zarrs/test_node.py -v` → ImportError. 
+ +- [ ] **Step 3: Add Rust functions to `node.rs`** + +```rust +#[pyfunction] +pub(crate) fn delete_node( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + if !node_exists(&storage, &node_path).map_err(runtime_err)? { + return Err(NodeNotFoundError::new_err(format!( + "no node found at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = (&node_path).try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn list_children( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult> { + let storage = resolve_store(store)?; + py.detach(move || { + let group = Group::open(storage, &path) + .map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + let children = group.children(false).map_err(runtime_err)?; + children + .into_iter() + .map(|node| { + let metadata = serde_json::to_string(node.metadata()).map_err(runtime_err)?; + Ok((node.path().as_str().to_string(), metadata)) + }) + .collect() + }) +} +``` + +Register both in `lib.rs` as before. + +- [ ] **Step 4: Add Python wrappers to `_api.py`** + +```python +async def delete_node( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the node at `path`, including all keys and child nodes under it. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. Deleting the + root node (`path=""`) clears the entire store. + """ + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + +async def list_children( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs. Paths are store-relative (no leading `/`). 
+ + Raises `zarr.errors.NodeNotFoundError` if no group exists at `path`. + """ + with _translate_errors(): + raw = await asyncio.to_thread(_zb.list_children, resolve_store(store), _node_path(path)) + return [(child_path.lstrip("/"), json.loads(doc)) for child_path, doc in raw] +``` + +Export both from `__init__.py`. + +- [ ] **Step 5: Rebuild and test** + +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings && uv run --group zarrs pytest tests/zarrs/test_node.py -v` +Expected: all pass. + +- [ ] **Step 6: Commit** + +```bash +git add zarrs-bindings/src src/zarr/zarrs tests/zarrs +git commit -m "feat: zarrs-backed node deletion and child listing" +``` + +--- + +### Task 7: Whole-chunk I/O (decode/encode/raw/erase) + +**Files:** +- Create: `zarrs-bindings/src/chunk.rs` +- Modify: `zarrs-bindings/src/lib.rs` +- Modify: `src/zarr/zarrs/_api.py`, `src/zarr/zarrs/__init__.py` +- Test: `tests/zarrs/test_chunk.py` + +- [ ] **Step 1: Write the failing tests** — `tests/zarrs/test_chunk.py` + +```python +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.zarrs.conftest import array_metadata +from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import ( + create_new_array, + decode_chunk, + encode_chunk, + erase_chunk, + read_encoded_chunk, +) + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +def _filled( + store: Store, **kwargs: Any +) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array named 'a' via zarr-python, fill it with a ramp, and + return (data, metadata_document).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if 
params.get("zarr_format") == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return data, doc + + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64"]) +async def test_decode_chunk_differential(store: Store, dtype: str) -> None: + data, meta = _filled(store, dtype=dtype) + observed = await decode_chunk(meta, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_decode_chunk_codecs(store: Store, compressors: Any) -> None: + data, meta = _filled(store, compressors=compressors) + observed = await decode_chunk(meta, store, "a", (0, 1)) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_decode_chunk_v2(store: Store) -> None: + data, meta = _filled(store, zarr_format=2) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_sharding(store: Store) -> None: + # with sharding, the metadata chunk grid is the shard grid + data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_missing_returns_fill_value(store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await decode_chunk(meta, store, "a", (0, 0)) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_decode_chunk_selection_not_implemented(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(NotImplementedError): + await decode_chunk(meta, store, "a", (0, 0), selection=(slice(0, 2), slice(0, 2))) + + +async def test_decode_chunk_metadata_view(store: 
Store) -> None: + # the read-only-view case: decode with a metadata document the store never saw + data, meta = _filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await decode_chunk(view, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_encode_chunk_differential(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + value = np.arange(16, dtype="uint16").reshape(4, 4) + await encode_chunk(meta, store, "a", (0, 1), value) + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 4:8], value) + + +async def test_encode_chunk_shape_mismatch(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + with pytest.raises(ValueError, match="chunk shape"): + await encode_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16")) + + +async def test_read_encoded_chunk_matches_store(store: Store) -> None: + _, meta = _filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0)) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_returns_none(store: Store) -> None: + arr = zarr.create_array(store=store, name="empty", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "empty", (0, 0)) is None + + +async def test_erase_chunk(store: Store) -> None: + data, meta = _filled(store) + assert await store.exists("a/c/0/0") + await erase_chunk(meta, store, "a", (0, 0)) + assert not await store.exists("a/c/0/0") + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 0:4], np.zeros((4, 
4), dtype="uint16")) +``` + +- [ ] **Step 2: Run to verify failure** — `uv run --group zarrs pytest tests/zarrs/test_chunk.py -v` → ImportError. + +- [ ] **Step 3: Create `zarrs-bindings/src/chunk.rs`** + +```rust +use pyo3::exceptions::PyNotImplementedError; +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use zarrs::array::{Array, ArrayBytes}; +use zarrs::metadata::ArrayMetadata; +use zarrs::storage::ReadableWritableListableStorage; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err}; + +type DynArray = Array; + +/// Construct an Array view from an explicit metadata document, without +/// consulting the store for metadata. +fn array_view( + storage: ReadableWritableListableStorage, + path: &str, + metadata_json: &str, +) -> PyResult { + let metadata = ArrayMetadata::try_from(metadata_json).map_err(value_err)?; + Array::new_with_metadata(storage, path, metadata).map_err(value_err) +} + +#[pyfunction] +pub(crate) fn retrieve_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult> { + let array = array_view(storage, &path, &metadata_json)?; + let bytes: ArrayBytes<'static> = + array.retrieve_chunk(&chunk_coords).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} + +#[pyfunction] +pub(crate) fn retrieve_encoded_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult>> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult>> { + let array = array_view(storage, &path, &metadata_json)?; + array + .retrieve_encoded_chunk(&chunk_coords) + .map_err(runtime_err) + })?; + Ok(data.map(|d| PyBytes::new(py, 
&d).unbind())) +} + +#[pyfunction] +pub(crate) fn store_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, + data: Vec, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array + .store_chunk(&chunk_coords, ArrayBytes::new_flen(data)) + .map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn erase_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array.erase_chunk(&chunk_coords).map_err(runtime_err) + }) +} +``` + +Register in `lib.rs`: add `mod chunk;` and + +```rust + m.add_function(wrap_pyfunction!(chunk::retrieve_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_encoded_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; +``` + +- [ ] **Step 4: Add Python wrappers to `_api.py`** + +Extend imports: + +```python +from typing import Any + +import numpy as np +import numpy.typing as npt +``` + +Add: + +```python +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve the numpy dtype and chunk shape from a metadata document, using + zarr-python's own metadata parsing.""" + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + if metadata.get("zarr_format") == 3: + meta3 = ArrayV3Metadata.from_dict(dict(metadata)) + grid = meta3.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return meta3.data_type.to_native_dtype(), grid.chunk_shape + meta2 = 
ArrayV2Metadata.from_dict(dict(metadata)) + return meta2.dtype.to_native_dtype(), meta2.chunks + + +async def decode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + selection: tuple[slice | int, ...] | None = None, + options: ZarrsOptions | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the chunk at `chunk_coords` of the array described by + `metadata`, located at `path` in `store`. + + The metadata document is authoritative: it is not read from the store. + Missing chunks decode to the fill value. `selection` (a chunk-relative + subset) is not implemented yet. + """ + if selection is not None: + raise NotImplementedError("chunk subset selection is not implemented yet") + raw = await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if the chunk does not exist. No codecs are applied.""" + result: bytes | None = await asyncio.to_thread( + _zb.retrieve_encoded_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + return result + + +async def encode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: ZarrsOptions | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk + at `chunk_coords`. 
`value` must match the chunk shape exactly.""" + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + arr.tobytes(), + ) + + +async def erase_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. Deleting a missing chunk is a no-op.""" + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) +``` + +Export `decode_chunk`, `read_encoded_chunk`, `encode_chunk`, `erase_chunk` from `__init__.py`. + +- [ ] **Step 5: Rebuild and test** + +Run: `cargo check --manifest-path zarrs-bindings/Cargo.toml` → success +Run: `uv sync --group zarrs --reinstall-package zarrs-bindings` +Run: `uv run --group zarrs pytest tests/zarrs/test_chunk.py -v` +Expected: all pass. Likely first-run issues and their fixes: + - v2 differential test fails on dtype byte order → constrain the v2 test to `dtype=" None` annotations, which the code above has). + +- [ ] **Step 3: Re-run the full zarrs suite** — `uv run --group zarrs pytest tests/zarrs -v` → all pass. + +- [ ] **Step 4: Verify the rest of the test suite is unaffected** + +Run: `uv run pytest tests/test_array.py tests/test_group.py -x -q` +Expected: pass (no production code outside `src/zarr/zarrs/` changed). 
+ +- [ ] **Step 5: Commit** + +```bash +git add -A +git commit -m "chore: lint fixes and changelog for zarr.zarrs" +``` + +--- + +### Task 9: CI workflow + +**Files:** +- Create: `.github/workflows/zarrs.yml` + +- [ ] **Step 1: Create `.github/workflows/zarrs.yml`** (action SHAs copied from `.github/workflows/test.yml` — keep them identical so dependabot groups them) + +```yaml +name: Zarrs bindings + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 0 # hatch-vcs needs tags to compute zarr's version + persist-credentials: false + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + with: + python-version: '3.12' + - name: Run zarrs bindings tests + # the ubuntu runner image ships a Rust toolchain; the maturin build + # backend is fetched by uv on demand + run: uv run --group zarrs pytest tests/zarrs -v +``` + +- [ ] **Step 2: Validate the workflow** + +Run: `uvx zizmor .github/workflows/zarrs.yml` +Expected: no findings (matches the repo's zizmor policy). + +- [ ] **Step 3: Commit** + +```bash +git add .github/workflows/zarrs.yml +git commit -m "ci: test job for zarrs bindings" +``` + +--- + +## Out of scope for this plan (later phases, per spec) + +- `decode_region` / `encode_region` and chunk-subset `selection` (Phase 2: zarrs `retrieve_array_subset` / `partial_decoder`). +- `ZarrsOptions` fields (concurrency, checksum validation, direct IO), obstore native path, benchmarks (Phase 3). +- Variable-length data types, non-regular chunk grids, fancy indexing. +- Publishing the `zarrs-bindings` wheel / a `zarr[zarrs]` extra on PyPI. 
From 2eb3b6fff2bb61ecca9885bc19b22d36b3e54136 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 08:26:10 +0200 Subject: [PATCH 03/41] feat: scaffold zarrs-bindings PyO3 crate Create zarrs-bindings/ with Cargo.toml, pyproject.toml (maturin build backend), and src/lib.rs (exceptions + version function). Wire into the root pyproject.toml via a new `zarrs` dependency group and [tool.uv.sources]. Add zarrs-bindings/target/ to .gitignore. Co-Authored-By: Claude Fable 5 --- .gitignore | 3 + pyproject.toml | 8 + uv.lock | 35 + zarrs-bindings/Cargo.lock | 1819 +++++++++++++++++++++++++++++++++ zarrs-bindings/Cargo.toml | 18 + zarrs-bindings/pyproject.toml | 14 + zarrs-bindings/src/lib.rs | 38 + 7 files changed, 1935 insertions(+) create mode 100644 zarrs-bindings/Cargo.lock create mode 100644 zarrs-bindings/Cargo.toml create mode 100644 zarrs-bindings/pyproject.toml create mode 100644 zarrs-bindings/src/lib.rs diff --git a/.gitignore b/.gitignore index 3284865d6c..e5474b7c1c 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,6 @@ zarr.egg-info/ # zarr-metadata package lockfile (a library, not an app) packages/zarr-metadata/uv.lock + +# zarrs-bindings Rust build artifacts +zarrs-bindings/target/ diff --git a/pyproject.toml b/pyproject.toml index 9f6005f981..90368ed37a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ exclude = [ "/.github", "/bench", "/docs", + "/zarrs-bindings", ] [project] @@ -134,6 +135,10 @@ dev = [ "universal-pathlib", "mypy", ] +zarrs = [ + {include-group = "test"}, + "zarrs-bindings", +] [tool.coverage.report] exclude_also = [ @@ -491,3 +496,6 @@ ignore-words-list = "astroid" [project.entry-points.pytest11] zarr = "zarr.testing" + +[tool.uv.sources] +zarrs-bindings = { path = "zarrs-bindings" } diff --git a/uv.lock b/uv.lock index ab2ff8b1e7..2b188848a9 100644 --- a/uv.lock +++ b/uv.lock @@ -4057,6 +4057,21 @@ test = [ { name = "tomlkit" }, { name = "uv" }, ] +zarrs = [ + { name = "coverage" }, + { name = 
"hypothesis" }, + { name = "numpydoc" }, + { name = "pytest" }, + { name = "pytest-accept" }, + { name = "pytest-asyncio" }, + { name = "pytest-benchmark" }, + { name = "pytest-codspeed" }, + { name = "pytest-cov" }, + { name = "pytest-xdist" }, + { name = "tomlkit" }, + { name = "uv" }, + { name = "zarrs-bindings" }, +] [package.metadata] requires-dist = [ @@ -4162,3 +4177,23 @@ test = [ { name = "tomlkit" }, { name = "uv" }, ] +zarrs = [ + { name = "coverage", specifier = ">=7.10" }, + { name = "hypothesis" }, + { name = "numpydoc" }, + { name = "pytest" }, + { name = "pytest-accept" }, + { name = "pytest-asyncio" }, + { name = "pytest-benchmark" }, + { name = "pytest-codspeed" }, + { name = "pytest-cov" }, + { name = "pytest-xdist" }, + { name = "tomlkit" }, + { name = "uv" }, + { name = "zarrs-bindings", directory = "zarrs-bindings" }, +] + +[[package]] +name = "zarrs-bindings" +version = "0.1.0" +source = { directory = "zarrs-bindings" } diff --git a/zarrs-bindings/Cargo.lock b/zarrs-bindings/Cargo.lock new file mode 100644 index 0000000000..a86d4e26e2 --- /dev/null +++ b/zarrs-bindings/Cargo.lock @@ -0,0 +1,1819 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "auto_impl" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = 
"1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "blosc-src" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9046dd58971db0226346fde214143d16a6eb12f535b5320d0ea94fcea420631" +dependencies = [ + "cc", + "libz-sys", + "lz4-sys", + "snappy_src", + "zstd-sys", +] + +[[package]] +name = "blusc" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4e0c17eaa785d2673fe58c22fc817946c2330ed47f3d9f79835d65950d32a45" +dependencies = [ + "flate2", + "lz4_flex", + "pkg-config", + "snap", + "zstd", +] + +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = 
"0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", + "unicode-xid", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" 
+version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "inventory" +version = "0.3.24" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" +dependencies = [ + "rustversion", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" +dependencies = [ + "cfg-if", + "futures-util", + "wasm-bindgen", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libz-sys" +version = "1.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bc9657773828b90eeb625adff10eeac83cc21bbfd8e23a03eaa8a33c9e28d9" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" +dependencies 
= [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" + +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "lz4_flex" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90071f8077f8e40adfc4b7fe9cd495ce316263f19e75c2211eeff3fdf475a3d9" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "moka" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "parking_lot", + "portable-atomic", + "smallvec", + "tagptr", + "uuid", +] + +[[package]] +name = "monostate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb4cc965c89dd0615a9e822ff8002f7633d2466143d51bd58693e4b2c75aabad" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23f5b99488110875b5904839d396c2cdfaf241ff6622638acb879cc7effad5de" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ndarray" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "bytemuck", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "positioned-io" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ec4b80060f033312b99b6874025d9503d2af87aef2dd4c516e253fbfcdada7" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" +dependencies = [ + "libc", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", +] + +[[package]] +name = "pyo3-build-config" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" +dependencies = [ + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.28.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quick_cache" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3db184a8b66cfe87f0263a1de147a6b554c864d1767c6f7fa4eb0e5497b565" +dependencies = [ + "ahash", + "equivalent", + "hashbrown 0.16.1", + "parking_lot", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rayon_iter_concurrent_limit" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09ee01023de07fa073ce14c37cbe0a9e099c6b0b60a29cf4af6d04d9553fed7" +dependencies = [ + "rayon", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = 
"regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + 
"serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "snappy_src" +version = "0.2.5+snappy.1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4e1432067a55bcfb1fd522d2aca6537a4fcea32bba87ea86921226d14f9bad53" +dependencies = [ + "cc", + "link-cplusplus", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + +[[package]] +name = "target-lexicon" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "twox-hash" 
+version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-segmentation" +version = "1.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe_cell_slice" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6659959f702dcdaad77bd6e42a9409a32ceccc06943ec93c8a4306be00eb6cf1" + +[[package]] +name = "uuid" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + 
"unicode-xid", + "wasmparser", +] + +[[package]] +name = "zarrs" +version = "0.23.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8132307b8fc041fd21f68c7987103fb6e038b11f9838c16ec43b798f5480ccf5" +dependencies = [ + "async-lock", + "base64", + "blosc-src", + "blusc", + "bytemuck", + "bytes", + "crc32c", + "derive_more", + "flate2", + "getrandom 0.3.4", + "half", + "inventory", + "itertools", + "itoa", + "libz-sys", + "log", + "lru", + "moka", + "ndarray", + "num", + "num-complex", + "paste", + "quick_cache", + "rayon", + "rayon_iter_concurrent_limit", + "serde", + "serde_json", + "thiserror", + "thread_local", + "unsafe_cell_slice", + "uuid", + "zarrs_chunk_grid", + "zarrs_chunk_key_encoding", + "zarrs_codec", + "zarrs_data_type", + "zarrs_filesystem", + "zarrs_metadata", + "zarrs_metadata_ext", + "zarrs_plugin", + "zarrs_storage", + "zstd", +] + +[[package]] +name = "zarrs-bindings" +version = "0.1.0" +dependencies = [ + "pyo3", + "serde_json", + "zarrs", +] + +[[package]] +name = "zarrs_chunk_grid" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cf67386fd96a0336cd3e5ab5ca6cb14e0e05aee80f1acae8c4d3cf562a8bb65" +dependencies = [ + "derive_more", + "inventory", + "itertools", + "rayon", + "thiserror", + "tinyvec", + "zarrs_metadata", + "zarrs_plugin", +] + +[[package]] +name = "zarrs_chunk_key_encoding" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9040e7feaa92d1904d492acd0cd91b97214f1791c5b5738e6c05b2ca4145a382" +dependencies = [ + "derive_more", + "inventory", + "zarrs_metadata", + "zarrs_plugin", + "zarrs_storage", +] + +[[package]] +name = "zarrs_codec" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "383a129a6a0cbb2c80cdba23809e5cab85159756464b7d0f112468a495c128da" +dependencies = [ + "async-trait", + "bytemuck", + "derive_more", + "futures", + "inventory", + "itertools", + 
"rayon", + "thiserror", + "unsafe_cell_slice", + "zarrs_chunk_grid", + "zarrs_data_type", + "zarrs_metadata", + "zarrs_plugin", + "zarrs_storage", +] + +[[package]] +name = "zarrs_data_type" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc7c594c9363278fcd9db4c205514f009944206eb093ea7ad40b85f50009f31" +dependencies = [ + "derive_more", + "half", + "inventory", + "num", + "paste", + "serde", + "serde_json", + "thiserror", + "zarrs_metadata", + "zarrs_plugin", +] + +[[package]] +name = "zarrs_filesystem" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "270efeb0181651aee5460b3232f2fc83e91bd646cefe75001d1c8f9a4f3abf81" +dependencies = [ + "bytes", + "derive_more", + "itertools", + "libc", + "page_size", + "pathdiff", + "positioned-io", + "thiserror", + "walkdir", + "zarrs_storage", +] + +[[package]] +name = "zarrs_metadata" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60c4c363a8a302d7babb3c29017850a7b4e0af6ca5f9ba2946263a185b62fea" +dependencies = [ + "derive_more", + "half", + "monostate", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "zarrs_metadata_ext" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2048e07848ca99c7450518e0584929300b1b6a3cf442f18b26ffd3520814bd5b" +dependencies = [ + "derive_more", + "monostate", + "num", + "serde", + "serde_json", + "serde_repr", + "thiserror", + "zarrs_metadata", +] + +[[package]] +name = "zarrs_plugin" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cbe0ed432aee86856f70ca33be36eaf4a0dae21ab730750d9280a7ca1e95046" +dependencies = [ + "paste", + "regex", + "serde_json", + "thiserror", +] + +[[package]] +name = "zarrs_storage" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7d098796d2ed4cf94896569615101e0432e870a7665396da5cc32300fb68f7c1" +dependencies = [ + "auto_impl", + "bytes", + "derive_more", + "itertools", + "thiserror", + "unsafe_cell_slice", +] + +[[package]] +name = "zerocopy" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/zarrs-bindings/Cargo.toml b/zarrs-bindings/Cargo.toml new file mode 100644 index 0000000000..be97fa4b5a --- /dev/null +++ b/zarrs-bindings/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "zarrs-bindings" +version = "0.1.0" +edition = "2024" +rust-version = "1.91" +publish = false + +[lib] +name = "_zarrs_bindings" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = 
"0.28", features = ["abi3-py312"] } +serde_json = "1" +zarrs = "0.23" + +[profile.release] +lto = "thin" diff --git a/zarrs-bindings/pyproject.toml b/zarrs-bindings/pyproject.toml new file mode 100644 index 0000000000..66eff31f36 --- /dev/null +++ b/zarrs-bindings/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = ["maturin>=1.7,<2"] +build-backend = "maturin" + +[project] +name = "zarrs-bindings" +version = "0.1.0" +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" +requires-python = ">=3.12" +license = "MIT" + +[tool.maturin] +module-name = "_zarrs_bindings" +strip = true diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs new file mode 100644 index 0000000000..803104a63c --- /dev/null +++ b/zarrs-bindings/src/lib.rs @@ -0,0 +1,38 @@ +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::*; + +pyo3::create_exception!( + _zarrs_bindings, + NodeExistsError, + PyValueError, + "A node already exists at the given path." +); +pyo3::create_exception!( + _zarrs_bindings, + NodeNotFoundError, + PyValueError, + "No node was found at the given path." 
+); + +#[allow(dead_code)] +pub(crate) fn runtime_err(err: impl std::fmt::Display) -> PyErr { + PyRuntimeError::new_err(err.to_string()) +} + +#[allow(dead_code)] +pub(crate) fn value_err(err: impl std::fmt::Display) -> PyErr { + PyValueError::new_err(err.to_string()) +} + +#[pyfunction] +fn version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + +#[pymodule] +fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("NodeExistsError", m.py().get_type::<NodeExistsError>())?; + m.add("NodeNotFoundError", m.py().get_type::<NodeNotFoundError>())?; + m.add_function(wrap_pyfunction!(version, m)?)?; + Ok(()) +} From bfc0b645ad7645fb02616fc9e8414475c62339da Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 09:07:26 +0200 Subject: [PATCH 04/41] fix: single-source zarrs-bindings version from Cargo.toml Co-Authored-By: Claude Fable 5 --- uv.lock | 1 - zarrs-bindings/Cargo.toml | 2 ++ zarrs-bindings/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index 2b188848a9..a1c94a2d4b 100644 --- a/uv.lock +++ b/uv.lock @@ -4195,5 +4195,4 @@ zarrs = [ [[package]] name = "zarrs-bindings" -version = "0.1.0" source = { directory = "zarrs-bindings" } diff --git a/zarrs-bindings/Cargo.toml b/zarrs-bindings/Cargo.toml index be97fa4b5a..e0f381f416 100644 --- a/zarrs-bindings/Cargo.toml +++ b/zarrs-bindings/Cargo.toml @@ -4,6 +4,8 @@ version = "0.1.0" edition = "2024" rust-version = "1.91" publish = false +license = "MIT" +description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" [lib] name = "_zarrs_bindings" diff --git a/zarrs-bindings/pyproject.toml b/zarrs-bindings/pyproject.toml index 66eff31f36..4212a64b56 100644 --- a/zarrs-bindings/pyproject.toml +++ b/zarrs-bindings/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "zarrs-bindings" -version = "0.1.0" +dynamic = ["version"] description = "PyO3 bindings to the zarrs Rust crate, consumed by zarr.zarrs" requires-python = ">=3.12" license = 
"MIT" From 5982c04d7658e194e46cfa204c8f0b9e07154c32 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 09:11:23 +0200 Subject: [PATCH 05/41] feat: add zarr.zarrs package skeleton and test scaffolding Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 23 +++++++++++++++++++++ tests/zarrs/__init__.py | 0 tests/zarrs/conftest.py | 41 ++++++++++++++++++++++++++++++++++++++ tests/zarrs/test_api.py | 7 +++++++ 4 files changed, 71 insertions(+) create mode 100644 src/zarr/zarrs/__init__.py create mode 100644 tests/zarrs/__init__.py create mode 100644 tests/zarrs/conftest.py create mode 100644 tests/zarrs/test_api.py diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py new file mode 100644 index 0000000000..3aa871bf32 --- /dev/null +++ b/src/zarr/zarrs/__init__.py @@ -0,0 +1,23 @@ +""" +Low-level functional API for zarr hierarchies, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +This subpackage is experimental. It requires the `zarrs-bindings` package +(in-repo Rust crate; install for development with `uv sync --group zarrs`). + +All array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading metadata from the store, +which makes read-only and virtual views possible. +""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." 
+ ) from e + +__version__: str = _zarrs_bindings.version() + +__all__ = ["__version__"] diff --git a/tests/zarrs/__init__.py b/tests/zarrs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py new file mode 100644 index 0000000000..f54758d59f --- /dev/null +++ b/tests/zarrs/conftest.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from pathlib import Path + + from zarr.abc.store import Store + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> Store: + """A writable store: MemoryStore exercises the generic Python-callback bridge, + LocalStore exercises the native zarrs filesystem store.""" + if request.param == "memory": + return await MemoryStore.open() + return await LocalStore.open(root=tmp_path / "store") + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """Build an array metadata document using zarr-python itself, so the + documents fed to zarrs always match what zarr-python would write.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return doc diff --git a/tests/zarrs/test_api.py b/tests/zarrs/test_api.py new file mode 100644 index 0000000000..da1a9ecda8 --- /dev/null +++ b/tests/zarrs/test_api.py @@ -0,0 +1,7 @@ +from __future__ import annotations + + +def test_import() -> None: + import zarr.zarrs + + assert isinstance(zarr.zarrs.__version__, str) From 
e821d70915e2116fd54166770eaab18670bb8a5e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 09:19:00 +0200 Subject: [PATCH 06/41] fix: pass exc_type to importorskip in zarrs conftest Co-Authored-By: Claude Fable 5 --- tests/zarrs/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py index f54758d59f..b9b3bcbab1 100644 --- a/tests/zarrs/conftest.py +++ b/tests/zarrs/conftest.py @@ -4,7 +4,9 @@ import pytest -pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) import zarr from zarr.storage import LocalStore, MemoryStore From 88c0c4d1424d6b73396553d678bc7d3a087e9ae8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 09:47:54 +0200 Subject: [PATCH 07/41] feat: sync store bridge for zarrs bindings Add StoreShim, a synchronous adapter over zarr's async Store ABC, and resolve_store, which maps a Store to either a native config dict (for LocalStore) or a StoreShim for Rust to call back into. Also convert the store fixture in tests/zarrs/conftest.py to an async generator with teardown so stores are properly closed after each test. 
Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_bridge.py | 100 +++++++++++++++++++++++++++++++++++++ tests/zarrs/conftest.py | 13 +++-- tests/zarrs/test_bridge.py | 45 +++++++++++++++++ 3 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 src/zarr/zarrs/_bridge.py create mode 100644 tests/zarrs/test_bridge.py diff --git a/src/zarr/zarrs/_bridge.py b/src/zarr/zarrs/_bridge.py new file mode 100644 index 0000000000..d68c5df76f --- /dev/null +++ b/src/zarr/zarrs/_bridge.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import builtins +from typing import TYPE_CHECKING + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.sync import _collect_aiterator, sync +from zarr.storage import LocalStore + +if TYPE_CHECKING: + from zarr.abc.store import Store + +# Alias to avoid shadowing the `list` builtin with the `StoreShim.list` method +# in mypy's class-scope name resolution. +_list = builtins.list + + +class StoreShim: + """ + Synchronous adapter over an async `Store`, called from Rust worker threads. + + Each method blocks the calling thread by submitting a coroutine to the zarr + event-loop thread (`zarr.core.sync`). Methods must never be called from the + zarr event-loop thread itself; the Rust bindings only call them from + `asyncio.to_thread` worker threads. 
+ """ + + def __init__(self, store: Store) -> None: + self._store = store + self._prototype = default_buffer_prototype() + + def get(self, key: str) -> bytes | None: + buf = sync(self._store.get(key, prototype=self._prototype)) + return None if buf is None else buf.to_bytes() + + def get_range(self, key: str, offset: int, length: int | None) -> bytes | None: + byte_range = ( + RangeByteRequest(offset, offset + length) + if length is not None + else OffsetByteRequest(offset) + ) + buf = sync(self._store.get(key, prototype=self._prototype, byte_range=byte_range)) + return None if buf is None else buf.to_bytes() + + def get_suffix(self, key: str, suffix: int) -> bytes | None: + buf = sync( + self._store.get(key, prototype=self._prototype, byte_range=SuffixByteRequest(suffix)) + ) + return None if buf is None else buf.to_bytes() + + def set(self, key: str, value: bytes) -> None: + sync(self._store.set(key, self._prototype.buffer.from_bytes(value))) + + def delete(self, key: str) -> None: + sync(self._store.delete(key)) + + def delete_prefix(self, prefix: str) -> None: + sync(self._store.delete_dir(prefix.rstrip("/"))) + + def getsize(self, key: str) -> int | None: + try: + return sync(self._store.getsize(key)) + except FileNotFoundError: + return None + + def getsize_prefix(self, prefix: str) -> int: + return sync(self._store.getsize_prefix(prefix.rstrip("/"))) + + def list(self) -> _list[str]: + return sorted(sync(_collect_aiterator(self._store.list()))) + + def list_prefix(self, prefix: str) -> _list[str]: + return sorted(sync(_collect_aiterator(self._store.list_prefix(prefix)))) + + def list_dir(self, prefix: str) -> tuple[_list[str], _list[str]]: + """Return `(keys, prefixes)` directly under `prefix`, as zarrs expects: + full keys, and child prefixes ending in `/`.""" + stripped = prefix.rstrip("/") + children = sorted(sync(_collect_aiterator(self._store.list_dir(stripped)))) + keys: _list[str] = [] + prefixes: _list[str] = [] + for child in children: + full = 
f"{stripped}/{child}" if stripped else child + if sync(self._store.exists(full)): + keys.append(full) + else: + prefixes.append(full + "/") + return keys, prefixes + + +def resolve_store(store: Store) -> StoreShim | dict[str, str]: + """ + Convert a zarr `Store` into the representation `_zarrs_bindings` expects: + a config dict for stores with a native Rust implementation, otherwise a + `StoreShim` that Rust calls back into. + """ + if isinstance(store, LocalStore) and not store.read_only: + return {"filesystem": str(store.root)} + return StoreShim(store) diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py index b9b3bcbab1..b65cca447b 100644 --- a/tests/zarrs/conftest.py +++ b/tests/zarrs/conftest.py @@ -12,18 +12,25 @@ from zarr.storage import LocalStore, MemoryStore if TYPE_CHECKING: + from collections.abc import AsyncGenerator from pathlib import Path from zarr.abc.store import Store @pytest.fixture(params=["memory", "local"]) -async def store(request: pytest.FixtureRequest, tmp_path: Path) -> Store: +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncGenerator[Store, None]: """A writable store: MemoryStore exercises the generic Python-callback bridge, LocalStore exercises the native zarrs filesystem store.""" + s: Store if request.param == "memory": - return await MemoryStore.open() - return await LocalStore.open(root=tmp_path / "store") + s = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() def array_metadata(**kwargs: Any) -> dict[str, Any]: diff --git a/tests/zarrs/test_bridge.py b/tests/zarrs/test_bridge.py new file mode 100644 index 0000000000..12fe01c7df --- /dev/null +++ b/tests/zarrs/test_bridge.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.storage import LocalStore, MemoryStore +from zarr.zarrs._bridge import StoreShim, resolve_store + +if TYPE_CHECKING: + from pathlib import Path + + 
+def test_shim_get_set_delete() -> None: + shim = StoreShim(MemoryStore()) + assert shim.get("a/b") is None + shim.set("a/b", b"xyz") + assert shim.get("a/b") == b"xyz" + assert shim.get_range("a/b", 1, 1) == b"y" + assert shim.get_range("a/b", 1, None) == b"yz" + assert shim.get_suffix("a/b", 2) == b"yz" + assert shim.getsize("a/b") == 3 + assert shim.getsize("missing") is None + shim.delete("a/b") + assert shim.get("a/b") is None + + +def test_shim_listing() -> None: + shim = StoreShim(MemoryStore()) + shim.set("zarr.json", b"{}") + shim.set("a/zarr.json", b"{}") + shim.set("a/c/0/0", b"\x00") + assert shim.list() == ["a/c/0/0", "a/zarr.json", "zarr.json"] + assert shim.list_prefix("a/") == ["a/c/0/0", "a/zarr.json"] + assert shim.list_dir("a/") == (["a/zarr.json"], ["a/c/"]) + assert shim.list_dir("") == (["zarr.json"], ["a/"]) + assert shim.getsize_prefix("a/") == 3 + shim.delete_prefix("a/") + assert shim.list() == ["zarr.json"] + + +def test_resolve_store(tmp_path: Path) -> None: + local = LocalStore(tmp_path) + assert resolve_store(local) == {"filesystem": str(tmp_path)} + # read-only LocalStore must go through the shim so writes are rejected in Python + assert isinstance(resolve_store(LocalStore(tmp_path, read_only=True)), StoreShim) + assert isinstance(resolve_store(MemoryStore()), StoreShim) From 737e82e39bc593332d87bbe0e557f895624a045c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 10:03:30 +0200 Subject: [PATCH 08/41] refactor: review polish for zarrs store bridge Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_bridge.py | 5 +++++ tests/zarrs/conftest.py | 4 ++-- tests/zarrs/test_bridge.py | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/zarr/zarrs/_bridge.py b/src/zarr/zarrs/_bridge.py index d68c5df76f..e7632647ad 100644 --- a/src/zarr/zarrs/_bridge.py +++ b/src/zarr/zarrs/_bridge.py @@ -80,6 +80,11 @@ def list_dir(self, prefix: str) -> tuple[_list[str], _list[str]]: children = 
sorted(sync(_collect_aiterator(self._store.list_dir(stripped)))) keys: _list[str] = [] prefixes: _list[str] = [] + # A child is classified as a key iff it exists as one. Zarr hierarchies + # never store a bare key alongside same-named subkeys (e.g. "a" and + # "a/b"), so a name is never both a key and a prefix. + # TODO: replace the per-child exists() round-trip with a single listing + # pass when this becomes a bottleneck (remote stores). for child in children: full = f"{stripped}/{child}" if stripped else child if sync(self._store.exists(full)): diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py index b65cca447b..678065e5e1 100644 --- a/tests/zarrs/conftest.py +++ b/tests/zarrs/conftest.py @@ -12,14 +12,14 @@ from zarr.storage import LocalStore, MemoryStore if TYPE_CHECKING: - from collections.abc import AsyncGenerator + from collections.abc import AsyncIterator from pathlib import Path from zarr.abc.store import Store @pytest.fixture(params=["memory", "local"]) -async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncGenerator[Store, None]: +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: """A writable store: MemoryStore exercises the generic Python-callback bridge, LocalStore exercises the native zarrs filesystem store.""" s: Store diff --git a/tests/zarrs/test_bridge.py b/tests/zarrs/test_bridge.py index 12fe01c7df..2b88c047dd 100644 --- a/tests/zarrs/test_bridge.py +++ b/tests/zarrs/test_bridge.py @@ -19,6 +19,8 @@ def test_shim_get_set_delete() -> None: assert shim.get_suffix("a/b", 2) == b"yz" assert shim.getsize("a/b") == 3 assert shim.getsize("missing") is None + assert shim.get_range("missing", 0, 1) is None + assert shim.get_suffix("missing", 1) is None shim.delete("a/b") assert shim.get("a/b") is None From 6b8f60388fd4ea5ae1f372b09eec57cd1633d5aa Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 10:22:46 +0200 Subject: [PATCH 09/41] feat: zarrs store bridge and 
group creation Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 15 ++- src/zarr/zarrs/_api.py | 75 +++++++++++++ tests/zarrs/test_node.py | 48 ++++++++ zarrs-bindings/src/lib.rs | 6 +- zarrs-bindings/src/node.rs | 50 +++++++++ zarrs-bindings/src/store.rs | 216 ++++++++++++++++++++++++++++++++++++ 6 files changed, 407 insertions(+), 3 deletions(-) create mode 100644 src/zarr/zarrs/_api.py create mode 100644 tests/zarrs/test_node.py create mode 100644 zarrs-bindings/src/node.rs create mode 100644 zarrs-bindings/src/store.rs diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index 3aa871bf32..7e7af7644e 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -20,4 +20,17 @@ __version__: str = _zarrs_bindings.version() -__all__ = ["__version__"] +from zarr.zarrs._api import ( + NodeExistsError, + ZarrsOptions, + create_new_group, + create_overwrite_group, +) + +__all__ = [ + "NodeExistsError", + "ZarrsOptions", + "__version__", + "create_new_group", + "create_overwrite_group", +] diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py new file mode 100644 index 0000000000..cfe3f6ec2c --- /dev/null +++ b/src/zarr/zarrs/_api.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import _zarrs_bindings as _zb + +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from zarr.abc.store import Store + from zarr.core.common import JSON + +NodeExistsError = _zb.NodeExistsError +"""Raised by `create_new_*` when a node already exists at the target path.""" + + +@dataclass(frozen=True, slots=True) +class ZarrsOptions: + """Options for zarrs-backed operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. 
Accepting it now keeps signatures stable. + """ + + +def _node_path(path: str) -> str: + """Convert a zarr-python node path (`""`, `"foo/bar"`) to a zarrs node path + (`"/"`, `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path` from a group metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. + """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create a group at `path`, deleting any existing node (and its children) first.""" + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py new file mode 100644 index 0000000000..19e9654ded --- /dev/null +++ b/tests/zarrs/test_node.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +import zarr +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import NodeExistsError, create_new_group, create_overwrite_group + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = { + "zarr_format": 3, + "node_type": "group", + "attributes": {"answer": 42}, +} + + +async def test_create_new_group(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + group = zarr.open_group(store=store, path="foo", mode="r") + assert 
dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_at_root(store: Store) -> None: + await create_new_group(GROUP_META, store, "") + group = zarr.open_group(store=store, mode="r") + assert dict(group.attrs) == {"answer": 42} + + +async def test_create_new_group_existing_node(store: Store) -> None: + await create_new_group(GROUP_META, store, "foo") + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo") + + +async def test_create_overwrite_group(store: Store) -> None: + # an array and its chunks previously occupied the path; overwrite removes both + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + assert await store.exists("foo/c/0") + await create_overwrite_group(GROUP_META, store, "foo") + group = zarr.open_group(store=store, path="foo", mode="r") + assert dict(group.attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + assert await store.get("foo/zarr.json", prototype=default_buffer_prototype()) is not None diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index 803104a63c..e83a71e568 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs @@ -1,6 +1,9 @@ use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +mod node; +mod store; + pyo3::create_exception!( _zarrs_bindings, NodeExistsError, @@ -14,12 +17,10 @@ pyo3::create_exception!( "No node was found at the given path." 
); -#[allow(dead_code)] pub(crate) fn runtime_err(err: impl std::fmt::Display) -> PyErr { PyRuntimeError::new_err(err.to_string()) } -#[allow(dead_code)] pub(crate) fn value_err(err: impl std::fmt::Display) -> PyErr { PyValueError::new_err(err.to_string()) } @@ -34,5 +35,6 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("NodeExistsError", m.py().get_type::())?; m.add("NodeNotFoundError", m.py().get_type::())?; m.add_function(wrap_pyfunction!(version, m)?)?; + m.add_function(wrap_pyfunction!(node::create_group, m)?)?; Ok(()) } diff --git a/zarrs-bindings/src/node.rs b/zarrs-bindings/src/node.rs new file mode 100644 index 0000000000..c557c66409 --- /dev/null +++ b/zarrs-bindings/src/node.rs @@ -0,0 +1,50 @@ +use pyo3::prelude::*; +use zarrs::group::Group; +use zarrs::metadata::GroupMetadata; +use zarrs::node::{node_exists, NodePath}; +use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err, NodeExistsError}; + +pub(crate) fn parse_node_path(path: &str) -> PyResult { + NodePath::new(path).map_err(value_err) +} + +/// When a node exists at `node_path`: erase it (and everything under it) if +/// `overwrite`, otherwise raise `NodeExistsError`. +pub(crate) fn prepare_target( + storage: &ReadableWritableListableStorage, + node_path: &NodePath, + overwrite: bool, +) -> PyResult<()> { + if node_exists(storage, node_path).map_err(runtime_err)? 
{ + if !overwrite { + return Err(NodeExistsError::new_err(format!( + "a node already exists at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = node_path.try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err)?; + } + Ok(()) +} + +#[pyfunction] +pub(crate) fn create_group( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = GroupMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let group = Group::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + group.store_metadata().map_err(runtime_err) + }) +} diff --git a/zarrs-bindings/src/store.rs b/zarrs-bindings/src/store.rs new file mode 100644 index 0000000000..3ab8312061 --- /dev/null +++ b/zarrs-bindings/src/store.rs @@ -0,0 +1,216 @@ +use std::sync::Arc; + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict}; +use zarrs::filesystem::FilesystemStore; +use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; +use zarrs::storage::{ + Bytes, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, + ReadableStorageTraits, ReadableWritableListableStorage, StorageError, StoreKey, StoreKeys, + StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; + +/// A zarrs store backed by a Python `zarr.zarrs._bridge.StoreShim`. +/// +/// Every method attaches to the Python interpreter and calls the shim, which +/// blocks on the zarr event loop. Blocking waits in Python release the GIL, so +/// the loop thread can make progress while a Rust worker waits here. 
+pub(crate) struct PyStore(Py); + +fn py_err(err: PyErr) -> StorageError { + StorageError::Other(err.to_string()) +} + +fn invalid(err: impl std::fmt::Display) -> StorageError { + StorageError::Other(err.to_string()) +} + +impl PyStore { + fn get_with_range( + &self, + key: &StoreKey, + range: Option<&ByteRange>, + ) -> Result { + Python::attach(|py| { + let shim = self.0.bind(py); + let result = match range { + None => shim.call_method1("get", (key.as_str(),)), + Some(ByteRange::FromStart(offset, length)) => { + shim.call_method1("get_range", (key.as_str(), *offset, *length)) + } + Some(ByteRange::Suffix(suffix)) => { + shim.call_method1("get_suffix", (key.as_str(), *suffix)) + } + } + .map_err(py_err)?; + if result.is_none() { + Ok(None) + } else { + let bytes: Vec = result.extract().map_err(py_err)?; + Ok(Some(Bytes::from(bytes))) + } + }) + } +} + +impl ReadableStorageTraits for PyStore { + fn get(&self, key: &StoreKey) -> Result { + self.get_with_range(key, None) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + let mut out = Vec::new(); + for byte_range in byte_ranges { + match self.get_with_range(key, Some(&byte_range))? { + Some(bytes) => out.push(Ok(bytes)), + None => return Ok(None), + } + } + Ok(Some(Box::new(out.into_iter()))) + } + + fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize", (key.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err) + }) + } + + fn supports_get_partial(&self) -> bool { + true + } +} + +impl WritableStorageTraits for PyStore { + fn set(&self, key: &StoreKey, value: Bytes) -> Result<(), StorageError> { + Python::attach(|py| { + let data = PyBytes::new(py, &value); + self.0 + .bind(py) + .call_method1("set", (key.as_str(), data)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn set_partial_many( + &self, + key: &StoreKey, + offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + // read-modify-write fallback provided by zarrs + zarrs::storage::store_set_partial_many(self, key, offset_values) + } + + fn supports_set_partial(&self) -> bool { + false + } + + fn erase(&self, key: &StoreKey) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete", (key.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } + + fn erase_prefix(&self, prefix: &StorePrefix) -> Result<(), StorageError> { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("delete_prefix", (prefix.as_str(),)) + .map_err(py_err)?; + Ok(()) + }) + } +} + +impl ListableStorageTraits for PyStore { + fn list(&self) -> Result { + Python::attach(|py| { + let keys: Vec = self + .0 + .bind(py) + .call_method0("list") + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + let keys: Vec = self + .0 + .bind(py) + .call_method1("list_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err)?; + keys.into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect() + }) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + let (keys, prefixes): (Vec, Vec) = self + .0 + .bind(py) + .call_method1("list_dir", (prefix.as_str(),)) + .map_err(py_err)? 
+ .extract() + .map_err(py_err)?; + let keys = keys + .into_iter() + .map(|k| StoreKey::new(k).map_err(invalid)) + .collect::, StorageError>>()?; + let prefixes = prefixes + .into_iter() + .map(|p| StorePrefix::new(p).map_err(invalid)) + .collect::, StorageError>>()?; + Ok(StoreKeysPrefixes::new(keys, prefixes)) + }) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result { + Python::attach(|py| { + self.0 + .bind(py) + .call_method1("getsize_prefix", (prefix.as_str(),)) + .map_err(py_err)? + .extract() + .map_err(py_err) + }) + } +} + +/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` +/// output) into a zarrs storage handle. +pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult { + if let Ok(config) = obj.cast::() { + if let Some(root) = config.get_item("filesystem")? { + let root: String = root.extract()?; + let store = + FilesystemStore::new(root).map_err(|e| PyValueError::new_err(e.to_string()))?; + return Ok(Arc::new(store)); + } + return Err(PyValueError::new_err("unrecognized store configuration")); + } + Ok(Arc::new(PyStore(obj.clone().unbind()))) +} From 32573fd56f46bf6e06911a8958ce473901edd314 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 11:24:49 +0200 Subject: [PATCH 10/41] docs: document phase-1 error fidelity and creation race; cargo fmt Co-Authored-By: Claude Fable 5 --- .../specs/2026-06-11-zarrs-functional-api-design.md | 12 ++++++++---- src/zarr/zarrs/_api.py | 8 +++++++- zarrs-bindings/src/node.rs | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md index ad28c987f8..ebf2854611 100644 --- a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -140,10 +140,14 @@ side releases the GIL during I/O and compute (reacquiring it only inside 
The binding layer raises a small set of typed exceptions defined in one place: `NodeExistsError`, `NodeNotFoundError`, and `ValueError` subclasses for -metadata-parse and decode failures. `zarr.zarrs` translates to zarr-python -native exception types where an obvious equivalent exists (e.g. -`zarr.errors.ContainsArrayError`). Store-callback exceptions from Python -propagate through Rust unchanged. +metadata-parse failures. In Phase 1 the translation surface is deliberately +small: `zarr.zarrs` re-raises the bindings' `NodeNotFoundError` as +`zarr.errors.NodeNotFoundError`; `NodeExistsError` is exposed as +`zarr.zarrs.NodeExistsError`. Exceptions raised by Python store callbacks are +flattened to a `RuntimeError` carrying the original message — the original +exception type and traceback are lost crossing the Rust boundary. Faithful +propagation of store-callback exceptions (and richer mapping onto +`zarr.errors` types) is deferred to a later phase. ## Testing diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index cfe3f6ec2c..a125cc2688 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -54,6 +54,8 @@ async def create_new_group( """Create a group at `path` from a group metadata document. Raises `NodeExistsError` if any node already exists at `path`. + Creation is not atomic with respect to concurrent writers: a concurrent + creation at the same path can race the existence check. """ with _translate_errors(): await asyncio.to_thread( @@ -68,7 +70,11 @@ async def create_overwrite_group( *, options: ZarrsOptions | None = None, ) -> None: - """Create a group at `path`, deleting any existing node (and its children) first.""" + """Create a group at `path`, deleting any existing node (and its children) first. + + Creation is not atomic with respect to concurrent writers: a concurrent + creation at the same path can race the existence check. 
+ """ with _translate_errors(): await asyncio.to_thread( _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True diff --git a/zarrs-bindings/src/node.rs b/zarrs-bindings/src/node.rs index c557c66409..3daa6f3666 100644 --- a/zarrs-bindings/src/node.rs +++ b/zarrs-bindings/src/node.rs @@ -1,11 +1,11 @@ use pyo3::prelude::*; use zarrs::group::Group; use zarrs::metadata::GroupMetadata; -use zarrs::node::{node_exists, NodePath}; +use zarrs::node::{NodePath, node_exists}; use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; use crate::store::resolve_store; -use crate::{runtime_err, value_err, NodeExistsError}; +use crate::{NodeExistsError, runtime_err, value_err}; pub(crate) fn parse_node_path(path: &str) -> PyResult { NodePath::new(path).map_err(value_err) From f6a95e428b9a146344988bbd057592a55156ed63 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 11:42:09 +0200 Subject: [PATCH 11/41] feat: zarrs-backed array creation and metadata reads Adds `create_array` and `read_metadata` pyfunctions to the zarrs-bindings crate, and exposes them as `create_new_array`, `create_overwrite_array`, and `read_metadata` in the `zarr.zarrs` subpackage. 
Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 6 +++++ src/zarr/zarrs/_api.py | 52 ++++++++++++++++++++++++++++++++++++ tests/zarrs/test_node.py | 54 +++++++++++++++++++++++++++++++++++++- zarrs-bindings/src/lib.rs | 2 ++ zarrs-bindings/src/node.rs | 39 ++++++++++++++++++++++++--- 5 files changed, 149 insertions(+), 4 deletions(-) diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index 7e7af7644e..2e8fd4b107 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -23,14 +23,20 @@ from zarr.zarrs._api import ( NodeExistsError, ZarrsOptions, + create_new_array, create_new_group, + create_overwrite_array, create_overwrite_group, + read_metadata, ) __all__ = [ "NodeExistsError", "ZarrsOptions", "__version__", + "create_new_array", "create_new_group", + "create_overwrite_array", "create_overwrite_group", + "read_metadata", ] diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index a125cc2688..c4851641c8 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -79,3 +79,55 @@ async def create_overwrite_group( await asyncio.to_thread( _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True ) + + +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path` from a v2 or v3 array metadata document. + + Raises `NodeExistsError` if any node already exists at `path`. Creation is + not atomic with respect to concurrent writers: a concurrent creation at the + same path can race the existence check. 
+ """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), False + ) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Create an array at `path`, deleting any existing node (and its children) + first. The delete-then-create sequence is not atomic with respect to + concurrent writers. + """ + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), True + ) + + +async def read_metadata( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. + """ + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + result: dict[str, JSON] = json.loads(raw) + return result diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py index 19e9654ded..7601e7c2ea 100644 --- a/tests/zarrs/test_node.py +++ b/tests/zarrs/test_node.py @@ -1,12 +1,23 @@ from __future__ import annotations +import json from typing import TYPE_CHECKING, Any +import numpy as np import pytest import zarr +from tests.zarrs.conftest import array_metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.zarrs import NodeExistsError, create_new_group, create_overwrite_group +from zarr.errors import NodeNotFoundError +from zarr.zarrs import ( + NodeExistsError, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + read_metadata, +) if TYPE_CHECKING: from zarr.abc.store import Store @@ -46,3 +57,44 @@ async def test_create_overwrite_group(store: Store) -> None: assert dict(group.attrs) == {"answer": 42} assert not await store.exists("foo/c/0") 
assert await store.get("foo/zarr.json", prototype=default_buffer_prototype()) is not None + + +async def test_create_new_array(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + assert arr.chunks == (4, 4) + assert arr.dtype == np.dtype("uint16") + + +async def test_create_new_array_existing_node(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + with pytest.raises(NodeExistsError): + await create_new_array(array_metadata(), store, "arr") + + +async def test_create_overwrite_array(store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.shape == (8, 8) + + +async def test_read_metadata_matches_stored_document(store: Store) -> None: + await create_new_array(array_metadata(), store, "arr") + observed = await read_metadata(store, "arr") + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == json.loads(raw.to_bytes()) + + +async def test_read_metadata_zarr_python_group(store: Store) -> None: + zarr.create_group(store=store, path="g", attributes={"a": 1}) + observed = await read_metadata(store, "g") + assert observed["node_type"] == "group" + assert observed["attributes"] == {"a": 1} + + +async def test_read_metadata_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope") diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index e83a71e568..3c03578a61 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs @@ -35,6 +35,8 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("NodeExistsError", m.py().get_type::())?; m.add("NodeNotFoundError", m.py().get_type::())?; m.add_function(wrap_pyfunction!(version, m)?)?; + 
m.add_function(wrap_pyfunction!(node::create_array, m)?)?; m.add_function(wrap_pyfunction!(node::create_group, m)?)?; + m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; Ok(()) } diff --git a/zarrs-bindings/src/node.rs b/zarrs-bindings/src/node.rs index 3daa6f3666..95642d45b8 100644 --- a/zarrs-bindings/src/node.rs +++ b/zarrs-bindings/src/node.rs @@ -1,11 +1,12 @@ use pyo3::prelude::*; +use zarrs::array::Array; use zarrs::group::Group; -use zarrs::metadata::GroupMetadata; -use zarrs::node::{NodePath, node_exists}; +use zarrs::metadata::{ArrayMetadata, GroupMetadata}; +use zarrs::node::{Node, NodePath, node_exists}; use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; use crate::store::resolve_store; -use crate::{NodeExistsError, runtime_err, value_err}; +use crate::{NodeExistsError, NodeNotFoundError, runtime_err, value_err}; pub(crate) fn parse_node_path(path: &str) -> PyResult { NodePath::new(path).map_err(value_err) @@ -48,3 +49,35 @@ pub(crate) fn create_group( group.store_metadata().map_err(runtime_err) }) } + +#[pyfunction] +pub(crate) fn create_array( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + overwrite: bool, +) -> PyResult<()> { + let storage = resolve_store(store)?; + let metadata = ArrayMetadata::try_from(metadata_json.as_str()).map_err(value_err)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + prepare_target(&storage, &node_path, overwrite)?; + let array = Array::new_with_metadata(storage, &path, metadata).map_err(value_err)?; + array.store_metadata().map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn read_metadata( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult { + let storage = resolve_store(store)?; + py.detach(move || { + let node = + Node::open(&storage, &path).map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + serde_json::to_string(node.metadata()).map_err(runtime_err) + }) +} From 
59fd58d09a891ad29b506fdca7cee9c81cd5f201 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 11:51:34 +0200 Subject: [PATCH 12/41] test: v2 array creation via zarrs; review polish Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_api.py | 5 ++--- tests/zarrs/test_node.py | 7 +++++++ zarrs-bindings/src/node.rs | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index c4851641c8..598a435653 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -4,7 +4,7 @@ import json from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import _zarrs_bindings as _zb @@ -129,5 +129,4 @@ async def read_metadata( """ with _translate_errors(): raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) - result: dict[str, JSON] = json.loads(raw) - return result + return cast("dict[str, JSON]", json.loads(raw)) diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py index 7601e7c2ea..6a178d2947 100644 --- a/tests/zarrs/test_node.py +++ b/tests/zarrs/test_node.py @@ -80,6 +80,13 @@ async def test_create_overwrite_array(store: Store) -> None: assert arr.shape == (8, 8) +async def test_create_new_array_v2(store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr") + arr = zarr.open_array(store=store, path="arr", mode="r") + assert arr.metadata.zarr_format == 2 + assert arr.shape == (8, 8) + + async def test_read_metadata_matches_stored_document(store: Store) -> None: await create_new_array(array_metadata(), store, "arr") observed = await read_metadata(store, "arr") diff --git a/zarrs-bindings/src/node.rs b/zarrs-bindings/src/node.rs index 95642d45b8..c7833246e6 100644 --- a/zarrs-bindings/src/node.rs +++ b/zarrs-bindings/src/node.rs @@ -8,6 +8,8 @@ use zarrs::storage::{ReadableWritableListableStorage, StorePrefix}; use 
crate::store::resolve_store; use crate::{NodeExistsError, NodeNotFoundError, runtime_err, value_err}; +/// `path` arguments throughout this module are zarrs node paths, e.g. "/" or +/// "/foo/bar" (already normalized by the Python layer's `_node_path`). pub(crate) fn parse_node_path(path: &str) -> PyResult { NodePath::new(path).map_err(value_err) } From 2faace584d3708ecf4a596d371e76ba095074778 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 11:57:16 +0200 Subject: [PATCH 13/41] feat: zarrs-backed node deletion and child listing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `delete_node` and `list_children` to the zarrs-bindings Rust crate and the `zarr.zarrs` Python subpackage. `delete_node` erases the node prefix via `erase_prefix`, raising `NodeNotFoundError` when the node is absent. `list_children` opens the target as a `Group` and returns direct children as `(path, metadata_document)` pairs. Both are covered by 4 new tests (× 2 stores = 8 parametrized cases); total suite: 34 passed. 
Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 4 ++++ src/zarr/zarrs/_api.py | 37 +++++++++++++++++++++++++++++++++++++ tests/zarrs/test_node.py | 31 +++++++++++++++++++++++++++++++ zarrs-bindings/src/lib.rs | 2 ++ zarrs-bindings/src/node.rs | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+) diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index 2e8fd4b107..7d4968cd40 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -27,6 +27,8 @@ create_new_group, create_overwrite_array, create_overwrite_group, + delete_node, + list_children, read_metadata, ) @@ -38,5 +40,7 @@ "create_new_group", "create_overwrite_array", "create_overwrite_group", + "delete_node", + "list_children", "read_metadata", ] diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index 598a435653..09c58d6585 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -130,3 +130,40 @@ async def read_metadata( with _translate_errors(): raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) return cast("dict[str, JSON]", json.loads(raw)) + + +async def delete_node( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the node at `path`, including all keys and child nodes under it. + + Raises `zarr.errors.NodeNotFoundError` if no node exists there. Deleting + the root node (`path=""`) clears the entire store. + """ + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + +async def list_children( + store: Store, + path: str, + *, + options: ZarrsOptions | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs. Paths are store-relative (no leading + `/`). + + Raises `zarr.errors.NodeNotFoundError` if no group exists at `path`. 
+ """ + with _translate_errors(): + raw: list[tuple[str, str]] = await asyncio.to_thread( + _zb.list_children, resolve_store(store), _node_path(path) + ) + return [ + (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) + for child_path, doc in raw + ] diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py index 6a178d2947..c5d46f472f 100644 --- a/tests/zarrs/test_node.py +++ b/tests/zarrs/test_node.py @@ -16,6 +16,8 @@ create_new_group, create_overwrite_array, create_overwrite_group, + delete_node, + list_children, read_metadata, ) @@ -105,3 +107,32 @@ async def test_read_metadata_zarr_python_group(store: Store) -> None: async def test_read_metadata_missing(store: Store) -> None: with pytest.raises(NodeNotFoundError): await read_metadata(store, "nope") + + +async def test_delete_node(store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed") + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope") + + +async def test_list_children(store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + children = await list_children(store, "") + by_path = dict(children) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + + +async def test_list_children_missing(store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await list_children(store, "nope") diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index 3c03578a61..6a98d4ec53 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs 
@@ -37,6 +37,8 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(version, m)?)?; m.add_function(wrap_pyfunction!(node::create_array, m)?)?; m.add_function(wrap_pyfunction!(node::create_group, m)?)?; + m.add_function(wrap_pyfunction!(node::delete_node, m)?)?; + m.add_function(wrap_pyfunction!(node::list_children, m)?)?; m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; Ok(()) } diff --git a/zarrs-bindings/src/node.rs b/zarrs-bindings/src/node.rs index c7833246e6..35a057ab31 100644 --- a/zarrs-bindings/src/node.rs +++ b/zarrs-bindings/src/node.rs @@ -83,3 +83,40 @@ pub(crate) fn read_metadata( serde_json::to_string(node.metadata()).map_err(runtime_err) }) } + +#[pyfunction] +pub(crate) fn delete_node(py: Python<'_>, store: &Bound<'_, PyAny>, path: String) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let node_path = parse_node_path(&path)?; + if !node_exists(&storage, &node_path).map_err(runtime_err)? 
{ + return Err(NodeNotFoundError::new_err(format!( + "no node found at path {}", + node_path.as_str() + ))); + } + let prefix: StorePrefix = (&node_path).try_into().map_err(value_err)?; + storage.erase_prefix(&prefix).map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn list_children( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, +) -> PyResult> { + let storage = resolve_store(store)?; + py.detach(move || { + let group = + Group::open(storage, &path).map_err(|e| NodeNotFoundError::new_err(e.to_string()))?; + let children = group.children(false).map_err(runtime_err)?; + children + .into_iter() + .map(|node| { + let metadata = serde_json::to_string(node.metadata()).map_err(runtime_err)?; + Ok((node.path().as_str().to_string(), metadata)) + }) + .collect() + }) +} From dcaf5ef399e3117b6ffeb500b79fa71833d7e91f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 14:27:48 +0200 Subject: [PATCH 14/41] feat: zarrs-backed whole-chunk decode/encode/raw-read/erase Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 8 +++ src/zarr/zarrs/_api.py | 118 +++++++++++++++++++++++++++++++- tests/zarrs/test_chunk.py | 130 ++++++++++++++++++++++++++++++++++++ tests/zarrs/test_node.py | 1 + zarrs-bindings/src/chunk.rs | 94 ++++++++++++++++++++++++++ zarrs-bindings/src/lib.rs | 5 ++ 6 files changed, 354 insertions(+), 2 deletions(-) create mode 100644 tests/zarrs/test_chunk.py create mode 100644 zarrs-bindings/src/chunk.rs diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index 7d4968cd40..d439e9d7c7 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -27,8 +27,12 @@ create_new_group, create_overwrite_array, create_overwrite_group, + decode_chunk, delete_node, + encode_chunk, + erase_chunk, list_children, + read_encoded_chunk, read_metadata, ) @@ -40,7 +44,11 @@ "create_new_group", "create_overwrite_array", "create_overwrite_group", + "decode_chunk", "delete_node", + "encode_chunk", + 
"erase_chunk", "list_children", + "read_encoded_chunk", "read_metadata", ] diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index 09c58d6585..8a021faa75 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -4,9 +4,10 @@ import json from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, Any, cast import _zarrs_bindings as _zb +import numpy as np from zarr.errors import NodeNotFoundError from zarr.zarrs._bridge import resolve_store @@ -14,6 +15,8 @@ if TYPE_CHECKING: from collections.abc import Iterator, Mapping + import numpy.typing as npt + from zarr.abc.store import Store from zarr.core.common import JSON @@ -157,7 +160,8 @@ async def list_children( `(path, metadata_document)` pairs. Paths are store-relative (no leading `/`). - Raises `zarr.errors.NodeNotFoundError` if no group exists at `path`. + Raises `zarr.errors.NodeNotFoundError` if no *group* exists at `path` -- + including when `path` holds an array. 
""" with _translate_errors(): raw: list[tuple[str, str]] = await asyncio.to_thread( @@ -167,3 +171,113 @@ async def list_children( (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) for child_path, doc in raw ] + + +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve the numpy dtype and chunk shape from a metadata document, using + zarr-python's own metadata parsing.""" + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + if metadata.get("zarr_format") == 3: + meta3 = ArrayV3Metadata.from_dict(dict(metadata)) + grid = meta3.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return meta3.data_type.to_native_dtype(), grid.chunk_shape + meta2 = ArrayV2Metadata.from_dict(dict(metadata)) + return meta2.dtype.to_native_dtype(), meta2.chunks + + +async def decode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + selection: tuple[slice | int, ...] | None = None, + options: ZarrsOptions | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the chunk at `chunk_coords` of the array described by + `metadata`, located at `path` in `store`. + + The metadata document is authoritative: it is not read from the store. + Missing chunks decode to the fill value. `selection` (a chunk-relative + subset) is not implemented yet. 
+ """ + if selection is not None: + raise NotImplementedError("chunk subset selection is not implemented yet") + raw = await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if the chunk does not exist. No codecs are applied.""" + result: bytes | None = await asyncio.to_thread( + _zb.retrieve_encoded_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) + return result + + +async def encode_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: ZarrsOptions | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk + at `chunk_coords`. `value` must match the chunk shape exactly.""" + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + arr.tobytes(), + ) + + +async def erase_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: ZarrsOptions | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. 
Deleting a missing chunk is a no-op.""" + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(chunk_coords), + ) diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py new file mode 100644 index 0000000000..abcc45a751 --- /dev/null +++ b/tests/zarrs/test_chunk.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.zarrs.conftest import array_metadata +from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.zarrs import ( + create_new_array, + decode_chunk, + encode_chunk, + erase_chunk, + read_encoded_chunk, +) + +if TYPE_CHECKING: + from zarr.abc.store import Store + + +def _filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array named 'a' via zarr-python, fill it with a ramp, and + return (data, metadata_document).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if params.get("zarr_format") == 2: + # v2 attributes live in .zattrs, not in the .zarray document + doc.pop("attributes", None) + return data, doc + + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64"]) +async def test_decode_chunk_differential(store: Store, dtype: str) -> None: + data, meta = _filled(store, dtype=dtype) + observed = await decode_chunk(meta, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_decode_chunk_codecs(store: Store, compressors: Any) -> None: + data, meta = 
_filled(store, compressors=compressors) + observed = await decode_chunk(meta, store, "a", (0, 1)) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_decode_chunk_v2(store: Store) -> None: + data, meta = _filled(store, zarr_format=2) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_sharding(store: Store) -> None: + # with sharding, the metadata chunk grid is the shard grid + data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_decode_chunk_missing_returns_fill_value(store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await decode_chunk(meta, store, "a", (0, 0)) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_decode_chunk_selection_not_implemented(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(NotImplementedError): + await decode_chunk(meta, store, "a", (0, 0), selection=(slice(0, 2), slice(0, 2))) + + +async def test_decode_chunk_metadata_view(store: Store) -> None: + # the read-only-view case: decode with a metadata document the store never saw + data, meta = _filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await decode_chunk(view, store, "a", (1, 0)) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_encode_chunk_differential(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + value = np.arange(16, dtype="uint16").reshape(4, 4) + await encode_chunk(meta, 
store, "a", (0, 1), value) + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 4:8], value) + + +async def test_encode_chunk_shape_mismatch(store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a") + with pytest.raises(ValueError, match="chunk shape"): + await encode_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16")) + + +async def test_read_encoded_chunk_matches_store(store: Store) -> None: + _, meta = _filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0)) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_returns_none(store: Store) -> None: + arr = zarr.create_array(store=store, name="empty", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "empty", (0, 0)) is None + + +async def test_erase_chunk(store: Store) -> None: + _, meta = _filled(store) + assert await store.exists("a/c/0/0") + await erase_chunk(meta, store, "a", (0, 0)) + assert not await store.exists("a/c/0/0") + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 0:4], np.zeros((4, 4), dtype="uint16")) diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py index c5d46f472f..749f71ddf5 100644 --- a/tests/zarrs/test_node.py +++ b/tests/zarrs/test_node.py @@ -129,6 +129,7 @@ async def test_list_children(store: Store) -> None: children = await list_children(store, "") by_path = dict(children) assert set(by_path) == {"sub_group", "sub_array"} + assert not any(p.startswith("/") for p in by_path) assert by_path["sub_group"]["node_type"] == "group" assert by_path["sub_array"]["node_type"] == "array" diff --git a/zarrs-bindings/src/chunk.rs b/zarrs-bindings/src/chunk.rs new file mode 100644 index 0000000000..246a198768 --- 
/dev/null +++ b/zarrs-bindings/src/chunk.rs @@ -0,0 +1,94 @@ +use pyo3::exceptions::PyNotImplementedError; +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use zarrs::array::{Array, ArrayBytes}; +use zarrs::metadata::ArrayMetadata; +use zarrs::storage::ReadableWritableListableStorage; + +use crate::store::resolve_store; +use crate::{runtime_err, value_err}; + +type DynArray = Array; + +/// Construct an Array view from an explicit metadata document, without +/// consulting the store for metadata. +fn array_view( + storage: ReadableWritableListableStorage, + path: &str, + metadata_json: &str, +) -> PyResult { + let metadata = ArrayMetadata::try_from(metadata_json).map_err(value_err)?; + Array::new_with_metadata(storage, path, metadata).map_err(value_err) +} + +#[pyfunction] +pub(crate) fn retrieve_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult> { + let array = array_view(storage, &path, &metadata_json)?; + let bytes: ArrayBytes<'static> = + array.retrieve_chunk(&chunk_coords).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} + +#[pyfunction] +pub(crate) fn retrieve_encoded_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult>> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult>> { + let array = array_view(storage, &path, &metadata_json)?; + array + .retrieve_encoded_chunk(&chunk_coords) + .map_err(runtime_err) + })?; + Ok(data.map(|d| PyBytes::new(py, &d).unbind())) +} + +#[pyfunction] +pub(crate) fn store_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: 
Vec, + data: Vec, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array + .store_chunk(&chunk_coords, ArrayBytes::new_flen(data)) + .map_err(runtime_err) + }) +} + +#[pyfunction] +pub(crate) fn erase_chunk( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + chunk_coords: Vec, +) -> PyResult<()> { + let storage = resolve_store(store)?; + py.detach(move || { + let array = array_view(storage, &path, &metadata_json)?; + array.erase_chunk(&chunk_coords).map_err(runtime_err) + }) +} diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index 6a98d4ec53..2e62b6538f 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs @@ -1,6 +1,7 @@ use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +mod chunk; mod node; mod store; @@ -40,5 +41,9 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(node::delete_node, m)?)?; m.add_function(wrap_pyfunction!(node::list_children, m)?)?; m.add_function(wrap_pyfunction!(node::read_metadata, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_encoded_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; Ok(()) } From 63429b5be111879c4004d22d8355e04d8add6ab3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 18:38:23 +0200 Subject: [PATCH 15/41] fix: coerce chunk dtype to native byte order for zarrs I/O Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_api.py | 14 +++++++++++--- tests/zarrs/test_chunk.py | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index 8a021faa75..318c39348c 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -177,7 
+177,12 @@ def _chunk_dtype_and_shape( metadata: Mapping[str, JSON], ) -> tuple[np.dtype[Any], tuple[int, ...]]: """Resolve the numpy dtype and chunk shape from a metadata document, using - zarr-python's own metadata parsing.""" + zarr-python's own metadata parsing. + + The dtype is coerced to native byte order: zarrs always decodes to (and + encodes from) the native in-memory representation, applying any byte-order + codec itself. + """ from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata @@ -186,9 +191,9 @@ def _chunk_dtype_and_shape( grid = meta3.chunk_grid if not isinstance(grid, RegularChunkGridMetadata): raise NotImplementedError("only regular chunk grids are supported") - return meta3.data_type.to_native_dtype(), grid.chunk_shape + return meta3.data_type.to_native_dtype().newbyteorder("="), grid.chunk_shape meta2 = ArrayV2Metadata.from_dict(dict(metadata)) - return meta2.dtype.to_native_dtype(), meta2.chunks + return meta2.dtype.to_native_dtype().newbyteorder("="), meta2.chunks async def decode_chunk( @@ -206,6 +211,9 @@ async def decode_chunk( The metadata document is authoritative: it is not read from the store. Missing chunks decode to the fill value. `selection` (a chunk-relative subset) is not implemented yet. + + The returned array is a read-only, zero-copy view over the decoded bytes; + call `.copy()` if you need a writable array. 
""" if selection is not None: raise NotImplementedError("chunk subset selection is not implemented yet") diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py index abcc45a751..64456fe954 100644 --- a/tests/zarrs/test_chunk.py +++ b/tests/zarrs/test_chunk.py @@ -58,6 +58,27 @@ async def test_decode_chunk_v2(store: Store) -> None: np.testing.assert_array_equal(observed, data[4:8, 4:8]) +async def test_decode_chunk_v2_big_endian(store: Store) -> None: + data, meta = _filled(store, dtype=">u2", zarr_format=2) + observed = await decode_chunk(meta, store, "a", (1, 1)) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_encode_chunk_v2_big_endian(store: Store) -> None: + meta = array_metadata(dtype=">u2", zarr_format=2) + await create_new_array(meta, store, "a") + value = np.arange(16, dtype="uint16").reshape(4, 4) + await encode_chunk(meta, store, "a", (0, 1), value) + arr = zarr.open_array(store=store, path="a", mode="r") + np.testing.assert_array_equal(arr[0:4, 4:8], value) + + +async def test_decode_chunk_readonly(store: Store) -> None: + _, meta = _filled(store) + observed = await decode_chunk(meta, store, "a", (0, 0)) + assert not observed.flags.writeable + + async def test_decode_chunk_sharding(store: Store) -> None: # with sharding, the metadata chunk grid is the shard grid data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) From e5dc4825852cc22578589de359012219b9d8186d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 18:41:17 +0200 Subject: [PATCH 16/41] chore: lint fixes and changelog for zarr.zarrs Co-Authored-By: Claude Fable 5 --- changes/+zarrs-bindings.feature.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changes/+zarrs-bindings.feature.md diff --git a/changes/+zarrs-bindings.feature.md b/changes/+zarrs-bindings.feature.md new file mode 100644 index 0000000000..0b0e7ee384 --- /dev/null +++ b/changes/+zarrs-bindings.feature.md @@ -0,0 +1,6 @@ +Added `zarr.zarrs`, an 
experimental low-level functional API for zarr hierarchy +CRUD backed by the Rust [zarrs](https://zarrs.dev) crate via the new in-repo +`zarrs-bindings` PyO3 crate. Array routines take an explicit metadata document, +enabling read-only views such as decoding chunks with externally supplied +metadata or reading raw encoded chunk bytes. Build for development with +`uv sync --group zarrs`. From 9dd5abdbad8a4e4690deabd055452c2f7704c36b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 18:50:51 +0200 Subject: [PATCH 17/41] ci: test job for zarrs bindings Co-Authored-By: Claude Fable 5 --- .github/workflows/zarrs.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/zarrs.yml diff --git a/.github/workflows/zarrs.yml b/.github/workflows/zarrs.yml new file mode 100644 index 0000000000..2df419772d --- /dev/null +++ b/.github/workflows/zarrs.yml @@ -0,0 +1,32 @@ +name: Zarrs bindings + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 0 # hatch-vcs needs tags to compute zarr's version + persist-credentials: false + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + with: + python-version: '3.12' + - name: Run zarrs bindings tests + # the ubuntu runner image ships a Rust toolchain; the maturin build + # backend is fetched by uv on demand + run: uv run --group zarrs pytest tests/zarrs -v From 7226b8d4914848334ff2068becb9bcefdbcdc94c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 20:11:14 +0200 Subject: [PATCH 18/41] fix: keep zarr.zarrs out of doctest collection; pin Rust toolchain in CI - Add 
--ignore=src/zarr/zarrs to pytest addopts so --doctest-modules doesn't attempt to import the bindings module when zarrs-bindings is not installed, preventing a collection ERROR in jobs that don't use the zarrs dependency group. - Add dtolnay/rust-toolchain step (SHA-pinned, stable) to zarrs.yml CI so the build is not reliant on whatever Rust version the runner image ships; ensures rust-version = "1.91" in the crate is satisfied. - Fix spec: abi3-py311 -> abi3-py312 to match zarr's requires-python >=3.12. Co-Authored-By: Claude Fable 5 --- .github/workflows/zarrs.yml | 2 ++ .../superpowers/specs/2026-06-11-zarrs-functional-api-design.md | 2 +- pyproject.toml | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/zarrs.yml b/.github/workflows/zarrs.yml index 2df419772d..954dcda79b 100644 --- a/.github/workflows/zarrs.yml +++ b/.github/workflows/zarrs.yml @@ -26,6 +26,8 @@ jobs: uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 with: python-version: '3.12' + - name: Install Rust + uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable - name: Run zarrs bindings tests # the ubuntu runner image ships a Rust toolchain; the maturin build # backend is fetched by uv on demand diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md index ebf2854611..091c8cb211 100644 --- a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -40,7 +40,7 @@ codec-pipeline registry through this API (possible later), fancy Two distributions in this repo, hard boundary between them: 1. **Rust crate `zarrs-bindings`** at the repo root (`zarrs-bindings/`), - built with maturin (PyO3, `abi3-py311`), publishing wheel `zarrs-bindings` + built with maturin (PyO3, `abi3-py312`), publishing wheel `zarrs-bindings` with native module `_zarrs_bindings`. 
It is a thin, mechanical binding over `zarrs`: functions/pyclasses take metadata as a **JSON string**, a store-config object, a node path, and return bytes / numpy arrays. It knows diff --git a/pyproject.toml b/pyproject.toml index 90368ed37a..d24917bc5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -441,6 +441,7 @@ addopts = [ "--doctest-modules", "--ignore=tests/test_regression/scripts", "--ignore=src/zarr/_cli", + "--ignore=src/zarr/zarrs", ] filterwarnings = [ "error", From 2b2a6b5692342f7535aed9d331f117a2489a1aeb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 21:35:48 +0200 Subject: [PATCH 19/41] fix: make zarrs test skip xdist-compatible Move the pytest.importorskip("_zarrs_bindings") guard from conftest.py (module-level) to the top of each test module. When the bindings are absent, xdist workers would previously raise Skipped while importing the conftest, causing "Different tests were collected between gw0 and gwX" failures. Per-module guards are the standard xdist-safe pattern. 
Co-Authored-By: Claude Fable 5 --- tests/zarrs/conftest.py | 4 ---- tests/zarrs/test_api.py | 6 ++++++ tests/zarrs/test_bridge.py | 6 ++++++ tests/zarrs/test_chunk.py | 4 ++++ tests/zarrs/test_node.py | 4 ++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/zarrs/conftest.py b/tests/zarrs/conftest.py index 678065e5e1..092bce5473 100644 --- a/tests/zarrs/conftest.py +++ b/tests/zarrs/conftest.py @@ -4,10 +4,6 @@ import pytest -pytest.importorskip( - "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError -) - import zarr from zarr.storage import LocalStore, MemoryStore diff --git a/tests/zarrs/test_api.py b/tests/zarrs/test_api.py index da1a9ecda8..1a3e9005e2 100644 --- a/tests/zarrs/test_api.py +++ b/tests/zarrs/test_api.py @@ -1,5 +1,11 @@ from __future__ import annotations +import pytest + +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + def test_import() -> None: import zarr.zarrs diff --git a/tests/zarrs/test_bridge.py b/tests/zarrs/test_bridge.py index 2b88c047dd..f997b052f2 100644 --- a/tests/zarrs/test_bridge.py +++ b/tests/zarrs/test_bridge.py @@ -2,6 +2,12 @@ from typing import TYPE_CHECKING +import pytest + +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + from zarr.storage import LocalStore, MemoryStore from zarr.zarrs._bridge import StoreShim, resolve_store diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py index 64456fe954..74a494770f 100644 --- a/tests/zarrs/test_chunk.py +++ b/tests/zarrs/test_chunk.py @@ -6,6 +6,10 @@ import numpy as np import pytest +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + import zarr from tests.zarrs.conftest import array_metadata from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py index 749f71ddf5..eefb4cf137 
100644 --- a/tests/zarrs/test_node.py +++ b/tests/zarrs/test_node.py @@ -6,6 +6,10 @@ import numpy as np import pytest +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + import zarr from tests.zarrs.conftest import array_metadata from zarr.core.buffer.core import default_buffer_prototype From 100a7cbc33320dd70533befceb1f8b90b931a250 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 12 Jun 2026 22:19:14 +0200 Subject: [PATCH 20/41] feat: decode_region with numpy-style basic indexing Add `retrieve_array_subset` Rust binding and `decode_region` Python API function. Selection normaliser maps integers/slices/Ellipsis to a step-1 bounding box fetched in one zarrs call; strides, reversals, and integer-axis removal are applied as numpy views on the result. Co-Authored-By: Claude Fable 5 --- .../2026-06-11-zarrs-functional-api-design.md | 7 +- src/zarr/zarrs/__init__.py | 2 + src/zarr/zarrs/_api.py | 133 ++++++++++++++++++ tests/zarrs/test_chunk.py | 69 +++++++++ zarrs-bindings/src/chunk.rs | 25 +++- zarrs-bindings/src/lib.rs | 1 + 6 files changed, 233 insertions(+), 4 deletions(-) diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md index 091c8cb211..bfafd59aae 100644 --- a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -65,7 +65,7 @@ All functions are `async def`. Parameters: - `store`: `zarr.abc.store.Store`. - `path`: node path within the store (str, `""` = root). - `chunk_coords`: `tuple[int, ...]` grid coordinates. -- `selection`: tuple of `slice`/`int` only (v1 restriction). +- `selection`: numpy-style basic indexing — integers, slices (including steps; strided/reversed selections fetch the step-1 bounding box in one call and apply numpy views), and `Ellipsis`. 
Fancy indexing (integer/boolean arrays) and `np.newaxis` are not supported. - `options`: every function also accepts keyword-only `options: ZarrsOptions | None = None` (omitted from the signatures below for brevity) — a dataclass holding concurrency limits and checksum validation @@ -170,8 +170,9 @@ propagation of store-callback exceptions (and richer mapping onto 1. **Phase 1**: crate scaffolding (maturin, CI build), store bridge (native LocalStore + generic PyStore), node lifecycle functions, whole-chunk `decode_chunk` / `read_encoded_chunk` / `encode_chunk` / `erase_chunk`. -2. **Phase 2**: `decode_region` / `encode_region`, chunk-subset `selection` - via partial decoders. +2. **Phase 2**: `decode_region` (read side of region I/O) is implemented on + this branch. `encode_region` and chunk-subset `selection` for `decode_chunk` + via partial decoders remain Phase 2. 3. **Phase 3**: `ZarrsOptions` surface (concurrency, checksum validation, direct IO), obstore native path, benchmarks vs. the pure-Python pipeline. 
diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index d439e9d7c7..1e287b9cfa 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -28,6 +28,7 @@ create_overwrite_array, create_overwrite_group, decode_chunk, + decode_region, delete_node, encode_chunk, erase_chunk, @@ -45,6 +46,7 @@ "create_overwrite_array", "create_overwrite_group", "decode_chunk", + "decode_region", "delete_node", "encode_chunk", "erase_chunk", diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index 318c39348c..d97acd0984 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -2,6 +2,9 @@ import asyncio import json +import operator +import types +from collections.abc import Sequence from contextlib import contextmanager from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -33,6 +36,10 @@ class ZarrsOptions: """ +BasicIndex = int | slice | types.EllipsisType +BasicSelection = BasicIndex | tuple[BasicIndex, ...] + + def _node_path(path: str) -> str: """Convert a zarr-python node path (`""`, `"foo/bar"`) to a zarrs node path (`"/"`, `"/foo/bar"`).""" @@ -173,6 +180,93 @@ async def list_children( ] +def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: + """Resolve the array shape from a metadata document.""" + shape = metadata.get("shape") + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("metadata document has no valid 'shape'") + result: list[int] = [] + for s in shape: + if not isinstance(s, (int, float)): + raise TypeError(f"shape element {s!r} is not a number") + result.append(int(s)) + return tuple(result) + + +def _normalize_selection( + selection: BasicSelection, shape: tuple[int, ...] +) -> tuple[list[int], list[int], tuple[slice | int, ...]]: + """Normalize a numpy-style basic-indexing selection against `shape`. 
+ + Returns `(start, bounding_shape, post_index)`: the step-1 bounding box to + fetch (per-dimension start and length), and the numpy index to apply to + the fetched block to produce the final result (strides, reversals, and + integer-axis removal). Only integers, slices, and `Ellipsis` are + supported; fancy indexing raises `TypeError`. + """ + sel_tuple = selection if isinstance(selection, tuple) else (selection,) + + n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + if n_ellipsis == 1: + i = sel_tuple.index(Ellipsis) + n_fill = len(shape) - (len(sel_tuple) - 1) + if n_fill < 0: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] + if len(sel_tuple) > len(shape): + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) + + starts: list[int] = [] + lengths: list[int] = [] + post: list[slice | int] = [] + for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): + if isinstance(sel, slice): + start, stop, step = sel.indices(size) + n = len(range(start, stop, step)) + if n == 0: + starts.append(0) + lengths.append(0) + post.append(slice(None)) + elif step > 0: + last = start + (n - 1) * step + starts.append(start) + lengths.append(last - start + 1) + post.append(slice(None, None, step)) + else: + # descending: bounding box is [last, start], ascending in store + # order; slice(None, None, step) over the block starts at its + # final element (global `start`) and lands exactly on index 0 + # (global `last`) because the block length is (n-1)*|step| + 1. 
+ last = start + (n - 1) * step + starts.append(last) + lengths.append(start - last + 1) + post.append(slice(None, None, step)) + else: + if isinstance(sel, types.EllipsisType): + raise TypeError( + "unsupported selection element " + f"{sel!r}: only integers, slices, and Ellipsis are supported" + ) + try: + idx = operator.index(sel) + except TypeError: + raise TypeError( + "unsupported selection element " + f"{sel!r}: only integers, slices, and Ellipsis are supported" + ) from None + if idx < 0: + idx += size + if not 0 <= idx < size: + raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") + starts.append(idx) + lengths.append(1) + post.append(0) + return starts, lengths, tuple(post) + + def _chunk_dtype_and_shape( metadata: Mapping[str, JSON], ) -> tuple[np.dtype[Any], tuple[int, ...]]: @@ -289,3 +383,42 @@ async def erase_chunk( json.dumps(metadata), list(chunk_coords), ) + + +async def decode_region( + metadata: Mapping[str, JSON], + store: Store, + path: str, + selection: BasicSelection, + *, + options: ZarrsOptions | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the region of the array described by `metadata` given + by a numpy-style basic-indexing `selection` (integers, slices including + steps, `Ellipsis`). + + The metadata document is authoritative: it is not read from the store. + One zarrs call fetches the step-1 bounding box of the selection (decoding + all overlapping chunks, in parallel for multi-chunk regions); strides, + reversals, and integer-axis removal are applied as numpy views on the + result. Missing chunks decode to the fill value. Fancy indexing (integer + or boolean arrays) is not supported and raises `TypeError`. The returned + array is a read-only view; call `.copy()` if you need a writable array. 
+ """ + dtype, _ = _chunk_dtype_and_shape(metadata) + shape = _array_shape(metadata) + starts, lengths, post_index = _normalize_selection(selection, shape) + if 0 in lengths: + block = np.empty(lengths, dtype=dtype) + else: + raw = await asyncio.to_thread( + _zb.retrieve_array_subset, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + starts, + lengths, + ) + block = np.frombuffer(raw, dtype=dtype).reshape(lengths) + result: np.ndarray[Any, np.dtype[Any]] = block[post_index] + return result diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py index 74a494770f..89cf10304d 100644 --- a/tests/zarrs/test_chunk.py +++ b/tests/zarrs/test_chunk.py @@ -17,6 +17,7 @@ from zarr.zarrs import ( create_new_array, decode_chunk, + decode_region, encode_chunk, erase_chunk, read_encoded_chunk, @@ -153,3 +154,71 @@ async def test_erase_chunk(store: Store) -> None: assert not await store.exists("a/c/0/0") arr = zarr.open_array(store=store, path="a", mode="r") np.testing.assert_array_equal(arr[0:4, 0:4], np.zeros((4, 4), dtype="uint16")) + + +SELECTIONS: list[Any] = [ + (slice(None), slice(None)), + (slice(2, 7), slice(1, 5)), # crosses chunk boundaries + (slice(None), 3), + (5, slice(None)), + (3, 4), # fully scalar -> 0-d result + (slice(1, 8, 2), slice(None)), + (slice(None), slice(6, 1, -2)), # negative step + (slice(-3, None), slice(None, -1)), # negative bounds + ..., # Ellipsis alone + (..., slice(2, 4)), + (slice(0, 0), slice(None)), # empty + (slice(2, 6),), # partial selection, missing trailing dims +] + + +@pytest.mark.parametrize("sel", SELECTIONS) +async def test_decode_region_differential(store: Store, sel: Any) -> None: + data, meta = _filled(store) + observed = await decode_region(meta, store, "a", sel) + np.testing.assert_array_equal(observed, data[sel]) + + +async def test_decode_region_sharding(store: Store) -> None: + data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await decode_region(meta, store, "a", 
(slice(1, 7), slice(3, 8))) + np.testing.assert_array_equal(observed, data[1:7, 3:8]) + + +async def test_decode_region_v2(store: Store) -> None: + data, meta = _filled(store, zarr_format=2) + observed = await decode_region(meta, store, "a", (slice(2, 7), slice(None, None, 3))) + np.testing.assert_array_equal(observed, data[2:7, ::3]) + + +async def test_decode_region_missing_chunks_fill_value(store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await decode_region(meta, store, "a", (slice(2, 6), slice(2, 6))) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_decode_region_out_of_bounds(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(IndexError, match="out of bounds"): + await decode_region(meta, store, "a", (8, slice(None))) + + +async def test_decode_region_too_many_indices(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(IndexError, match="too many indices"): + await decode_region(meta, store, "a", (0, 0, 0)) + + +async def test_decode_region_fancy_indexing_rejected(store: Store) -> None: + _, meta = _filled(store) + with pytest.raises(TypeError, match="only integers, slices"): + await decode_region(meta, store, "a", ([0, 1], slice(None))) # type: ignore[arg-type] + + +async def test_decode_region_readonly(store: Store) -> None: + _, meta = _filled(store) + observed = await decode_region(meta, store, "a", (slice(0, 4), slice(0, 4))) + assert not observed.flags.writeable diff --git a/zarrs-bindings/src/chunk.rs b/zarrs-bindings/src/chunk.rs index 246a198768..30c0b04554 100644 --- a/zarrs-bindings/src/chunk.rs +++ b/zarrs-bindings/src/chunk.rs @@ -1,7 +1,7 @@ use pyo3::exceptions::PyNotImplementedError; use pyo3::prelude::*; use pyo3::types::PyBytes; -use zarrs::array::{Array, ArrayBytes}; +use zarrs::array::{Array, ArrayBytes, 
ArraySubset}; use zarrs::metadata::ArrayMetadata; use zarrs::storage::ReadableWritableListableStorage; @@ -92,3 +92,26 @@ pub(crate) fn erase_chunk( array.erase_chunk(&chunk_coords).map_err(runtime_err) }) } + +#[pyfunction] +pub(crate) fn retrieve_array_subset( + py: Python<'_>, + store: &Bound<'_, PyAny>, + path: String, + metadata_json: String, + start: Vec<u64>, + shape: Vec<u64>, +) -> PyResult<Py<PyBytes>> { + let storage = resolve_store(store)?; + let data = py.detach(move || -> PyResult<Vec<u8>> { + let array = array_view(storage, &path, &metadata_json)?; + let subset = ArraySubset::new_with_start_shape(start, shape).map_err(value_err)?; + let bytes: ArrayBytes<'static> = + array.retrieve_array_subset(&subset).map_err(runtime_err)?; + let fixed = bytes.into_fixed().map_err(|_| { + PyNotImplementedError::new_err("variable-length data types are not supported") + })?; + Ok(fixed.into_owned()) + })?; + Ok(PyBytes::new(py, &data).unbind()) +} diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index 2e62b6538f..aa4552c897 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs @@ -45,5 +45,6 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(chunk::retrieve_encoded_chunk, m)?)?; m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; + m.add_function(wrap_pyfunction!(chunk::retrieve_array_subset, m)?)?; Ok(()) } From ccbf9624169b6f20b5c298de99bdcd82fdc832c0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 13 Jun 2026 23:57:36 +0200 Subject: [PATCH 21/41] fix: keep empty-selection decode_region result read-only Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_api.py | 1 + tests/zarrs/test_chunk.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index d97acd0984..40efe13af8 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -410,6 +410,7 @@ async def decode_region( starts,
lengths, post_index = _normalize_selection(selection, shape) if 0 in lengths: block = np.empty(lengths, dtype=dtype) + block.flags.writeable = False else: raw = await asyncio.to_thread( _zb.retrieve_array_subset, diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py index 89cf10304d..6b4964413c 100644 --- a/tests/zarrs/test_chunk.py +++ b/tests/zarrs/test_chunk.py @@ -222,3 +222,5 @@ async def test_decode_region_readonly(store: Store) -> None: _, meta = _filled(store) observed = await decode_region(meta, store, "a", (slice(0, 4), slice(0, 4))) assert not observed.flags.writeable + empty = await decode_region(meta, store, "a", (slice(0, 0), slice(None))) + assert not empty.flags.writeable From 8895224aac6a3087d5c8a624f3ba532fffbdaa5b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 09:55:15 +0200 Subject: [PATCH 22/41] docs: note decode_region overread; tidy selection normalizer - Add docstring note to decode_region warning that zarrs fetches the step-1 bounding box, so strided selections read O(span) bytes. - Remove dead isinstance(sel, EllipsisType) raise in _normalize_selection (Ellipsis is expanded to slice(None) before the per-dimension loop); replace with an assert to preserve mypy type narrowing. - Guard non-integral float shape elements in _array_shape so shape=[1.5] raises TypeError instead of silently truncating to 1. 
Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_api.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py index 40efe13af8..df0804f719 100644 --- a/src/zarr/zarrs/_api.py +++ b/src/zarr/zarrs/_api.py @@ -189,6 +189,8 @@ def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: for s in shape: if not isinstance(s, (int, float)): raise TypeError(f"shape element {s!r} is not a number") + if isinstance(s, float) and not s.is_integer(): + raise TypeError(f"shape element {s!r} is not an integer") result.append(int(s)) return tuple(result) @@ -245,11 +247,7 @@ def _normalize_selection( lengths.append(start - last + 1) post.append(slice(None, None, step)) else: - if isinstance(sel, types.EllipsisType): - raise TypeError( - "unsupported selection element " - f"{sel!r}: only integers, slices, and Ellipsis are supported" - ) + assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" try: idx = operator.index(sel) except TypeError: @@ -404,6 +402,11 @@ async def decode_region( result. Missing chunks decode to the fill value. Fancy indexing (integer or boolean arrays) is not supported and raises `TypeError`. The returned array is a read-only view; call `.copy()` if you need a writable array. + + Note: zarrs fetches the step-1 bounding box of the selection. A selection + like `slice(0, N, step)` reads `O(N)` bytes from the store even though only + `O(N / step)` are returned; for sparse selections over large arrays, prefer + reading per-chunk with `decode_chunk`. 
""" dtype, _ = _chunk_dtype_and_shape(metadata) shape = _array_shape(metadata) From 5faf19846f26e5bd5506dd9950a0c959e57892fa Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 10:00:23 +0200 Subject: [PATCH 23/41] perf: cache constructed Arrays for native filesystem stores Add a process-wide LRU cache (capacity 128) in chunk.rs keyed on (filesystem root, node path, metadata JSON) that memoises the result of Array::new_with_metadata for the native FilesystemStore path. Generic Python-backed stores (MemoryStore, ZipStore, custom) are not cached. The cache key encodes root + path + metadata so an Array is never reused for a different store or different codec chain; chunk data continues to flow through the store on every call, so the cache cannot return stale data. Two test-hook pyfunctions (array_cache_len / clear_array_cache) are exposed on _zarrs_bindings; five correctness tests in tests/zarrs/test_cache.py cover population, non-caching of MemoryStore, distinct-metadata entries, root-keying, and write visibility. All 117 tests pass; cargo clippy -D warnings clean. 
Co-Authored-By: Claude Fable 5 --- tests/zarrs/test_cache.py | 86 +++++++++++++++++++++++++++++++++++++ zarrs-bindings/Cargo.lock | 14 +++++- zarrs-bindings/Cargo.toml | 1 + zarrs-bindings/src/chunk.rs | 72 +++++++++++++++++++++++++------ zarrs-bindings/src/lib.rs | 2 + zarrs-bindings/src/store.rs | 21 ++++++--- 6 files changed, 175 insertions(+), 21 deletions(-) create mode 100644 tests/zarrs/test_cache.py diff --git a/tests/zarrs/test_cache.py b/tests/zarrs/test_cache.py new file mode 100644 index 0000000000..41e3e24f29 --- /dev/null +++ b/tests/zarrs/test_cache.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +pytest.importorskip( + "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError +) + +import _zarrs_bindings as zb + +import zarr +from zarr.storage import LocalStore, MemoryStore +from zarr.zarrs import decode_chunk, encode_chunk + +if TYPE_CHECKING: + from pathlib import Path + + +def _meta(store: Any, name: str = "a") -> dict[str, Any]: + arr = zarr.create_array(store=store, name=name, shape=(8, 8), chunks=(4, 4), dtype="uint16") + arr[:, :] = np.arange(64, dtype="uint16").reshape(8, 8) + return dict(arr.metadata.to_dict()) + + +@pytest.fixture(autouse=True) +def _clear_cache() -> None: + zb.clear_array_cache() + + +async def test_localstore_populates_cache(tmp_path: Path) -> None: + store = await LocalStore.open(root=tmp_path / "s") + meta = _meta(store) + assert zb.array_cache_len() == 0 + await decode_chunk(meta, store, "a", (0, 0)) + assert zb.array_cache_len() == 1 + # second op on the SAME array reuses the entry, does not grow the cache + await decode_chunk(meta, store, "a", (1, 1)) + assert zb.array_cache_len() == 1 + + +async def test_memorystore_is_not_cached() -> None: + store = MemoryStore() + meta = _meta(store) + await decode_chunk(meta, store, "a", (0, 0)) + assert zb.array_cache_len() == 0 + + +async def 
test_distinct_metadata_distinct_entries(tmp_path: Path) -> None: + store = await LocalStore.open(root=tmp_path / "s") + meta_a = _meta(store, "a") + meta_b = _meta(store, "b") + await decode_chunk(meta_a, store, "a", (0, 0)) + await decode_chunk(meta_b, store, "b", (0, 0)) + assert zb.array_cache_len() == 2 + + +async def test_cache_keyed_on_root_not_just_metadata(tmp_path: Path) -> None: + # two stores at different roots, identical metadata + path, different data. + # A correct cache (keyed on root) must return each store's own data. + s1 = await LocalStore.open(root=tmp_path / "s1") + s2 = await LocalStore.open(root=tmp_path / "s2") + a1 = zarr.create_array(store=s1, name="a", shape=(4, 4), chunks=(4, 4), dtype="uint16") + a1[:, :] = 1 + a2 = zarr.create_array(store=s2, name="a", shape=(4, 4), chunks=(4, 4), dtype="uint16") + a2[:, :] = 2 + meta = dict(a1.metadata.to_dict()) # identical metadata document + out1 = await decode_chunk(meta, s1, "a", (0, 0)) + out2 = await decode_chunk(meta, s2, "a", (0, 0)) + np.testing.assert_array_equal(out1, np.full((4, 4), 1, dtype="uint16")) + np.testing.assert_array_equal(out2, np.full((4, 4), 2, dtype="uint16")) + assert zb.array_cache_len() == 2 + + +async def test_cache_reflects_writes_through_store(tmp_path: Path) -> None: + # after the Array is cached, a write via the cached Array must be visible to + # a subsequent read (proves the cache does not stale-cache chunk data) + store = await LocalStore.open(root=tmp_path / "s") + meta = _meta(store) + await decode_chunk(meta, store, "a", (0, 0)) # caches the Array + new = np.full((4, 4), 99, dtype="uint16") + await encode_chunk(meta, store, "a", (0, 0), new) # write via (cached) Array + out = await decode_chunk(meta, store, "a", (0, 0)) + np.testing.assert_array_equal(out, new) diff --git a/zarrs-bindings/Cargo.lock b/zarrs-bindings/Cargo.lock index a86d4e26e2..9c9a91f203 100644 --- a/zarrs-bindings/Cargo.lock +++ b/zarrs-bindings/Cargo.lock @@ -468,6 +468,8 @@ version = 
"0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -605,6 +607,15 @@ version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "lru" version = "0.16.4" @@ -1585,7 +1596,7 @@ dependencies = [ "itoa", "libz-sys", "log", - "lru", + "lru 0.16.4", "moka", "ndarray", "num", @@ -1616,6 +1627,7 @@ dependencies = [ name = "zarrs-bindings" version = "0.1.0" dependencies = [ + "lru 0.12.5", "pyo3", "serde_json", "zarrs", diff --git a/zarrs-bindings/Cargo.toml b/zarrs-bindings/Cargo.toml index e0f381f416..ab06ef3517 100644 --- a/zarrs-bindings/Cargo.toml +++ b/zarrs-bindings/Cargo.toml @@ -12,6 +12,7 @@ name = "_zarrs_bindings" crate-type = ["cdylib"] [dependencies] +lru = "0.12" pyo3 = { version = "0.28", features = ["abi3-py312"] } serde_json = "1" zarrs = "0.23" diff --git a/zarrs-bindings/src/chunk.rs b/zarrs-bindings/src/chunk.rs index 30c0b04554..7d77f10512 100644 --- a/zarrs-bindings/src/chunk.rs +++ b/zarrs-bindings/src/chunk.rs @@ -1,3 +1,7 @@ +use std::num::NonZeroUsize; +use std::sync::{Arc, Mutex, OnceLock}; + +use lru::LruCache; use pyo3::exceptions::PyNotImplementedError; use pyo3::prelude::*; use pyo3::types::PyBytes; @@ -5,14 +9,22 @@ use zarrs::array::{Array, ArrayBytes, ArraySubset}; use zarrs::metadata::ArrayMetadata; use zarrs::storage::ReadableWritableListableStorage; -use crate::store::resolve_store; +use crate::store::resolve_store_with_key; use crate::{runtime_err, value_err}; type DynArray = Array; -/// Construct an Array 
view from an explicit metadata document, without -/// consulting the store for metadata. -fn array_view( +/// Cache of constructed Arrays keyed by (filesystem root, node path, metadata +/// JSON). Only native filesystem stores are cached (see `resolve_store_with_key`). +/// Bounded by an LRU; entries hold only a filesystem path + codec chain, no data. +type CacheKey = (String, String, String); +static ARRAY_CACHE: OnceLock>>> = OnceLock::new(); + +fn array_cache() -> &'static Mutex>> { + ARRAY_CACHE.get_or_init(|| Mutex::new(LruCache::new(NonZeroUsize::new(128).unwrap()))) +} + +fn build_array( storage: ReadableWritableListableStorage, path: &str, metadata_json: &str, @@ -21,6 +33,38 @@ fn array_view( Array::new_with_metadata(storage, path, metadata).map_err(value_err) } +/// Construct (or fetch from cache) an Array view from an explicit metadata +/// document, without consulting the store for metadata. When `cache_key` is +/// `Some(root)` the result is memoized on (root, path, metadata_json). 
+fn array_view( + storage: ReadableWritableListableStorage, + cache_key: Option, + path: &str, + metadata_json: &str, +) -> PyResult> { + if let Some(root) = cache_key { + let key = (root, path.to_string(), metadata_json.to_string()); + if let Some(array) = array_cache().lock().unwrap().get(&key).cloned() { + return Ok(array); + } + let array = Arc::new(build_array(storage, path, metadata_json)?); + array_cache().lock().unwrap().put(key, Arc::clone(&array)); + Ok(array) + } else { + Ok(Arc::new(build_array(storage, path, metadata_json)?)) + } +} + +#[pyfunction] +pub(crate) fn array_cache_len() -> usize { + array_cache().lock().unwrap().len() +} + +#[pyfunction] +pub(crate) fn clear_array_cache() { + array_cache().lock().unwrap().clear(); +} + #[pyfunction] pub(crate) fn retrieve_chunk( py: Python<'_>, @@ -29,9 +73,9 @@ pub(crate) fn retrieve_chunk( metadata_json: String, chunk_coords: Vec, ) -> PyResult> { - let storage = resolve_store(store)?; + let (storage, cache_key) = resolve_store_with_key(store)?; let data = py.detach(move || -> PyResult> { - let array = array_view(storage, &path, &metadata_json)?; + let array = array_view(storage, cache_key, &path, &metadata_json)?; let bytes: ArrayBytes<'static> = array.retrieve_chunk(&chunk_coords).map_err(runtime_err)?; let fixed = bytes.into_fixed().map_err(|_| { @@ -50,9 +94,9 @@ pub(crate) fn retrieve_encoded_chunk( metadata_json: String, chunk_coords: Vec, ) -> PyResult>> { - let storage = resolve_store(store)?; + let (storage, cache_key) = resolve_store_with_key(store)?; let data = py.detach(move || -> PyResult>> { - let array = array_view(storage, &path, &metadata_json)?; + let array = array_view(storage, cache_key, &path, &metadata_json)?; array .retrieve_encoded_chunk(&chunk_coords) .map_err(runtime_err) @@ -69,9 +113,9 @@ pub(crate) fn store_chunk( chunk_coords: Vec, data: Vec, ) -> PyResult<()> { - let storage = resolve_store(store)?; + let (storage, cache_key) = resolve_store_with_key(store)?; py.detach(move 
|| { - let array = array_view(storage, &path, &metadata_json)?; + let array = array_view(storage, cache_key, &path, &metadata_json)?; array .store_chunk(&chunk_coords, ArrayBytes::new_flen(data)) .map_err(runtime_err) @@ -86,9 +130,9 @@ pub(crate) fn erase_chunk( metadata_json: String, chunk_coords: Vec, ) -> PyResult<()> { - let storage = resolve_store(store)?; + let (storage, cache_key) = resolve_store_with_key(store)?; py.detach(move || { - let array = array_view(storage, &path, &metadata_json)?; + let array = array_view(storage, cache_key, &path, &metadata_json)?; array.erase_chunk(&chunk_coords).map_err(runtime_err) }) } @@ -102,9 +146,9 @@ pub(crate) fn retrieve_array_subset( start: Vec, shape: Vec, ) -> PyResult> { - let storage = resolve_store(store)?; + let (storage, cache_key) = resolve_store_with_key(store)?; let data = py.detach(move || -> PyResult> { - let array = array_view(storage, &path, &metadata_json)?; + let array = array_view(storage, cache_key, &path, &metadata_json)?; let subset = ArraySubset::new_with_start_shape(start, shape).map_err(value_err)?; let bytes: ArrayBytes<'static> = array.retrieve_array_subset(&subset).map_err(runtime_err)?; diff --git a/zarrs-bindings/src/lib.rs b/zarrs-bindings/src/lib.rs index aa4552c897..61f947480f 100644 --- a/zarrs-bindings/src/lib.rs +++ b/zarrs-bindings/src/lib.rs @@ -46,5 +46,7 @@ fn _zarrs_bindings(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(chunk::store_chunk, m)?)?; m.add_function(wrap_pyfunction!(chunk::erase_chunk, m)?)?; m.add_function(wrap_pyfunction!(chunk::retrieve_array_subset, m)?)?; + m.add_function(wrap_pyfunction!(chunk::array_cache_len, m)?)?; + m.add_function(wrap_pyfunction!(chunk::clear_array_cache, m)?)?; Ok(()) } diff --git a/zarrs-bindings/src/store.rs b/zarrs-bindings/src/store.rs index 3ab8312061..c58de37387 100644 --- a/zarrs-bindings/src/store.rs +++ b/zarrs-bindings/src/store.rs @@ -200,17 +200,26 @@ impl ListableStorageTraits for PyStore { } } 
-/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` -/// output) into a zarrs storage handle. -pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult<ReadableWritableListableStorage> { +/// Like `resolve_store`, but also returns a cache key for the constructed +/// storage: `Some(root)` for native filesystem stores (which are safe to key an +/// Array cache on), `None` for the generic Python-callback path (uncached). +pub(crate) fn resolve_store_with_key( + obj: &Bound<'_, PyAny>, +) -> PyResult<(ReadableWritableListableStorage, Option<String>)> { if let Ok(config) = obj.cast::<PyDict>() { if let Some(root) = config.get_item("filesystem")? { let root: String = root.extract()?; let store = - FilesystemStore::new(root).map_err(|e| PyValueError::new_err(e.to_string()))?; - return Ok(Arc::new(store)); + FilesystemStore::new(&root).map_err(|e| PyValueError::new_err(e.to_string()))?; + return Ok((Arc::new(store), Some(root))); } return Err(PyValueError::new_err("unrecognized store configuration")); } - Ok(Arc::new(PyStore(obj.clone().unbind()))) + Ok((Arc::new(PyStore(obj.clone().unbind())), None)) +} + +/// Convert the Python-side store representation (`zarr.zarrs._bridge.resolve_store` +/// output) into a zarrs storage handle. +pub(crate) fn resolve_store(obj: &Bound<'_, PyAny>) -> PyResult<ReadableWritableListableStorage> { + Ok(resolve_store_with_key(obj)?.0) } From 983afbbf34c6436c8dc1ffb02024665cd7ae0b60 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 11:17:50 +0200 Subject: [PATCH 24/41] fix: recover from poisoned Array-cache mutex instead of wedging Replace all four `array_cache().lock().unwrap()` call sites with a `lock_cache()` helper that uses `.unwrap_or_else(|e| e.into_inner())`. If a thread panics while holding the mutex the lock is now recovered (worst case: a stale cache entry) rather than poisoning every subsequent lock call and wedging all array I/O permanently.
Co-Authored-By: Claude Fable 5 --- zarrs-bindings/src/chunk.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/zarrs-bindings/src/chunk.rs b/zarrs-bindings/src/chunk.rs index 7d77f10512..556995924b 100644 --- a/zarrs-bindings/src/chunk.rs +++ b/zarrs-bindings/src/chunk.rs @@ -24,6 +24,13 @@ fn array_cache() -> &'static Mutex>> { ARRAY_CACHE.get_or_init(|| Mutex::new(LruCache::new(NonZeroUsize::new(128).unwrap()))) } +/// Acquire the array cache lock, recovering gracefully from a poisoned mutex +/// (e.g. a thread panicked while holding it). The worst case is a stale or +/// partially-updated cache entry — far preferable to wedging all array I/O. +fn lock_cache() -> std::sync::MutexGuard<'static, LruCache>> { + array_cache().lock().unwrap_or_else(|e| e.into_inner()) +} + fn build_array( storage: ReadableWritableListableStorage, path: &str, @@ -44,11 +51,11 @@ fn array_view( ) -> PyResult> { if let Some(root) = cache_key { let key = (root, path.to_string(), metadata_json.to_string()); - if let Some(array) = array_cache().lock().unwrap().get(&key).cloned() { + if let Some(array) = lock_cache().get(&key).cloned() { return Ok(array); } let array = Arc::new(build_array(storage, path, metadata_json)?); - array_cache().lock().unwrap().put(key, Arc::clone(&array)); + lock_cache().put(key, Arc::clone(&array)); Ok(array) } else { Ok(Arc::new(build_array(storage, path, metadata_json)?)) @@ -57,12 +64,12 @@ fn array_view( #[pyfunction] pub(crate) fn array_cache_len() -> usize { - array_cache().lock().unwrap().len() + lock_cache().len() } #[pyfunction] pub(crate) fn clear_array_cache() { - array_cache().lock().unwrap().clear(); + lock_cache().clear(); } #[pyfunction] From 0087a7e44ed6537c40fc6e8d1cc30a71dad5f734 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 11:19:04 +0200 Subject: [PATCH 25/41] docs: record Array construction cache in design spec Co-Authored-By: Claude Fable 5 --- 
.../2026-06-11-zarrs-functional-api-design.md | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md index bfafd59aae..0c7deb8eac 100644 --- a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md +++ b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -136,6 +136,30 @@ function calls a blocking Rust entry point via `asyncio.to_thread`; the Rust side releases the GIL during I/O and compute (reacquiring it only inside `PyStore` callbacks). zarrs's experimental async feature is not used. +## Array construction cache + +`Array::new_with_metadata` (serde-parsing the metadata document and building the +codec chain) is the dominant per-call cost on the native path — measured at +~20µs for a bytes-only array up to ~80µs for sharded+blosc, against single-digit +µs of actual chunk I/O on a warm filesystem. To amortize it across the common +"open one array, then do many chunk operations" pattern, the chunk/region +routines memoize the constructed `Array` in a process-global LRU cache +(capacity 128) keyed on `(filesystem root, node path, metadata JSON)`. + +This is safe because a zarrs `Array` caches no chunk data — it is metadata plus +codec chain plus a storage handle — so every read/write still goes through to +the store, and a correctly-keyed hit is behaviorally identical to a fresh build. +The key must include all three components: the same document at a different path +or store is a different array. Only native filesystem stores are cached; the +generic `PyStore` callback path has no stable cross-call identity to key on and +is left uncached (a future change may cache it if a store can supply a stable +value-based token). 
No invalidation hook is needed: delete/overwrite with +different metadata yields a different key, and an entry for a deleted-and-rebuilt +array with identical metadata stays valid because reads go through to the store. +A poisoned cache mutex is recovered rather than propagated, so the cache can +never wedge array I/O. Measured win: 14–20% faster per repeated call on a local +store, free on every hit. + ## Error handling The binding layer raises a small set of typed exceptions defined in one place: From cb87cae91dc3e8853b6283b43aee864e09643981 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 12:05:03 +0200 Subject: [PATCH 26/41] docs: design spec for backend-agnostic CRUD layer Co-Authored-By: Claude Fable 5 --- ...6-06-15-crud-backend-abstraction-design.md | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md diff --git a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md new file mode 100644 index 0000000000..02ab480913 --- /dev/null +++ b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md @@ -0,0 +1,177 @@ +# Backend-agnostic CRUD layer for zarr-python + +Date: 2026-06-15 +Status: approved +Branch: `zarrs-bindings` + +## Goal + +Turn the low-level functional CRUD API (introduced as `zarr.zarrs` earlier on +this branch) into a backend-agnostic layer, with the Rust zarrs bindings as one +of several interchangeable implementations. Define the CRUD contract abstractly, +provide a pure-Python reference backend (no Rust required), and make the zarrs +bindings conform to the same contract. + +This validates the abstraction by having two real backends agree with each other +and with zarr-python, and it gives users a no-Rust fallback. 
+ +Non-goals for this change (deliberately deferred): + +- Wiring the CRUD layer under zarr-python's own `Array`/`Group` classes. +- Entrypoint-based backend discovery (this change uses explicit import-time + registration). +- Region/selection features beyond what already exists (`encode_region`, + chunk-subset `selection` on `decode_chunk` remain future work, unchanged). + +## Background + +The current `zarr.zarrs._api` is a flat module of 13 async functions that +delegate to the `_zarrs_bindings` Rust extension. It already separates two +concerns that this design formalizes into a hard boundary: + +- **Backend-neutral glue:** `_normalize_selection`, `_array_shape`, + `_chunk_dtype_and_shape`, numpy assembly (`np.frombuffer`/reshape/strided + views), native-dtype coercion, options handling, error translation. +- **Genuinely zarrs-specific work:** producing/consuming raw chunk bytes, + reading array subsets as bytes, writing metadata documents — all via + `_zarrs_bindings` and the `_bridge.StoreShim`/`resolve_store` plumbing. + +The public surface (`zarr.zarrs.decode_region`, etc.) is unreleased on this +branch, so it can move without backward-compatibility constraints. + +zarr-python already contains everything a pure-Python backend needs: +`BatchedCodecPipeline` (`src/zarr/core/codec_pipeline.py`), `BasicIndexer` +(`src/zarr/core/indexing.py`), `save_metadata` (`src/zarr/core/metadata/io.py`), +metadata parsing (`ArrayV3Metadata.from_dict` / `ArrayV2Metadata.from_dict`), +and chunk-key encoding (`src/zarr/core/chunk_key_encodings.py`). + +## Architecture + +Two packages with a hard boundary. + +### `zarr.crud` (new, backend-neutral) + +- `_backend.py` — the `CrudBackend` `Protocol` (the narrow byte/metadata + contract below) plus the canonical exceptions. +- `_api.py` — the shared async facade: the 13 public functions moved out of + `zarr.zarrs`, holding all backend-neutral logic. 
Each function resolves a + backend (from the `backend` argument or the registry default) and calls its + byte/metadata methods, then does selection normalization, dtype handling, and + numpy assembly. +- `_reference.py` — `ReferenceBackend`, pure Python, wrapping zarr-python's own + codec/indexing/metadata machinery. Always importable; the default backend. +- `_registry.py` — `register_backend(name, backend)`, `get_backend(name)`, and + the config-driven default resolution. +- `__init__.py` — re-exports the facade functions, `CrudBackend`, + `ZarrsOptions`, the exceptions, and `register_backend`. + +### `zarr.zarrs` (shrinks to the zarrs provider) + +- `_backend.py` — `ZarrsBackend`, implementing `CrudBackend` by wrapping + `_zarrs_bindings`. Owns the zarrs-isms that move out of the facade: + `json.dumps` of the metadata dict, the `/`-prefixed zarrs node-path form + (formerly `_node_path`), `_bridge.resolve_store`, and translation of + `_zarrs_bindings` exceptions into the `zarr.crud` canonical exceptions. +- `_bridge.py` — unchanged (`StoreShim`, `resolve_store`). +- the Rust crate `zarrs-bindings/` and the construction cache — unchanged. +- registers itself as backend `"zarrs"` at import time. + +## The `CrudBackend` contract + +Narrow, byte/metadata level. Methods pass neutral types — the metadata document +as a `dict`, the zarr `Store`, and plain zarr paths (`""`, `"foo/bar"`) — and +return raw bytes / JSON-as-dict / `None`. Each backend serializes and bridges as +it needs. + +```python +class CrudBackend(Protocol): + async def create_array(self, store, path, metadata, *, overwrite: bool) -> None: ... + async def create_group(self, store, path, metadata, *, overwrite: bool) -> None: ... + async def read_metadata(self, store, path) -> dict[str, JSON]: ... + async def delete_node(self, store, path) -> None: ... + async def list_children(self, store, path) -> list[tuple[str, dict[str, JSON]]]: ... 
+ async def retrieve_chunk(self, store, path, metadata, coords) -> bytes: ... + async def retrieve_encoded_chunk(self, store, path, metadata, coords) -> bytes | None: ... + async def retrieve_subset(self, store, path, metadata, start, shape) -> bytes: ... + async def store_chunk(self, store, path, metadata, coords, data: bytes) -> None: ... + async def erase_chunk(self, store, path, metadata, coords) -> None: ... +``` + +Byte conventions: `retrieve_chunk`/`retrieve_subset` return C-contiguous raw +bytes in the array's native byte order for the requested chunk / step-1 bounding +box; `store_chunk` takes the same. `retrieve_encoded_chunk` returns the raw +stored (still-encoded) chunk bytes or `None` if absent. `read_metadata`/ +`list_children` return parsed JSON documents as dicts. + +## Facade / backend split + +What stays in the `zarr.crud` facade (written once, backend-neutral): + +- selection normalization (`_normalize_selection`), shape/dtype resolution + (`_array_shape`, `_chunk_dtype_and_shape`), native-dtype coercion; +- numpy assembly: `np.frombuffer(...).reshape(...)` and the strided/reversed/ + integer-axis post-index views; read-only result guarantee; +- the empty-selection short circuit (no backend call); +- `ZarrsOptions` acceptance (still a placeholder) and backend resolution. + +What moves into each backend: + +- `ZarrsBackend`: `json.dumps`, the `/`-prefixed node-path form, + `resolve_store`, calling `_zarrs_bindings`, exception translation. +- `ReferenceBackend`: `ArrayV3Metadata.from_dict`/`ArrayV2Metadata.from_dict`, + building a `BatchedCodecPipeline` and `ChunkGrid`/`BasicIndexer`, assembling + `batch_info` and calling `codec_pipeline.read`/`write`, `save_metadata`, + `store.delete_dir`, and `list_dir` + per-child metadata reads. + +## Backend selection + +- A registry in `zarr.crud._registry`: `register_backend(name, backend)`, + `get_backend(name) -> CrudBackend`. +- A `zarr.config` key `crud.backend`, default `"reference"`. 
The pure-Python + backend always works and is predictable; `"zarrs"` opts into the accelerator + and is registered when `zarr.zarrs` is imported. +- Every facade function accepts `backend: CrudBackend | str | None = None`. + `None` → registry default; a string → registry lookup; an instance → used + directly. This enables side-by-side testing of backends. + +## Error handling + +`zarr.crud` defines the canonical exceptions: reuse +`zarr.errors.NodeNotFoundError`, and keep a `NodeExistsError` (exposed as +`zarr.crud.NodeExistsError`). Each backend raises these directly: + +- `ReferenceBackend` raises them at the point of detection. +- `ZarrsBackend` translates `_zarrs_bindings.NodeExistsError` / + `_zarrs_bindings.NodeNotFoundError` into the canonical types. + +The facade therefore no longer needs the `_translate_errors` shim. Phase-1 +fidelity limits (store-callback exceptions flattened to `RuntimeError` across the +Rust boundary) are unchanged for the zarrs backend; the reference backend +surfaces native exceptions directly. + +## Testing + +- Shared differential suite moves to `tests/crud/`, parametrized over + `backend ∈ {reference, zarrs}` × `store ∈ {memory, local}`. Each test writes + with zarr-python and reads through the facade (and vice versa), so the two + backends are checked against zarr-python *and*, transitively, against each + other. The zarrs-parametrized cases skip when `_zarrs_bindings` is not + installed (xdist-safe module-level `importorskip` in a zarrs-only conftest + helper, or a skip marker on the zarrs param). +- Zarrs-only tests stay in `tests/zarrs/`: the construction cache + (`test_cache.py`) and the store bridge (`test_bridge.py`). +- A focused `tests/crud/test_registry.py`: default resolution, `register_backend`, + string vs instance `backend=` override. 
+- `uv run --group zarrs pytest tests/crud tests/zarrs` is the full local check; + `uv run pytest tests/crud` (no zarrs group) must pass with the reference + backend alone and skip the zarrs params. + +## Migration notes + +- Move the 13 functions and the neutral helpers from `zarr.zarrs._api` into + `zarr.crud._api`; delete `zarr.zarrs._api`. No aliases in `zarr.zarrs`. +- `zarr.zarrs.__init__` exports only what is needed to register and identify the + zarrs backend (`ZarrsBackend`, and re-registers `"zarrs"` on import). +- The changelog fragment is updated to describe `zarr.crud` as the public CRUD + surface with pluggable backends, and `zarr.zarrs` as the zarrs backend. +- The CI job continues to build the crate and now runs `tests/crud tests/zarrs`. From 5ae3bb21a395387695cae9d7c9aed6434f56275d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 12:13:06 +0200 Subject: [PATCH 27/41] docs: consistent CRUD verb set (create/read/write/delete/list) Co-Authored-By: Claude Fable 5 --- ...6-06-15-crud-backend-abstraction-design.md | 55 +++++++++++++++---- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md index 02ab480913..2b4946a812 100644 --- a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md +++ b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md @@ -21,7 +21,7 @@ Non-goals for this change (deliberately deferred): - Entrypoint-based backend discovery (this change uses explicit import-time registration). - Region/selection features beyond what already exists (`encode_region`, - chunk-subset `selection` on `decode_chunk` remain future work, unchanged). + chunk-subset `selection` on `read_chunk` remain future work, unchanged). 
## Background @@ -88,21 +88,51 @@ class CrudBackend(Protocol): async def create_array(self, store, path, metadata, *, overwrite: bool) -> None: ... async def create_group(self, store, path, metadata, *, overwrite: bool) -> None: ... async def read_metadata(self, store, path) -> dict[str, JSON]: ... + async def read_chunk(self, store, path, metadata, coords) -> bytes: ... + async def read_encoded_chunk(self, store, path, metadata, coords) -> bytes | None: ... + async def read_subset(self, store, path, metadata, start, shape) -> bytes: ... + async def write_chunk(self, store, path, metadata, coords, data: bytes) -> None: ... + async def delete_chunk(self, store, path, metadata, coords) -> None: ... async def delete_node(self, store, path) -> None: ... async def list_children(self, store, path) -> list[tuple[str, dict[str, JSON]]]: ... - async def retrieve_chunk(self, store, path, metadata, coords) -> bytes: ... - async def retrieve_encoded_chunk(self, store, path, metadata, coords) -> bytes | None: ... - async def retrieve_subset(self, store, path, metadata, start, shape) -> bytes: ... - async def store_chunk(self, store, path, metadata, coords, data: bytes) -> None: ... - async def erase_chunk(self, store, path, metadata, coords) -> None: ... ``` -Byte conventions: `retrieve_chunk`/`retrieve_subset` return C-contiguous raw -bytes in the array's native byte order for the requested chunk / step-1 bounding -box; `store_chunk` takes the same. `retrieve_encoded_chunk` returns the raw -stored (still-encoded) chunk bytes or `None` if absent. `read_metadata`/ +Byte conventions: `read_chunk`/`read_subset` return C-contiguous raw bytes in the +array's native byte order for the requested chunk / step-1 bounding box; +`write_chunk` takes the same. `read_encoded_chunk` returns the raw stored +(still-encoded) chunk bytes or `None` if absent. `read_metadata`/ `list_children` return parsed JSON documents as dicts. 
+## Method naming + +Both the public facade and the backend contract use a single, consistent verb +set: **create / read / write / delete / list**. No `decode`/`encode`/`retrieve`/ +`store`/`erase` synonyms. + +Public facade (`zarr.crud`): + +| Function | Verb | Notes | +|---|---|---| +| `create_new_group` / `create_overwrite_group` | create | node lifecycle | +| `create_new_array` / `create_overwrite_array` | create | node lifecycle | +| `read_metadata` | read | array or group document | +| `read_chunk` | read | decoded chunk → `ndarray` | +| `read_encoded_chunk` | read | raw stored bytes, no decode | +| `read_region` | read | numpy basic-indexing selection → `ndarray` | +| `write_chunk` | write | encode + store a chunk | +| `delete_chunk` | delete | remove one chunk | +| `delete_node` | delete | remove a node + descendants | +| `list_children` | list | direct children of a group | + +Facade → backend mapping for the byte-level methods: `read_chunk` → +`backend.read_chunk`, `read_encoded_chunk` → `backend.read_encoded_chunk`, +`read_region` → `backend.read_subset` (the facade normalizes the selection to a +step-1 bounding box `(start, shape)`), `write_chunk` → `backend.write_chunk`, +`delete_chunk` → `backend.delete_chunk`. The two distinct names `read_region` +(selection-based, public) and `read_subset` (bounding-box bytes, backend) are +intentional: they have different signatures and the facade is the adapter +between them. + ## Facade / backend split What stays in the `zarr.crud` facade (written once, backend-neutral): @@ -170,6 +200,11 @@ surfaces native exceptions directly. - Move the 13 functions and the neutral helpers from `zarr.zarrs._api` into `zarr.crud._api`; delete `zarr.zarrs._api`. No aliases in `zarr.zarrs`. +- Rename to the consistent verb set in the move (no compatibility aliases, since + the surface is unreleased): `decode_chunk` → `read_chunk`, `decode_region` → + `read_region`, `encode_chunk` → `write_chunk`, `erase_chunk` → `delete_chunk`. 
+ `read_metadata`, `read_encoded_chunk`, `delete_node`, `list_children`, and the + `create_*` functions keep their names. - `zarr.zarrs.__init__` exports only what is needed to register and identify the zarrs backend (`ZarrsBackend`, and re-registers `"zarrs"` on import). - The changelog fragment is updated to describe `zarr.crud` as the public CRUD From c7af3053af347857118fb4cf3ceb0c2962c97a86 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 12:19:13 +0200 Subject: [PATCH 28/41] docs: simplify CRUD reads to two non-overlapping addressing axes read_chunk stays parameter-free (chunk-grid addressed, whole chunk); all selection-based reads route through read_region/read_subset (array-coordinate, spans chunks). Drops the deferred chunk-subset selection parameter. Co-Authored-By: Claude Fable 5 --- ...6-06-15-crud-backend-abstraction-design.md | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md index 2b4946a812..349c77fa73 100644 --- a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md +++ b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md @@ -20,8 +20,7 @@ Non-goals for this change (deliberately deferred): - Wiring the CRUD layer under zarr-python's own `Array`/`Group` classes. - Entrypoint-based backend discovery (this change uses explicit import-time registration). -- Region/selection features beyond what already exists (`encode_region`, - chunk-subset `selection` on `read_chunk` remain future work, unchanged). +- A write-side region operation (`write_region`) remains future work. ## Background @@ -103,6 +102,29 @@ array's native byte order for the requested chunk / step-1 bounding box; (still-encoded) chunk bytes or `None` if absent. `read_metadata`/ `list_children` return parsed JSON documents as dicts. 
+### Two read-addressing axes (no overlap) + +Reads are addressed in one of two coordinate spaces, and the two never overlap: + +- **Chunk-grid coordinates** — `read_chunk(coords)` / `read_encoded_chunk(coords)` + return a whole chunk addressed by its grid position. `read_chunk` returns the + *full* chunk shape, including the fill-padded overhang of edge chunks; + `read_encoded_chunk` returns the raw stored bytes. These pair with + `write_chunk` / `delete_chunk`, which are also chunk-grid-addressed. +- **Array-element coordinates** — `read_subset(start, shape)` returns an + arbitrary box in array space, which generally spans multiple chunks and is + clipped to the array bounds. The facade's `read_region(selection)` normalizes + a numpy selection to a step-1 bounding box and calls it. + +`read_chunk` takes no `selection` parameter. A sub-region *within* a single +chunk is simply a `read_region` whose bounding box lies inside one chunk; the +backend already decodes only the overlapping chunk(s) (sharding-aware in the +zarrs backend), so a chunk-relative partial-read needs no separate API. The +`Store.get(key, byte_range=)` analogue is therefore `read_region` over a +single-chunk box, not a parameter on `read_chunk`; `read_subset` itself has no +single-`get` analogue — it is closer to "`get_partial_values` across many keys, +stitched into one array." 
+ ## Method naming Both the public facade and the backend contract use a single, consistent verb From 3740888d2ccf97c369aea5ccf4710cd79118015a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 12:29:15 +0200 Subject: [PATCH 29/41] docs: make read_encoded_chunk a facade store.get helper, not a backend method Co-Authored-By: Claude Fable 5 --- ...6-06-15-crud-backend-abstraction-design.md | 57 +++++++++++++------ 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md index 349c77fa73..bf8043513b 100644 --- a/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md +++ b/docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md @@ -88,7 +88,6 @@ class CrudBackend(Protocol): async def create_group(self, store, path, metadata, *, overwrite: bool) -> None: ... async def read_metadata(self, store, path) -> dict[str, JSON]: ... async def read_chunk(self, store, path, metadata, coords) -> bytes: ... - async def read_encoded_chunk(self, store, path, metadata, coords) -> bytes | None: ... async def read_subset(self, store, path, metadata, start, shape) -> bytes: ... async def write_chunk(self, store, path, metadata, coords, data: bytes) -> None: ... async def delete_chunk(self, store, path, metadata, coords) -> None: ... @@ -96,21 +95,24 @@ class CrudBackend(Protocol): async def list_children(self, store, path) -> list[tuple[str, dict[str, JSON]]]: ... ``` +Nine methods. `read_encoded_chunk` is deliberately **not** a backend method — +see below; it is a backend-independent facade helper over `store.get`. + Byte conventions: `read_chunk`/`read_subset` return C-contiguous raw bytes in the array's native byte order for the requested chunk / step-1 bounding box; -`write_chunk` takes the same. `read_encoded_chunk` returns the raw stored -(still-encoded) chunk bytes or `None` if absent. 
`read_metadata`/ -`list_children` return parsed JSON documents as dicts. +`write_chunk` takes the same. `read_metadata`/`list_children` return parsed JSON +documents as dicts. ### Two read-addressing axes (no overlap) Reads are addressed in one of two coordinate spaces, and the two never overlap: - **Chunk-grid coordinates** — `read_chunk(coords)` / `read_encoded_chunk(coords)` - return a whole chunk addressed by its grid position. `read_chunk` returns the - *full* chunk shape, including the fill-padded overhang of edge chunks; - `read_encoded_chunk` returns the raw stored bytes. These pair with - `write_chunk` / `delete_chunk`, which are also chunk-grid-addressed. + return a whole chunk addressed by its grid position. `read_chunk` (a backend + method) decodes and returns the *full* chunk shape, including the fill-padded + overhang of edge chunks; `read_encoded_chunk` (a facade helper, not a backend + method) returns the raw stored bytes or `None`. These pair with `write_chunk` / + `delete_chunk`, which are chunk-grid-addressed backend methods. - **Array-element coordinates** — `read_subset(start, shape)` returns an arbitrary box in array space, which generally spans multiple chunks and is clipped to the array bounds. The facade's `read_region(selection)` normalizes @@ -125,6 +127,27 @@ single-chunk box, not a parameter on `read_chunk`; `read_subset` itself has no single-`get` analogue — it is closer to "`get_partial_values` across many keys, stitched into one array." +### `read_encoded_chunk` is facade-level, not a backend method + +Reading a chunk's raw stored bytes is just `store.get(chunk_key)`, and the chunk +key is computable from the metadata document alone via zarr-python's +`chunk_key_encoding` — no decoding, no codec pipeline, nothing backend-specific. +Both backends would implement it identically. 
So the facade implements +`read_encoded_chunk(store, path, metadata, coords)` directly as: encode the chunk +key from the metadata, `store.get` it, return the bytes or `None`. It works the +same regardless of which backend (or none) is selected, which is correct since it +is pure store I/O. Under sharding the chunk key holds the whole shard blob, and +this returns exactly that raw object. + +This raw read can also be *expressed* through `read_chunk` by supplying a view +metadata document (`data_type: uint8` + a single `bytes` codec, identity decode) +— a nice demonstration that the read-only-view mechanism is general — but that +route requires knowing the encoded byte length up front to set the chunk shape (a +`store.getsize` round-trip) and would synthesize a fill-valued array for a missing +chunk instead of returning `None`. So `store.get` is the correct implementation +for fetching stored bytes; the view trick is the general tool for *reinterpreting* +decoded data under a different dtype/shape, which `read_chunk` already supports. 
+ ## Method naming Both the public facade and the backend contract use a single, consistent verb @@ -139,7 +162,7 @@ Public facade (`zarr.crud`): | `create_new_array` / `create_overwrite_array` | create | node lifecycle | | `read_metadata` | read | array or group document | | `read_chunk` | read | decoded chunk → `ndarray` | -| `read_encoded_chunk` | read | raw stored bytes, no decode | +| `read_encoded_chunk` | read | raw stored bytes, no decode (facade-only, `store.get`) | | `read_region` | read | numpy basic-indexing selection → `ndarray` | | `write_chunk` | write | encode + store a chunk | | `delete_chunk` | delete | remove one chunk | @@ -147,13 +170,13 @@ Public facade (`zarr.crud`): | `list_children` | list | direct children of a group | Facade → backend mapping for the byte-level methods: `read_chunk` → -`backend.read_chunk`, `read_encoded_chunk` → `backend.read_encoded_chunk`, -`read_region` → `backend.read_subset` (the facade normalizes the selection to a -step-1 bounding box `(start, shape)`), `write_chunk` → `backend.write_chunk`, -`delete_chunk` → `backend.delete_chunk`. The two distinct names `read_region` -(selection-based, public) and `read_subset` (bounding-box bytes, backend) are -intentional: they have different signatures and the facade is the adapter -between them. +`backend.read_chunk`, `read_region` → `backend.read_subset` (the facade +normalizes the selection to a step-1 bounding box `(start, shape)`), +`write_chunk` → `backend.write_chunk`, `delete_chunk` → `backend.delete_chunk`. +`read_encoded_chunk` maps to no backend method — the facade serves it from +`store.get`. The two distinct names `read_region` (selection-based, public) and +`read_subset` (bounding-box bytes, backend) are intentional: they have different +signatures and the facade is the adapter between them. 
## Facade / backend split @@ -164,6 +187,8 @@ What stays in the `zarr.crud` facade (written once, backend-neutral): - numpy assembly: `np.frombuffer(...).reshape(...)` and the strided/reversed/ integer-axis post-index views; read-only result guarantee; - the empty-selection short circuit (no backend call); +- `read_encoded_chunk`: encode the chunk key from the metadata and `store.get` + it (no backend involved); - `ZarrsOptions` acceptance (still a placeholder) and backend resolution. What moves into each backend: From a9cdae6fb5103fde95ea1a73da77a090ab54c4ec Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 13:38:37 +0200 Subject: [PATCH 30/41] docs: implementation plan for backend-agnostic CRUD layer Co-Authored-By: Claude Fable 5 --- .../2026-06-15-crud-backend-abstraction.md | 1698 +++++++++++++++++ 1 file changed, 1698 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md diff --git a/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md b/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md new file mode 100644 index 0000000000..985f325990 --- /dev/null +++ b/docs/superpowers/plans/2026-06-15-crud-backend-abstraction.md @@ -0,0 +1,1698 @@ +# Backend-agnostic CRUD layer Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Turn the low-level functional CRUD API into a backend-agnostic `zarr.crud` package with a pure-Python reference backend and the existing zarrs bindings as a second, interchangeable backend. + +**Architecture:** A narrow async `CrudBackend` protocol (byte/metadata level) plus a shared `zarr.crud` facade that holds all backend-neutral logic (selection normalization, numpy assembly, dtype handling, `read_encoded_chunk` via `store.get`). 
Two backends conform: `ReferenceBackend` (pure Python, wraps zarr-python's own codec pipeline / indexer / metadata machinery) and `ZarrsBackend` (wraps `_zarrs_bindings`). A registry + `zarr.config` key `crud.backend` (default `"reference"`) selects one; every facade function also takes `backend=`. + +**Tech Stack:** Python 3.12+, numpy, zarr-python internals (`BatchedCodecPipeline`, `AsyncArray`, `save_metadata`, `ArrayConfig`/`ArraySpec`, chunk-key encoding), the existing `_zarrs_bindings` Rust extension (unchanged — no Rust build needed). + +Spec: `docs/superpowers/specs/2026-06-15-crud-backend-abstraction-design.md`. + +--- + +## Environment notes (read first) + +- **Run python/pytest/mypy via `uv run`.** The zarrs backend needs the extension: `uv run --group zarrs pytest ...`. The reference backend works under plain `uv run pytest ...`. +- The Claude Code bash sandbox is broken on this host (`bwrap: loopback` error). Run commands with the sandbox **disabled**. +- **No Rust changes in this plan.** The `_zarrs_bindings` pyfunctions keep their existing names (`retrieve_chunk`, `store_chunk`, `erase_chunk`, `retrieve_array_subset`, `retrieve_encoded_chunk`, `create_array`, `create_group`, `read_metadata`, `delete_node`, `list_children`); `ZarrsBackend` adapts them to the contract's verb names. No `cargo` build or `uv sync --reinstall` is required, but the `zarrs` group must already be installed (`uv sync --group zarrs`) to run the zarrs-parametrized tests. +- Pre-commit hooks (ruff strict, mypy strict over `src`+`tests`, codespell) run on `git commit`. If a hook rewrites a file, `git add` and commit again. +- Docstrings use markdown (single backticks), not RST. +- pytest is configured with `asyncio_mode = "auto"` — async tests/fixtures need no decorator. 
+ +## File structure + +``` +src/zarr/crud/ + __init__.py # public exports; registers the reference backend at import + _backend.py # CrudBackend Protocol + NodeExistsError + _registry.py # register_backend / get_backend + config default resolution + _reference.py # ReferenceBackend (pure Python) + _api.py # shared async facade (the public functions) + neutral helpers +src/zarr/zarrs/ + __init__.py # SHRINKS: version + register ZarrsBackend; no _api re-exports + _backend.py # ZarrsBackend (wraps _zarrs_bindings) — NEW + _bridge.py # unchanged + _api.py # DELETED +src/zarr/core/config.py # add "crud": {"backend": "reference"} +tests/crud/ + __init__.py + conftest.py # store fixture, backend fixture (reference+zarrs), metadata helpers + test_registry.py # registry + default + override + test_reference_backend.py # direct reference-backend smoke tests + test_crud.py # full differential suite, parametrized over backend x store +tests/zarrs/ + __init__.py # unchanged + conftest.py # unchanged (still used by test_bridge/test_cache) + test_bridge.py # unchanged + test_cache.py # imports updated to zarr.crud read_chunk/write_chunk, backend="zarrs" + test_node.py # DELETED (covered by tests/crud/test_crud.py) + test_chunk.py # DELETED (covered by tests/crud/test_crud.py) + test_api.py # DELETED (replaced by tests/crud import coverage) +changes/+zarrs-bindings.feature.md # reworded for zarr.crud +.github/workflows/zarrs.yml # run tests/crud tests/zarrs +``` + +--- + +### Task 1: `zarr.crud` skeleton — protocol, exceptions, registry, config + +**Files:** +- Create: `src/zarr/crud/__init__.py` +- Create: `src/zarr/crud/_backend.py` +- Create: `src/zarr/crud/_registry.py` +- Modify: `src/zarr/core/config.py` +- Create: `tests/crud/__init__.py` (empty) +- Test: `tests/crud/test_registry.py` + +- [ ] **Step 1: Write the failing test** — `tests/crud/test_registry.py` + +```python +from __future__ import annotations + +import pytest + +from zarr.crud import CrudBackend, 
NodeExistsError, get_backend, register_backend + + +def test_node_exists_error_is_value_error() -> None: + assert issubclass(NodeExistsError, ValueError) + + +def test_default_backend_is_reference() -> None: + # the reference backend is registered at import and is the configured default + be = get_backend() + assert be is get_backend("reference") + + +def test_get_unknown_backend_raises() -> None: + with pytest.raises(KeyError, match="no CRUD backend"): + get_backend("does-not-exist") + + +def test_register_and_resolve_instance() -> None: + class Dummy: + pass + + dummy = Dummy() + register_backend("dummy-test", dummy) # type: ignore[arg-type] + try: + assert get_backend("dummy-test") is dummy + finally: + from zarr.crud import _registry + + _registry._BACKENDS.pop("dummy-test", None) + + +def test_protocol_is_runtime_checkable() -> None: + # ReferenceBackend (registered as "reference") structurally satisfies the protocol + assert isinstance(get_backend("reference"), CrudBackend) +``` + +- [ ] **Step 2: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_registry.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'zarr.crud'` + +- [ ] **Step 3: Create `tests/crud/__init__.py`** (empty file) + +- [ ] **Step 4: Create `src/zarr/crud/_backend.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +class NodeExistsError(ValueError): + """Raised when a node already exists at a path and overwrite was not requested.""" + + +@runtime_checkable +class CrudBackend(Protocol): + """The byte/metadata-level contract a CRUD backend must implement. + + Methods take neutral types: the metadata document as a `dict`, a zarr + `Store`, and plain zarr paths (`""`, `"foo/bar"`). They return raw bytes, + parsed JSON documents, or `None`. 
The shared `zarr.crud` facade builds the + numpy- and selection-level API on top of these. + + `create_*` raise `zarr.crud.NodeExistsError` when a node exists and + `overwrite` is false. `read_metadata`/`delete_node`/`list_children` raise + `zarr.errors.NodeNotFoundError` when the target is missing. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: ... + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: ... + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: ... + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: ... + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: ... + + async def delete_node(self, store: Store, path: str) -> None: ... + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: ... +``` + +- [ ] **Step 5: Create `src/zarr/crud/_registry.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.config import config + +if TYPE_CHECKING: + from zarr.crud._backend import CrudBackend + +_BACKENDS: dict[str, CrudBackend] = {} + + +def register_backend(name: str, backend: CrudBackend) -> None: + """Register a CRUD backend instance under `name`.""" + _BACKENDS[name] = backend + + +def get_backend(name: str | None = None) -> CrudBackend: + """Resolve a backend by name, or the configured default when `name` is None. 
+ + Selecting `"zarrs"` imports `zarr.zarrs` if needed so it can self-register. + """ + if name is None: + name = config.get("crud.backend") + if name not in _BACKENDS and name == "zarrs": + import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + if name not in _BACKENDS: + raise KeyError( + f"no CRUD backend registered as {name!r}; registered: {sorted(_BACKENDS)}" + ) + return _BACKENDS[name] +``` + +- [ ] **Step 6: Create `src/zarr/crud/__init__.py`** (reference backend is added in Task 2; for now register nothing) + +```python +""" +Backend-agnostic low-level functional CRUD API for zarr hierarchies. + +The public functions delegate byte- and metadata-level work to a `CrudBackend`. +Two backends ship: a pure-Python reference backend (the default) and a +zarrs-accelerated backend (`zarr.zarrs`, requires the `zarrs-bindings` +extension). Select one with the `crud.backend` config key or a per-call +`backend=` argument. + +Array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading it from the store, which +makes read-only and virtual views possible. +""" + +from zarr.crud._backend import CrudBackend, NodeExistsError +from zarr.crud._registry import get_backend, register_backend + +__all__ = [ + "CrudBackend", + "NodeExistsError", + "get_backend", + "register_backend", +] +``` + +- [ ] **Step 7: Add the config default** — `src/zarr/core/config.py` + +Find the defaults mapping passed to the `Config(...)` constructor (it contains the `"codec_pipeline"` key). 
Add a sibling entry: + +```python + "crud": {"backend": "reference"}, +``` + +Run to confirm it loads: `uv run python -c "from zarr.core.config import config; print(config.get('crud.backend'))"` +Expected: `reference` + +- [ ] **Step 8: Run the test (note: `test_default_backend_is_reference` and the protocol test still fail — reference backend arrives in Task 2)** + +Run: `uv run pytest tests/crud/test_registry.py -v` +Expected: `test_node_exists_error_is_value_error`, `test_get_unknown_backend_raises`, `test_register_and_resolve_instance` PASS; `test_default_backend_is_reference` and `test_protocol_is_runtime_checkable` FAIL (`KeyError: no CRUD backend registered as 'reference'`). That is expected at this task boundary; they pass after Task 2. + +- [ ] **Step 9: Commit** + +```bash +git add src/zarr/crud/_backend.py src/zarr/crud/_registry.py src/zarr/crud/__init__.py src/zarr/core/config.py tests/crud/__init__.py tests/crud/test_registry.py +git commit -m "feat: zarr.crud skeleton — CrudBackend protocol, registry, config" +``` + +End every commit body in this plan with: +``` +Co-Authored-By: Claude Fable 5 +``` + +--- + +### Task 2: `ReferenceBackend` (pure Python) + +**Files:** +- Create: `src/zarr/crud/_reference.py` +- Modify: `src/zarr/crud/__init__.py` +- Test: `tests/crud/test_reference_backend.py` + +All snippets below are verified against the installed zarr-python.
+ +- [ ] **Step 1: Write the failing test** — `tests/crud/test_reference_backend.py` + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +import zarr +from zarr.crud import NodeExistsError, get_backend +from zarr.errors import NodeNotFoundError +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + pass + +import pytest + + +def _array_meta() -> dict: + arr = zarr.create_array(store=MemoryStore(), shape=(8, 8), chunks=(4, 4), dtype="uint16") + return dict(arr.metadata.to_dict()) + + +async def test_reference_round_trip_chunk() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await be.write_chunk(store, "a", meta, (0, 1), value.tobytes()) + raw = await be.read_chunk(store, "a", meta, (0, 1)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 4), value) + + +async def test_reference_read_subset_spans_chunks() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array(store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16") + data = np.arange(64, dtype="uint16").reshape(8, 8) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + raw = await be.read_subset(store, "a", meta, (2, 1), (5, 4)) + np.testing.assert_array_equal( + np.frombuffer(raw, dtype="uint16").reshape(5, 4), data[2:7, 1:5] + ) + + +async def test_reference_create_exists_raises() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + with pytest.raises(NodeExistsError): + await be.create_array(store, "a", meta, overwrite=False) + + +async def test_reference_read_metadata_missing_raises() -> None: + be = get_backend("reference") + with pytest.raises(NodeNotFoundError): + await be.read_metadata(MemoryStore(), "nope") +``` + +- [ 
] **Step 2: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_reference_backend.py -v` +Expected: FAIL — `KeyError: no CRUD backend registered as 'reference'` + +- [ ] **Step 3: Create `src/zarr/crud/_reference.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np + +from zarr.core.array import AsyncArray, create_codec_pipeline +from zarr.core.array_spec import ArrayConfig, ArraySpec +from zarr.core.buffer.core import NDBuffer, default_buffer_prototype +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON +from zarr.core.group import GroupMetadata +from zarr.core.metadata.io import save_metadata +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata +from zarr.crud._backend import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.storage._common import StorePath + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + """Parse a metadata document into a v2 or v3 array metadata object.""" + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _native_dtype(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> np.dtype[Any]: + """Numpy dtype in native byte order (zarrs and the facade assume native).""" + return meta_obj.dtype.to_native_dtype().newbyteorder("=") + + +def _chunk_shape(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> tuple[int, ...]: + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return tuple(grid.chunk_shape) + return tuple(meta_obj.chunks) + + 
+def _array_spec( + meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, ...] +) -> ArraySpec: + return ArraySpec( + shape=shape, + dtype=meta_obj.dtype, + fill_value=meta_obj.fill_value, + config=ArrayConfig.from_dict({}), + prototype=default_buffer_prototype(), + ) + + +def _meta_key(path: str, zarr_format: int) -> str: + fname = ZARR_JSON if zarr_format == 3 else ZARRAY_JSON + p = path.strip("/") + return f"{p}/{fname}" if p else fname + + +class ReferenceBackend: + """Pure-Python CRUD backend wrapping zarr-python's own machinery. + + Constructs no high-level `Array` for chunk operations (it drives the codec + pipeline directly); it does reuse `AsyncArray.getitem` for multi-chunk + subset reads, which is exactly the `BasicIndexer` + codec-pipeline read path. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = _parse_array_metadata(metadata) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = GroupMetadata.from_dict(dict(metadata)) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def _create( + self, store: Store, path: str, meta_obj: Any, zarr_format: int, *, overwrite: bool + ) -> None: + sp = StorePath(store, path.strip("/")) + proto = default_buffer_prototype() + if overwrite: + await store.delete_dir(path.strip("/")) + else: + key = _meta_key(path, zarr_format) + if await store.get(key, prototype=proto) is not None: + raise NodeExistsError(f"a node already exists at path {path!r}") + await save_metadata(sp, meta_obj, ensure_parents=True) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + from zarr.core._json import buffer_to_json_object + + proto = default_buffer_prototype() + p = path.strip("/") + sp = 
StorePath(store, p) + buf = await (sp / ZARR_JSON).get(prototype=proto) + if buf is not None: + return buffer_to_json_object(buf) + buf2 = await (sp / ZARRAY_JSON).get(prototype=proto) + if buf2 is not None: + doc = buffer_to_json_object(buf2) + zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) + if zattrs is not None: + doc["attributes"] = buffer_to_json_object(zattrs) + return doc + raise NodeNotFoundError(f"no node found at path {path!r}") + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + buf = await (sp / chunk_key).get(prototype=default_buffer_prototype()) + if buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + decoded = list(await pipeline.decode_batch([(buf, spec)])) + arr = np.asarray(decoded[0].as_numpy_array(), dtype=np_dtype) + return np.ascontiguousarray(arr).tobytes() + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + np_dtype = _native_dtype(meta_obj) + async_arr = AsyncArray(metadata=meta_obj, store_path=StorePath(store, path.strip("/"))) + selection = tuple(slice(s, s + length) for s, length in zip(start, shape, strict=True)) + result = await async_arr.getitem(selection) + return np.ascontiguousarray(np.asarray(result, dtype=np_dtype)).tobytes() + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = 
_native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + arr = np.frombuffer(data, dtype=np_dtype).reshape(shape) + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + encoded = list(await pipeline.encode_batch([(NDBuffer.from_ndarray_like(arr), spec)])) + buf = encoded[0] + if buf is None: + await (sp / chunk_key).delete() + else: + await (sp / chunk_key).set(buf) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + meta_obj = _parse_array_metadata(metadata) + sp = StorePath(store, path.strip("/")) + await (sp / meta_obj.encode_chunk_key(coords)).delete() + + async def delete_node(self, store: Store, path: str) -> None: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + present = ( + await (sp / ZARR_JSON).get(prototype=proto) is not None + or await (sp / ZARRAY_JSON).get(prototype=proto) is not None + ) + if not present: + raise NodeNotFoundError(f"no node found at path {path!r}") + await store.delete_dir(p) + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + if ( + await (sp / ZARR_JSON).get(prototype=proto) is None + and await (sp / ZARRAY_JSON).get(prototype=proto) is None + ): + raise NodeNotFoundError(f"no node found at path {path!r}") + prefix = f"{p}/" if p else "" + children: list[tuple[str, dict[str, JSON]]] = [] + async for name in store.list_dir(prefix): + child_path = f"{p}/{name}" if p else name + child_sp = StorePath(store, child_path) + if ( + await (child_sp / ZARR_JSON).get(prototype=proto) is not None + or await (child_sp / ZARRAY_JSON).get(prototype=proto) is not None + ): + children.append((name, await self.read_metadata(store, child_path))) + return children +``` + +Notes for the implementer: +- 
`decode_batch`/`encode_batch` are async and return iterables — wrap in `list(...)`. +- `ArraySpec.dtype` is the `ZDType` object (`meta_obj.dtype`), **not** a numpy dtype. +- `_native_dtype` only returns the dtype in native byte order; the `np.asarray(..., dtype=np_dtype)` conversion does the byte swap, so both backends return identical + bytes through the facade (the facade reads them with a native dtype). +- `AsyncArray(metadata=meta_obj, store_path=...)` constructs from an explicit + document without reading the store. + +- [ ] **Step 4: Register the reference backend** — append to `src/zarr/crud/__init__.py` (after the imports, before `__all__`) + +```python +from zarr.crud._reference import ReferenceBackend + +register_backend("reference", ReferenceBackend()) +``` + +and add `"ReferenceBackend"` to `__all__`. + +- [ ] **Step 5: Run the tests** + +Run: `uv run pytest tests/crud/test_reference_backend.py tests/crud/test_registry.py -v` +Expected: all PASS (the two previously-failing registry tests now pass too). + +- [ ] **Step 6: Commit** + +```bash +git add src/zarr/crud/_reference.py src/zarr/crud/__init__.py tests/crud/test_reference_backend.py +git commit -m "feat: pure-Python ReferenceBackend for zarr.crud" +``` + +--- + +### Task 3: shared facade `zarr.crud._api` + differential suite (reference backend) + +**Files:** +- Create: `src/zarr/crud/_api.py` +- Modify: `src/zarr/crud/__init__.py` +- Create: `tests/crud/conftest.py` +- Test: `tests/crud/test_crud.py` + +- [ ] **Step 1: Create `tests/crud/conftest.py`** + +```python +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + from pathlib import Path + + from zarr.abc.store import Store + + +def _zarrs_available() -> bool: + try: + import _zarrs_bindings # noqa: F401 + except ImportError: + return False + return True + + +@pytest.fixture( + params=[ + "reference", + pytest.param( + "zarrs", +
marks=pytest.mark.skipif( + not _zarrs_available(), reason="zarrs-bindings is not installed" + ), + ), + ] +) +def backend(request: pytest.FixtureRequest) -> str: + """A CRUD backend name. The zarrs param is skipped when the extension is absent.""" + import zarr.crud # noqa: F401 (ensures reference is registered) + + if request.param == "zarrs": + import zarr.zarrs # noqa: F401 (registers the zarrs backend) + return request.param + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: + if request.param == "memory": + s: Store = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """An array metadata document built via zarr-python itself.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + doc.pop("attributes", None) + return doc + + +def filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array 'a', fill it with a ramp, return (data, metadata).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if params.get("zarr_format") == 2: + doc.pop("attributes", None) + return data, doc +``` + +- [ ] **Step 2: Write the failing test** — `tests/crud/test_crud.py` + +```python +from __future__ import annotations + +import copy +import json +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.crud.conftest import array_metadata, filled +from 
zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud import ( + NodeExistsError, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +from zarr.errors import NodeNotFoundError + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = {"zarr_format": 3, "node_type": "group", "attributes": {"answer": 42}} + + +# --- node lifecycle --- + +async def test_create_new_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + + +async def test_create_new_group_existing_raises(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo", backend=backend) + + +async def test_create_overwrite_group_replaces_array(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await create_overwrite_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + + +async def test_create_new_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + a = zarr.open_array(store=store, path="arr", mode="r") + assert a.shape == (8, 8) + assert a.dtype == np.dtype("uint16") + + +async def test_create_new_array_v2(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", 
mode="r").metadata.zarr_format == 2 + + +async def test_create_overwrite_array(backend: str, store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").shape == (8, 8) + + +async def test_read_metadata(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + observed = await read_metadata(store, "arr", backend=backend) + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == json.loads(raw.to_bytes()) + + +async def test_read_metadata_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope", backend=backend) + + +async def test_delete_node(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed", backend=backend) + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope", backend=backend) + + +async def test_list_children(backend: str, store: Store) -> None: + root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + by_path = dict(await list_children(store, "", backend=backend)) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + assert not any(p.startswith("/") for p in by_path) + + +# --- chunk I/O --- + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64", "u2"]) +async def 
test_read_chunk_differential(backend: str, store: Store, dtype: str) -> None: + data, meta = filled(store, dtype=dtype) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_read_chunk_codecs(backend: str, store: Store, compressors: Any) -> None: + data, meta = filled(store, compressors=compressors) + observed = await read_chunk(meta, store, "a", (0, 1), backend=backend) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_read_chunk_v2(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="<u2", zarr_format=2) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +async def test_read_chunk_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_missing_is_fill(backend: str, store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = dict(arr.metadata.to_dict()) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_read_chunk_metadata_view(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await read_chunk(view, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_read_chunk_readonly(backend: str, store: Store) -> None: + _, meta = filled(store) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + assert not observed.flags.writeable + + +async def
test_write_chunk_differential(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await write_chunk(meta, store, "a", (0, 1), value, backend=backend) + np.testing.assert_array_equal(zarr.open_array(store=store, path="a", mode="r")[0:4, 4:8], value) + + +async def test_write_chunk_shape_mismatch(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + with pytest.raises(ValueError, match="chunk shape"): + await write_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16"), backend=backend) + + +async def test_delete_chunk(backend: str, store: Store) -> None: + data, meta = filled(store) + assert await store.exists("a/c/0/0") + await delete_chunk(meta, store, "a", (0, 0), backend=backend) + assert not await store.exists("a/c/0/0") + + +async def test_read_encoded_chunk_matches_store(backend: str, store: Store) -> None: + _, meta = filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0), backend=backend) + expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_is_none(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="e", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "e", (0, 0), backend=backend) is None + + +# --- region I/O --- + +SELECTIONS: list[Any] = [ + (slice(None), slice(None)), + (slice(2, 7), slice(1, 5)), + (slice(None), 3), + (5, slice(None)), + (3, 4), + (slice(1, 8, 2), slice(None)), + (slice(None), slice(6, 1, -2)), + (slice(-3, None), slice(None, -1)), + ..., + (..., slice(2, 4)), + (slice(0, 0), slice(None)), + (slice(2, 6),), +] + + +@pytest.mark.parametrize("sel", SELECTIONS) +async def 
test_read_region_differential(backend: str, store: Store, sel: Any) -> None: + data, meta = filled(store) + observed = await read_region(meta, store, "a", sel, backend=backend) + np.testing.assert_array_equal(observed, data[sel]) + + +async def test_read_region_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_region(meta, store, "a", (slice(1, 7), slice(3, 8)), backend=backend) + np.testing.assert_array_equal(observed, data[1:7, 3:8]) + + +async def test_read_region_too_many_indices(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="too many indices"): + await read_region(meta, store, "a", (0, 0, 0), backend=backend) + + +async def test_read_region_fancy_rejected(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(TypeError, match="only integers, slices"): + await read_region(meta, store, "a", ([0, 1], slice(None)), backend=backend) # type: ignore[arg-type] +``` + +- [ ] **Step 3: Run it to verify failure** + +Run: `uv run pytest tests/crud/test_crud.py -q` +Expected: collection error — `ImportError: cannot import name 'read_chunk' from 'zarr.crud'` + +- [ ] **Step 4: Create `src/zarr/crud/_api.py`** + +```python +from __future__ import annotations + +import operator +import types +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, cast + +import numpy as np + +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud._registry import get_backend + +if TYPE_CHECKING: + from collections.abc import Mapping + + import numpy.typing as npt + + from zarr.abc.store import Store + from zarr.core.common import JSON + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + from zarr.crud._backend import CrudBackend + + +@dataclass(frozen=True, slots=True) +class CrudOptions: + """Options 
for CRUD operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +BasicIndex = int | slice | types.EllipsisType +BasicSelection = BasicIndex | tuple[BasicIndex, ...] + + +def _resolve_backend(backend: CrudBackend | str | None) -> CrudBackend: + if backend is None or isinstance(backend, str): + return get_backend(backend) + return backend + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve native-byte-order numpy dtype and regular chunk shape. + + Backends decode to (and encode from) the native in-memory representation, + applying any byte-order codec themselves, so the dtype is coerced to native. 
+ """ + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + meta_obj = _parse_array_metadata(metadata) + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + chunk_shape = tuple(grid.chunk_shape) + else: + chunk_shape = tuple(meta_obj.chunks) + return meta_obj.dtype.to_native_dtype().newbyteorder("="), chunk_shape + + +def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: + shape = metadata.get("shape") + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("metadata document has no valid 'shape'") + result: list[int] = [] + for s in shape: + if not isinstance(s, (int, float)): + raise TypeError(f"shape element {s!r} is not a number") + if isinstance(s, float) and not s.is_integer(): + raise TypeError(f"shape element {s!r} is not an integer") + result.append(int(s)) + return tuple(result) + + +def _chunk_key(metadata: Mapping[str, JSON], path: str, coords: tuple[int, ...]) -> str: + meta_obj = _parse_array_metadata(metadata) + rel = meta_obj.encode_chunk_key(coords) + p = path.strip("/") + return f"{p}/{rel}" if p else rel + + +def _normalize_selection( + selection: BasicSelection, shape: tuple[int, ...] +) -> tuple[list[int], list[int], tuple[slice | int, ...]]: + """Normalize a numpy basic-indexing selection to a step-1 bounding box. + + Returns `(start, bounding_shape, post_index)`: the box to fetch and the + numpy index to apply to it (strides, reversals, integer-axis removal). Only + integers, slices, and `Ellipsis` are supported; fancy indexing raises. 
+ """ + sel_tuple = selection if isinstance(selection, tuple) else (selection,) + + n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + if n_ellipsis == 1: + i = sel_tuple.index(Ellipsis) + n_fill = len(shape) - (len(sel_tuple) - 1) + if n_fill < 0: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] + if len(sel_tuple) > len(shape): + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) + + starts: list[int] = [] + lengths: list[int] = [] + post: list[slice | int] = [] + for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): + if isinstance(sel, slice): + start, stop, step = sel.indices(size) + n = len(range(start, stop, step)) + if n == 0: + starts.append(0) + lengths.append(0) + post.append(slice(None)) + elif step > 0: + last = start + (n - 1) * step + starts.append(start) + lengths.append(last - start + 1) + post.append(slice(None, None, step)) + else: + last = start + (n - 1) * step + starts.append(last) + lengths.append(start - last + 1) + post.append(slice(None, None, step)) + else: + assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" + try: + idx = operator.index(sel) + except TypeError: + raise TypeError( + "unsupported selection element " + f"{sel!r}: only integers, slices, and Ellipsis are supported" + ) from None + if idx < 0: + idx += size + if not 0 <= idx < size: + raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") + starts.append(idx) + lengths.append(1) + post.append(0) + return starts, lengths, tuple(post) + + +# --- node lifecycle --- + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = 
None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group from a group metadata document. Raises `NodeExistsError` + if a node already exists at `path`. Not atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=False) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=True) + + +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array from a v2 or v3 metadata document. Raises + `NodeExistsError` if a node already exists. Not atomic against concurrent + writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=False) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=True) + + +async def read_metadata( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. 
Raises + `zarr.errors.NodeNotFoundError` if no node exists there.""" + return await _resolve_backend(backend).read_metadata(store, path) + + +async def delete_node( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the node at `path` and everything under it. Raises + `zarr.errors.NodeNotFoundError` if absent. `path=""` clears the store.""" + await _resolve_backend(backend).delete_node(store, path) + + +async def list_children( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs (store-relative, no leading `/`). Raises + `zarr.errors.NodeNotFoundError` if no group exists there.""" + return await _resolve_backend(backend).list_children(store, path) + + +# --- chunk I/O --- + +async def read_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the whole chunk at `chunk_coords`. The metadata document + is authoritative; missing chunks decode to the fill value. The result is a + read-only view (`.copy()` for a writable array).""" + be = _resolve_backend(backend) + raw = await be.read_chunk(store, path, metadata, tuple(chunk_coords)) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if absent. 
Pure store I/O (`store.get` on the chunk key): the + `backend` argument is accepted for signature uniformity but unused.""" + key = _chunk_key(metadata, path, tuple(chunk_coords)) + buf = await store.get(key, prototype=default_buffer_prototype()) + return None if buf is None else buf.to_bytes() + + +async def write_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk at + `chunk_coords`. `value` must match the chunk shape exactly.""" + be = _resolve_backend(backend) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await be.write_chunk(store, path, metadata, tuple(chunk_coords), arr.tobytes()) + + +async def delete_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. Deleting a missing chunk is a no-op.""" + await _resolve_backend(backend).delete_chunk(store, path, metadata, tuple(chunk_coords)) + + +# --- region I/O --- + +async def read_region( + metadata: Mapping[str, JSON], + store: Store, + path: str, + selection: BasicSelection, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode a region given by a numpy basic-indexing `selection` + (integers, slices with steps, `Ellipsis`). One backend call fetches the + step-1 bounding box; strides/reversals/integer-axis removal are applied as + numpy views. Missing chunks decode to the fill value. 
Fancy indexing raises + `TypeError`. The result is a read-only view. + + Note: a `slice(0, N, step)` reads `O(N)` bytes even though `O(N / step)` are + returned; for sparse selections over large arrays prefer `read_chunk`.""" + be = _resolve_backend(backend) + dtype, _ = _chunk_dtype_and_shape(metadata) + shape = _array_shape(metadata) + starts, lengths, post_index = _normalize_selection(selection, shape) + if 0 in lengths: + block = np.empty(lengths, dtype=dtype) + block.flags.writeable = False + else: + raw = await be.read_subset(store, path, metadata, tuple(starts), tuple(lengths)) + block = np.frombuffer(raw, dtype=dtype).reshape(lengths) + return cast("np.ndarray[Any, np.dtype[Any]]", block[post_index]) +``` + +Note: `BackendArg` is a documentation alias only; use the literal +`CrudBackend | str | None` annotations as written above. + +- [ ] **Step 5: Export the facade from `src/zarr/crud/__init__.py`** + +Add to the imports and `__all__` (keep `__all__` sorted): + +```python +from zarr.crud._api import ( + CrudOptions, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +``` + +Final `__all__`: + +```python +__all__ = [ + "CrudBackend", + "CrudOptions", + "NodeExistsError", + "ReferenceBackend", + "create_new_array", + "create_new_group", + "create_overwrite_array", + "create_overwrite_group", + "delete_chunk", + "delete_node", + "get_backend", + "list_children", + "read_chunk", + "read_encoded_chunk", + "read_metadata", + "read_region", + "register_backend", + "write_chunk", +] +``` + +- [ ] **Step 6: Run the suite against the reference backend** + +Run: `uv run pytest tests/crud/test_crud.py -q` +Expected: all PASS. The `backend` fixture's `zarrs` param is skipped (no `--group zarrs`), so every test runs once on `reference` × {memory, local}. 
If `test_read_chunk_differential[>u2-...]` fails, the byte-order coercion in `_reference._native_dtype` / `_chunk_dtype_and_shape` is wrong — both must end in `.newbyteorder("=")`; do not weaken the assertion. + +- [ ] **Step 7: Commit** + +```bash +git add src/zarr/crud/_api.py src/zarr/crud/__init__.py tests/crud/conftest.py tests/crud/test_crud.py +git commit -m "feat: zarr.crud shared facade + differential suite (reference backend)" +``` + +--- + +### Task 4: `ZarrsBackend` + shrink `zarr.zarrs` + migrate zarrs tests + +**Files:** +- Create: `src/zarr/zarrs/_backend.py` +- Modify: `src/zarr/zarrs/__init__.py` +- Delete: `src/zarr/zarrs/_api.py` +- Delete: `tests/zarrs/test_node.py`, `tests/zarrs/test_chunk.py`, `tests/zarrs/test_api.py` +- Modify: `tests/zarrs/test_cache.py` + +- [ ] **Step 1: Create `src/zarr/zarrs/_backend.py`** + +```python +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from typing import TYPE_CHECKING, cast + +import _zarrs_bindings as _zb + +from zarr.crud import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _node_path(path: str) -> str: + """Convert a zarr path (`""`, `"foo/bar"`) to a zarrs node path (`"/"`, + `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + except _zb.NodeExistsError as err: + raise NodeExistsError(str(err)) from err + + +class ZarrsBackend: + """CRUD backend backed by the Rust `zarrs` crate via `_zarrs_bindings`. 
+ + Owns the zarrs-specific plumbing: JSON-serializing the metadata document, + the `/`-prefixed node-path form, store resolution, offloading the blocking + Rust calls to a worker thread, and translating binding exceptions to the + canonical `zarr.crud` / `zarr.errors` types. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + return cast("dict[str, JSON]", json.loads(raw)) + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_array_subset, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(start), + list(shape), + ) + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + data, + ) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def delete_node(self, store: Store, path: str) -> None: + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + async def list_children( + self, store: Store, path: str + ) -> list[tuple[str, dict[str, JSON]]]: + with _translate_errors(): + raw: list[tuple[str, str]] = await asyncio.to_thread( + _zb.list_children, resolve_store(store), _node_path(path) + ) + return [ + (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) + for child_path, doc in raw + ] +``` + +- [ ] **Step 2: Rewrite `src/zarr/zarrs/__init__.py`** + +```python +""" +The zarrs CRUD backend for `zarr.crud`, backed by the Rust +[`zarrs`](https://zarrs.dev) crate. + +Importing this module registers the `"zarrs"` backend. Requires the +`zarrs-bindings` extension (in-repo Rust crate; `uv sync --group zarrs`). Select +it with `zarr.config.set({"crud.backend": "zarrs"})` or per call via +`backend="zarrs"`. 
+""" + +try: + import _zarrs_bindings +except ImportError as e: + raise ImportError( + "zarr.zarrs requires the `zarrs-bindings` package, which is not installed. " + "It is built from the zarr-python repository: run `uv sync --group zarrs`." + ) from e + +from zarr.crud import register_backend +from zarr.zarrs._backend import ZarrsBackend + +__version__: str = _zarrs_bindings.version() + +register_backend("zarrs", ZarrsBackend()) + +__all__ = ["ZarrsBackend", "__version__"] +``` + +- [ ] **Step 3: Delete the moved module and obsolete tests** + +```bash +git rm src/zarr/zarrs/_api.py tests/zarrs/test_node.py tests/zarrs/test_chunk.py tests/zarrs/test_api.py +``` + +- [ ] **Step 4: Update `tests/zarrs/test_cache.py`** — change imports from the old `zarr.zarrs` functions to the `zarr.crud` facade with the zarrs backend. + +Replace the import block: + +```python +from zarr.zarrs import decode_chunk, encode_chunk +``` + +with: + +```python +from zarr.crud import read_chunk, write_chunk +``` + +Then in that file replace every `decode_chunk(` call with `read_chunk(` and every `encode_chunk(` call with `write_chunk(`, adding `backend="zarrs"` as the final keyword argument to each so they exercise the cached zarrs path. For example: + +```python + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") +... + await write_chunk(meta, store, "a", (0, 0), new, backend="zarrs") +``` + +The cache assertions (`zb.array_cache_len()` / `zb.clear_array_cache()`) and the `import _zarrs_bindings as zb` line are unchanged. The module-level `pytest.importorskip("_zarrs_bindings", ...)` stays. + +- [ ] **Step 5: Add the zarrs param coverage — already wired** + +`tests/crud/conftest.py` already parametrizes `backend` over `["reference", "zarrs"]` with the zarrs case skipped when the extension is missing. No change needed; running with `--group zarrs` now exercises it. 
+ +- [ ] **Step 6: Run everything with the zarrs extension** + +Run: `uv run --group zarrs pytest tests/crud tests/zarrs -q` +Expected: all PASS. `tests/crud/test_crud.py` now runs each test on both `reference` and `zarrs` × {memory, local}; `tests/zarrs/test_cache.py` and `test_bridge.py` pass. If a differential test passes on `reference` but fails on `zarrs` (or vice versa), the two backends disagree — investigate the backend, never weaken the assertion. + +- [ ] **Step 7: Run without the extension (reference-only path stays green)** + +Run: `uv run pytest tests/crud -q` +Expected: all PASS, zarrs params skipped. (`tests/zarrs` is not part of this run; if it is collected without the extension, its module-level `importorskip` skips it.) + +- [ ] **Step 8: Commit** + +```bash +git add src/zarr/zarrs tests/zarrs +git commit -m "feat: ZarrsBackend conforms to CrudBackend; zarr.zarrs is now a backend" +``` + +--- + +### Task 5: changelog, CI, and final verification + +**Files:** +- Modify: `changes/+zarrs-bindings.feature.md` +- Modify: `.github/workflows/zarrs.yml` + +- [ ] **Step 1: Reword the changelog fragment** — overwrite `changes/+zarrs-bindings.feature.md` + +```markdown +Added `zarr.crud`, an experimental backend-agnostic low-level functional API for +zarr hierarchy CRUD (`create_*`, `read_chunk`, `read_region`, `read_encoded_chunk`, +`write_chunk`, `delete_chunk`, `read_metadata`, `delete_node`, `list_children`). +Array routines take an explicit metadata document, enabling read-only views. +Operations delegate to a pluggable `CrudBackend`: a pure-Python reference backend +(the default) or the zarrs-accelerated backend in `zarr.zarrs`, backed by the Rust +[zarrs](https://zarrs.dev) crate via the in-repo `zarrs-bindings` PyO3 crate. +Select a backend with the `crud.backend` config key or a per-call `backend=` +argument. Build the zarrs backend for development with `uv sync --group zarrs`.
+``` + +- [ ] **Step 2: Update the CI test command** — `.github/workflows/zarrs.yml` + +Change the test step's `run:` from: + +```yaml + run: uv run --group zarrs pytest tests/zarrs -v +``` + +to: + +```yaml + run: uv run --group zarrs pytest tests/crud tests/zarrs -v +``` + +Validate: `uvx zizmor .github/workflows/zarrs.yml` → no findings. + +- [ ] **Step 3: Lint and type-check the new code** + +Run: `uv run --group dev ruff format src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Run: `uv run --group dev ruff check --fix src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Run: `uv run --group dev --group zarrs mypy src/zarr/crud src/zarr/zarrs tests/crud tests/zarrs` +Expected: all clean. (mypy is strict; the facade and backends are fully annotated.) + +- [ ] **Step 4: Full suites, both with and without the extension** + +Run: `uv run --group zarrs pytest tests/crud tests/zarrs -q` → all pass +Run: `uv run pytest tests/crud -q` → all pass (zarrs skipped) +Run (regression — the rest of zarr-python is untouched): `uv run pytest tests/test_array.py tests/test_group.py -q` → pass + +- [ ] **Step 5: Commit** + +```bash +git add changes/+zarrs-bindings.feature.md .github/workflows/zarrs.yml +git commit -m "docs/ci: zarr.crud changelog and CI coverage" +``` + +--- + +## Out of scope (per spec) + +- Wiring `zarr.crud` under zarr-python's `Array`/`Group` classes. +- Entrypoint-based backend discovery (registration is explicit/import-time). +- A write-side region operation (`write_region`). +- Renaming the Rust `_zarrs_bindings` pyfunctions (private; adapted by `ZarrsBackend`). +- `CrudOptions` fields (concurrency, checksums) — still a placeholder. 
From c2e11e105dfb1aded33afaa6c6c8e144ecf5ff89 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 13:52:01 +0200 Subject: [PATCH 31/41] =?UTF-8?q?feat:=20zarr.crud=20skeleton=20=E2=80=94?= =?UTF-8?q?=20CrudBackend=20protocol,=20registry,=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- src/zarr/core/config.py | 1 + src/zarr/crud/__init__.py | 23 +++++++++++++ src/zarr/crud/_backend.py | 68 +++++++++++++++++++++++++++++++++++++ src/zarr/crud/_registry.py | 29 ++++++++++++++++ tests/crud/__init__.py | 0 tests/crud/test_registry.py | 39 +++++++++++++++++++++ 6 files changed, 160 insertions(+) create mode 100644 src/zarr/crud/__init__.py create mode 100644 src/zarr/crud/_backend.py create mode 100644 src/zarr/crud/_registry.py create mode 100644 tests/crud/__init__.py create mode 100644 tests/crud/test_registry.py diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 7dcbc78e31..288f56de69 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -107,6 +107,7 @@ def enable_gpu(self) -> ConfigSet: "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, + "crud": {"backend": "reference"}, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", diff --git a/src/zarr/crud/__init__.py b/src/zarr/crud/__init__.py new file mode 100644 index 0000000000..96eaf7af1f --- /dev/null +++ b/src/zarr/crud/__init__.py @@ -0,0 +1,23 @@ +""" +Backend-agnostic low-level functional CRUD API for zarr hierarchies. + +The public functions delegate byte- and metadata-level work to a `CrudBackend`. +Two backends ship: a pure-Python reference backend (the default) and a +zarrs-accelerated backend (`zarr.zarrs`, requires the `zarrs-bindings` +extension). Select one with the `crud.backend` config key or a per-call +`backend=` argument. 
+ +Array routines take an explicit metadata document (a `dict` matching the +`zarr.json` / `.zarray` document) rather than reading it from the store, which +makes read-only and virtual views possible. +""" + +from zarr.crud._backend import CrudBackend, NodeExistsError +from zarr.crud._registry import get_backend, register_backend + +__all__ = [ + "CrudBackend", + "NodeExistsError", + "get_backend", + "register_backend", +] diff --git a/src/zarr/crud/_backend.py b/src/zarr/crud/_backend.py new file mode 100644 index 0000000000..808fc52498 --- /dev/null +++ b/src/zarr/crud/_backend.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +class NodeExistsError(ValueError): + """Raised when a node already exists at a path and overwrite was not requested.""" + + +@runtime_checkable +class CrudBackend(Protocol): + """The byte/metadata-level contract a CRUD backend must implement. + + Methods take neutral types: the metadata document as a `dict`, a zarr + `Store`, and plain zarr paths (`""`, `"foo/bar"`). They return raw bytes, + parsed JSON documents, or `None`. The shared `zarr.crud` facade builds the + numpy- and selection-level API on top of these. + + `create_*` raise `zarr.crud.NodeExistsError` when a node exists and + `overwrite` is false. `read_metadata`/`delete_node`/`list_children` raise + `zarr.errors.NodeNotFoundError` when the target is missing. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: ... + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: ... 
+ + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> bytes: ... + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: ... + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: ... + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: ... + + async def delete_node(self, store: Store, path: str) -> None: ... + + async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: ... diff --git a/src/zarr/crud/_registry.py b/src/zarr/crud/_registry.py new file mode 100644 index 0000000000..eca86df372 --- /dev/null +++ b/src/zarr/crud/_registry.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.config import config + +if TYPE_CHECKING: + from zarr.crud._backend import CrudBackend + +_BACKENDS: dict[str, CrudBackend] = {} + + +def register_backend(name: str, backend: CrudBackend) -> None: + """Register a CRUD backend instance under `name`.""" + _BACKENDS[name] = backend + + +def get_backend(name: str | None = None) -> CrudBackend: + """Resolve a backend by name, or the configured default when `name` is None. + + Selecting `"zarrs"` imports `zarr.zarrs` if needed so it can self-register. 
+ """ + if name is None: + name = config.get("crud.backend") + if name not in _BACKENDS and name == "zarrs": + import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + if name not in _BACKENDS: + raise KeyError(f"no CRUD backend registered as {name!r}; registered: {sorted(_BACKENDS)}") + return _BACKENDS[name] diff --git a/tests/crud/__init__.py b/tests/crud/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/crud/test_registry.py b/tests/crud/test_registry.py new file mode 100644 index 0000000000..f5a8f8b829 --- /dev/null +++ b/tests/crud/test_registry.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import pytest + +from zarr.crud import CrudBackend, NodeExistsError, get_backend, register_backend + + +def test_node_exists_error_is_value_error() -> None: + assert issubclass(NodeExistsError, ValueError) + + +def test_default_backend_is_reference() -> None: + # the reference backend is registered at import and is the configured default + be = get_backend() + assert be is get_backend("reference") + + +def test_get_unknown_backend_raises() -> None: + with pytest.raises(KeyError, match="no CRUD backend"): + get_backend("does-not-exist") + + +def test_register_and_resolve_instance() -> None: + class Dummy: + pass + + dummy = Dummy() + register_backend("dummy-test", dummy) # type: ignore[arg-type] + try: + assert get_backend("dummy-test") is dummy # type: ignore[comparison-overlap] + finally: + from zarr.crud import _registry + + _registry._BACKENDS.pop("dummy-test", None) + + +def test_protocol_is_runtime_checkable() -> None: + # ReferenceBackend (registered as "reference") structurally satisfies the protocol + assert isinstance(get_backend("reference"), CrudBackend) From 29d126259cec41aa2e6505f55a11278bf6dbc0db Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 15:21:07 +0200 Subject: [PATCH 32/41] fix: friendly error for missing zarrs backend; document protocol limits - wrap the lazy `import 
zarr.zarrs` in `get_backend` with a try/except that raises a helpful ImportError when the zarrs-bindings extension is not installed - add a thread-safety comment above the `_BACKENDS` dict explaining that CPython's import lock + GIL make it safe without additional locking - document the `runtime_checkable` limitation in the `CrudBackend` docstring: isinstance only checks method names, not signatures or async-ness; mypy is the authoritative conformance check Co-Authored-By: Claude Fable 5 --- src/zarr/crud/_backend.py | 4 ++++ src/zarr/crud/_registry.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/zarr/crud/_backend.py b/src/zarr/crud/_backend.py index 808fc52498..79fa1f49df 100644 --- a/src/zarr/crud/_backend.py +++ b/src/zarr/crud/_backend.py @@ -25,6 +25,10 @@ class CrudBackend(Protocol): `create_*` raise `zarr.crud.NodeExistsError` when a node exists and `overwrite` is false. `read_metadata`/`delete_node`/`list_children` raise `zarr.errors.NodeNotFoundError` when the target is missing. + + Note: because this protocol is `runtime_checkable`, `isinstance` checks only + verify that the method names exist, not their signatures or that they are + async. Static type checking (mypy) is the authoritative conformance check. """ async def create_array( diff --git a/src/zarr/crud/_registry.py b/src/zarr/crud/_registry.py index eca86df372..84fde1bc20 100644 --- a/src/zarr/crud/_registry.py +++ b/src/zarr/crud/_registry.py @@ -7,6 +7,9 @@ if TYPE_CHECKING: from zarr.crud._backend import CrudBackend +# Backends are registered at import time (reference by zarr.crud, zarrs by +# zarr.zarrs). CPython's import lock plus the GIL make this dict safe without +# additional locking. 
_BACKENDS: dict[str, CrudBackend] = {} @@ -23,7 +26,15 @@ def get_backend(name: str | None = None) -> CrudBackend: if name is None: name = config.get("crud.backend") if name not in _BACKENDS and name == "zarrs": - import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + # "reference" is pre-registered by zarr.crud at import; "zarrs" lives in a + # separate package that may not be imported yet, so load it on demand. + try: + import zarr.zarrs # noqa: F401 (import registers the zarrs backend) + except ImportError as e: + raise ImportError( + "the 'zarrs' CRUD backend requires the zarrs-bindings extension; " + "install it with: uv sync --group zarrs" + ) from e if name not in _BACKENDS: raise KeyError(f"no CRUD backend registered as {name!r}; registered: {sorted(_BACKENDS)}") return _BACKENDS[name] From 3d3784dde20b567a3eb547b153f7278d5761df85 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 15:35:17 +0200 Subject: [PATCH 33/41] feat: pure-Python ReferenceBackend for zarr.crud Implements `ReferenceBackend` in `src/zarr/crud/_reference.py`, registers it as the default `"reference"` backend at `zarr.crud` import time. Drives chunk encode/decode via `create_codec_pipeline` + the abstract `CodecPipeline.encode`/`decode` methods (not `encode_batch`/`decode_batch`, which live only on `BatchedCodecPipeline` and are not in the abstract Protocol), and multi-chunk subset reads via `AsyncArray.getitem`. Fixes two previously-failing registry tests; adds four new backend tests covering round-trip chunk I/O, multi-chunk subset reads, duplicate-create error, and missing-metadata error. 
Co-Authored-By: Claude Fable 5 --- src/zarr/crud/__init__.py | 4 + src/zarr/crud/_reference.py | 215 +++++++++++++++++++++++++++ tests/crud/test_reference_backend.py | 53 +++++++ 3 files changed, 272 insertions(+) create mode 100644 src/zarr/crud/_reference.py create mode 100644 tests/crud/test_reference_backend.py diff --git a/src/zarr/crud/__init__.py b/src/zarr/crud/__init__.py index 96eaf7af1f..ac6d1a5e60 100644 --- a/src/zarr/crud/__init__.py +++ b/src/zarr/crud/__init__.py @@ -13,11 +13,15 @@ """ from zarr.crud._backend import CrudBackend, NodeExistsError +from zarr.crud._reference import ReferenceBackend from zarr.crud._registry import get_backend, register_backend +register_backend("reference", ReferenceBackend()) + __all__ = [ "CrudBackend", "NodeExistsError", + "ReferenceBackend", "get_backend", "register_backend", ] diff --git a/src/zarr/crud/_reference.py b/src/zarr/crud/_reference.py new file mode 100644 index 0000000000..f770cc88b4 --- /dev/null +++ b/src/zarr/crud/_reference.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np + +from zarr.core.array import AsyncArray, create_codec_pipeline +from zarr.core.array_spec import ArrayConfig, ArraySpec +from zarr.core.buffer.core import NDBuffer, default_buffer_prototype +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON +from zarr.core.group import GroupMetadata +from zarr.core.metadata.io import save_metadata +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata +from zarr.crud._backend import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.storage._common import StorePath + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + """Parse a 
metadata document into a v2 or v3 array metadata object.""" + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _native_dtype(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> np.dtype[Any]: + """Numpy dtype in native byte order (zarrs and the facade assume native).""" + return meta_obj.dtype.to_native_dtype().newbyteorder("=") + + +def _chunk_shape(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> tuple[int, ...]: + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + return tuple(grid.chunk_shape) + return tuple(meta_obj.chunks) + + +def _array_spec(meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, ...]) -> ArraySpec: + return ArraySpec( + shape=shape, + dtype=meta_obj.dtype, + fill_value=meta_obj.fill_value, + config=ArrayConfig.from_dict({}), + prototype=default_buffer_prototype(), + ) + + +def _meta_key(path: str, zarr_format: int) -> str: + fname = ZARR_JSON if zarr_format == 3 else ZARRAY_JSON + p = path.strip("/") + return f"{p}/{fname}" if p else fname + + +class ReferenceBackend: + """Pure-Python CRUD backend wrapping zarr-python's own machinery. + + Constructs no high-level `Array` for chunk operations (it drives the codec + pipeline directly); it does reuse `AsyncArray.getitem` for multi-chunk + subset reads, which is exactly the `BasicIndexer` + codec-pipeline read path. 
+ """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = _parse_array_metadata(metadata) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + meta_obj = GroupMetadata.from_dict(dict(metadata)) + await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + + async def _create( + self, store: Store, path: str, meta_obj: Any, zarr_format: int, *, overwrite: bool + ) -> None: + sp = StorePath(store, path.strip("/")) + proto = default_buffer_prototype() + if overwrite: + await store.delete_dir(path.strip("/")) + else: + key = _meta_key(path, zarr_format) + if await store.get(key, prototype=proto) is not None: + raise NodeExistsError(f"a node already exists at path {path!r}") + await save_metadata(sp, meta_obj, ensure_parents=True) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + from zarr.core._json import buffer_to_json_object + + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + buf = await (sp / ZARR_JSON).get(prototype=proto) + if buf is not None: + return buffer_to_json_object(buf) + buf2 = await (sp / ZARRAY_JSON).get(prototype=proto) + if buf2 is not None: + doc = buffer_to_json_object(buf2) + zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) + if zattrs is not None: + doc["attributes"] = buffer_to_json_object(zattrs) + return doc + raise NodeNotFoundError(f"no node found at path {path!r}") + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + buf = await (sp / chunk_key).get(prototype=default_buffer_prototype()) + if buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + decoded = list(await pipeline.decode([(buf, spec)])) + nd_buf = decoded[0] + if nd_buf is None: + arr = np.full(shape, meta_obj.fill_value, dtype=np_dtype) + else: + arr = np.asarray(nd_buf.as_numpy_array(), dtype=np_dtype) + return np.ascontiguousarray(arr).tobytes() + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + meta_obj = _parse_array_metadata(metadata) + np_dtype = _native_dtype(meta_obj) + async_arr = AsyncArray(metadata=meta_obj, store_path=StorePath(store, path.strip("/"))) + selection = tuple(slice(s, s + length) for s, length in zip(start, shape, strict=True)) + result = await async_arr.getitem(selection) + return np.ascontiguousarray(np.asarray(result, dtype=np_dtype)).tobytes() + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + meta_obj = _parse_array_metadata(metadata) + shape = _chunk_shape(meta_obj) + np_dtype = _native_dtype(meta_obj) + sp = StorePath(store, path.strip("/")) + chunk_key = meta_obj.encode_chunk_key(coords) + arr = np.frombuffer(data, dtype=np_dtype).reshape(shape) + pipeline = create_codec_pipeline(meta_obj) + spec = _array_spec(meta_obj, shape) + encoded = list(await pipeline.encode([(NDBuffer.from_ndarray_like(arr), spec)])) + buf = encoded[0] + if buf is None: + await (sp / chunk_key).delete() + else: + await (sp / chunk_key).set(buf) + + async def delete_chunk( + 
self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + meta_obj = _parse_array_metadata(metadata) + sp = StorePath(store, path.strip("/")) + await (sp / meta_obj.encode_chunk_key(coords)).delete() + + async def delete_node(self, store: Store, path: str) -> None: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + present = ( + await (sp / ZARR_JSON).get(prototype=proto) is not None + or await (sp / ZARRAY_JSON).get(prototype=proto) is not None + ) + if not present: + raise NodeNotFoundError(f"no node found at path {path!r}") + await store.delete_dir(p) + + async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: + proto = default_buffer_prototype() + p = path.strip("/") + sp = StorePath(store, p) + if ( + await (sp / ZARR_JSON).get(prototype=proto) is None + and await (sp / ZARRAY_JSON).get(prototype=proto) is None + ): + raise NodeNotFoundError(f"no node found at path {path!r}") + prefix = f"{p}/" if p else "" + children: list[tuple[str, dict[str, JSON]]] = [] + async for name in store.list_dir(prefix): + child_path = f"{p}/{name}" if p else name + child_sp = StorePath(store, child_path) + if ( + await (child_sp / ZARR_JSON).get(prototype=proto) is not None + or await (child_sp / ZARRAY_JSON).get(prototype=proto) is not None + ): + children.append((name, await self.read_metadata(store, child_path))) + return children diff --git a/tests/crud/test_reference_backend.py b/tests/crud/test_reference_backend.py new file mode 100644 index 0000000000..9b6f4662eb --- /dev/null +++ b/tests/crud/test_reference_backend.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +import zarr +from zarr.crud import NodeExistsError, get_backend +from zarr.errors import NodeNotFoundError +from zarr.storage import MemoryStore + + +def _array_meta() -> dict[str, Any]: + arr = 
zarr.create_array(store=MemoryStore(), shape=(8, 8), chunks=(4, 4), dtype="uint16") + return dict(arr.metadata.to_dict()) + + +async def test_reference_round_trip_chunk() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await be.write_chunk(store, "a", meta, (0, 1), value.tobytes()) + raw = await be.read_chunk(store, "a", meta, (0, 1)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 4), value) + + +async def test_reference_read_subset_spans_chunks() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array(store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16") + data = np.arange(64, dtype="uint16").reshape(8, 8) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + raw = await be.read_subset(store, "a", meta, (2, 1), (5, 4)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(5, 4), data[2:7, 1:5]) + + +async def test_reference_create_exists_raises() -> None: + be = get_backend("reference") + store = MemoryStore() + meta = _array_meta() + await be.create_array(store, "a", meta, overwrite=False) + with pytest.raises(NodeExistsError): + await be.create_array(store, "a", meta, overwrite=False) + + +async def test_reference_read_metadata_missing_raises() -> None: + be = get_backend("reference") + with pytest.raises(NodeNotFoundError): + await be.read_metadata(MemoryStore(), "nope") From 9197a021a45f81d3d3ff9bff666a8ee22788a3df Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 16:08:42 +0200 Subject: [PATCH 34/41] fix: honor v2 memory order in ReferenceBackend chunk codec spec `_array_spec` was hardcoding `ArrayConfig.from_dict({})`, which always defaults to C order, causing silent data corruption for v2 arrays created with `order="F"`. 
`read_chunk` returned transposed bytes and `write_chunk` stored bytes that zarr-python read back incorrectly. Fix: detect `ArrayV2Metadata` and pass its `.order` attribute into the `ArrayConfig`, so the codec pipeline uses the correct memory layout. v3 arrays are unaffected (order there is a transpose codec, not memory order). Co-Authored-By: Claude Fable 5 --- src/zarr/crud/_reference.py | 3 ++- tests/crud/test_reference_backend.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/zarr/crud/_reference.py b/src/zarr/crud/_reference.py index f770cc88b4..e8b76571bd 100644 --- a/src/zarr/crud/_reference.py +++ b/src/zarr/crud/_reference.py @@ -48,11 +48,12 @@ def _chunk_shape(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> tuple[int, ...] def _array_spec(meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, ...]) -> ArraySpec: + order = meta_obj.order if isinstance(meta_obj, ArrayV2Metadata) else "C" return ArraySpec( shape=shape, dtype=meta_obj.dtype, fill_value=meta_obj.fill_value, - config=ArrayConfig.from_dict({}), + config=ArrayConfig.from_dict({"order": order}), prototype=default_buffer_prototype(), ) diff --git a/tests/crud/test_reference_backend.py b/tests/crud/test_reference_backend.py index 9b6f4662eb..4fef43427b 100644 --- a/tests/crud/test_reference_backend.py +++ b/tests/crud/test_reference_backend.py @@ -51,3 +51,23 @@ async def test_reference_read_metadata_missing_raises() -> None: be = get_backend("reference") with pytest.raises(NodeNotFoundError): await be.read_metadata(MemoryStore(), "nope") + + +async def test_reference_v2_fortran_order_round_trip() -> None: + be = get_backend("reference") + store = MemoryStore() + arr = zarr.create_array( + store=store, name="f", shape=(4, 6), chunks=(4, 6), dtype="uint16", order="F", zarr_format=2 + ) + data = np.arange(24, dtype="uint16").reshape(4, 6) + arr[:, :] = data + meta = dict(arr.metadata.to_dict()) + meta.pop("attributes", None) + # read_chunk must 
return native C-contiguous bytes matching the logical data + raw = await be.read_chunk(store, "f", meta, (0, 0)) + np.testing.assert_array_equal(np.frombuffer(raw, dtype="uint16").reshape(4, 6), data) + # write_chunk must store data zarr-python reads back correctly + new = (data + 100).astype("uint16") + await be.write_chunk(store, "f", meta, (0, 0), np.ascontiguousarray(new).tobytes()) + back = zarr.open_array(store=store, path="f", mode="r") + np.testing.assert_array_equal(back[:, :], new) From cc7fa45fac8958709a2024a6bed64c1f999b8bfe Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 16:20:32 +0200 Subject: [PATCH 35/41] feat: zarr.crud shared facade + differential suite (reference backend) Adds the public `zarr.crud` facade dispatching to a CrudBackend, plus the differential test suite exercising it against the reference backend. The zarrs param in the backend fixture is skipped until Task 4 registers the zarrs backend. Co-Authored-By: Claude Fable 5 --- src/zarr/crud/__init__.py | 28 +++ src/zarr/crud/_api.py | 357 ++++++++++++++++++++++++++++++++++++++ tests/crud/conftest.py | 95 ++++++++++ tests/crud/test_crud.py | 254 +++++++++++++++++++++++++++ 4 files changed, 734 insertions(+) create mode 100644 src/zarr/crud/_api.py create mode 100644 tests/crud/conftest.py create mode 100644 tests/crud/test_crud.py diff --git a/src/zarr/crud/__init__.py b/src/zarr/crud/__init__.py index ac6d1a5e60..33b01668b2 100644 --- a/src/zarr/crud/__init__.py +++ b/src/zarr/crud/__init__.py @@ -12,6 +12,21 @@ makes read-only and virtual views possible. 
""" +from zarr.crud._api import ( + CrudOptions, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) from zarr.crud._backend import CrudBackend, NodeExistsError from zarr.crud._reference import ReferenceBackend from zarr.crud._registry import get_backend, register_backend @@ -20,8 +35,21 @@ __all__ = [ "CrudBackend", + "CrudOptions", "NodeExistsError", "ReferenceBackend", + "create_new_array", + "create_new_group", + "create_overwrite_array", + "create_overwrite_group", + "delete_chunk", + "delete_node", "get_backend", + "list_children", + "read_chunk", + "read_encoded_chunk", + "read_metadata", + "read_region", "register_backend", + "write_chunk", ] diff --git a/src/zarr/crud/_api.py b/src/zarr/crud/_api.py new file mode 100644 index 0000000000..5fdb8fe1d2 --- /dev/null +++ b/src/zarr/crud/_api.py @@ -0,0 +1,357 @@ +from __future__ import annotations + +import operator +import types +from collections.abc import Sequence +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, cast + +import numpy as np + +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud._registry import get_backend + +if TYPE_CHECKING: + from collections.abc import Mapping + + import numpy.typing as npt + + from zarr.abc.store import Store + from zarr.core.common import JSON + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + from zarr.crud._backend import CrudBackend + + +@dataclass(frozen=True, slots=True) +class CrudOptions: + """Options for CRUD operations. + + Currently empty: fields (concurrency limits, checksum validation) arrive in + a later phase. Accepting it now keeps signatures stable. + """ + + +BasicIndex = int | slice | types.EllipsisType +BasicSelection = BasicIndex | tuple[BasicIndex, ...] 
+ + +def _resolve_backend(backend: CrudBackend | str | None) -> CrudBackend: + if backend is None or isinstance(backend, str): + return get_backend(backend) + return backend + + +def _parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata + + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) + + +def _chunk_dtype_and_shape( + metadata: Mapping[str, JSON], +) -> tuple[np.dtype[Any], tuple[int, ...]]: + """Resolve native-byte-order numpy dtype and regular chunk shape. + + Backends decode to (and encode from) the native in-memory representation, + applying any byte-order codec themselves, so the dtype is coerced to native. + """ + from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata + + meta_obj = _parse_array_metadata(metadata) + if isinstance(meta_obj, ArrayV3Metadata): + grid = meta_obj.chunk_grid + if not isinstance(grid, RegularChunkGridMetadata): + raise NotImplementedError("only regular chunk grids are supported") + chunk_shape = tuple(grid.chunk_shape) + else: + chunk_shape = tuple(meta_obj.chunks) + return meta_obj.dtype.to_native_dtype().newbyteorder("="), chunk_shape + + +def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: + shape = metadata.get("shape") + if not isinstance(shape, Sequence) or isinstance(shape, str): + raise TypeError("metadata document has no valid 'shape'") + result: list[int] = [] + for s in shape: + if not isinstance(s, (int, float)): + raise TypeError(f"shape element {s!r} is not a number") + if isinstance(s, float) and not s.is_integer(): + raise TypeError(f"shape element {s!r} is not an integer") + result.append(int(s)) + return tuple(result) + + +def _chunk_key(metadata: Mapping[str, JSON], path: str, coords: tuple[int, ...]) -> str: + meta_obj = 
_parse_array_metadata(metadata) + rel = meta_obj.encode_chunk_key(coords) + p = path.strip("/") + return f"{p}/{rel}" if p else rel + + +def _normalize_selection( + selection: BasicSelection, shape: tuple[int, ...] +) -> tuple[list[int], list[int], tuple[slice | int, ...]]: + """Normalize a numpy basic-indexing selection to a step-1 bounding box. + + Returns `(start, bounding_shape, post_index)`: the box to fetch and the + numpy index to apply to it (strides, reversals, integer-axis removal). Only + integers, slices, and `Ellipsis` are supported; fancy indexing raises. + """ + sel_tuple = selection if isinstance(selection, tuple) else (selection,) + + n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + if n_ellipsis == 1: + i = sel_tuple.index(Ellipsis) + n_fill = len(shape) - (len(sel_tuple) - 1) + if n_fill < 0: + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] + if len(sel_tuple) > len(shape): + raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") + sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) + + starts: list[int] = [] + lengths: list[int] = [] + post: list[slice | int] = [] + for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): + if isinstance(sel, slice): + start, stop, step = sel.indices(size) + n = len(range(start, stop, step)) + if n == 0: + starts.append(0) + lengths.append(0) + post.append(slice(None)) + elif step > 0: + last = start + (n - 1) * step + starts.append(start) + lengths.append(last - start + 1) + post.append(slice(None, None, step)) + else: + last = start + (n - 1) * step + starts.append(last) + lengths.append(start - last + 1) + post.append(slice(None, None, step)) + else: + assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" + try: + 
idx = operator.index(sel) + except TypeError: + raise TypeError( + "unsupported selection element " + f"{sel!r}: only integers, slices, and Ellipsis are supported" + ) from None + if idx < 0: + idx += size + if not 0 <= idx < size: + raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") + starts.append(idx) + lengths.append(1) + post.append(0) + return starts, lengths, tuple(post) + + +# --- node lifecycle --- + + +async def create_new_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group from a group metadata document. Raises `NodeExistsError` + if a node already exists at `path`. Not atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=False) + + +async def create_overwrite_group( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create a group, deleting any existing node (and children) first. Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_group(store, path, metadata, overwrite=True) + + +async def create_new_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array from a v2 or v3 metadata document. Raises + `NodeExistsError` if a node already exists. Not atomic against concurrent + writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=False) + + +async def create_overwrite_array( + metadata: Mapping[str, JSON], + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Create an array, deleting any existing node (and children) first. 
Not + atomic against concurrent writers.""" + await _resolve_backend(backend).create_array(store, path, metadata, overwrite=True) + + +async def read_metadata( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> dict[str, JSON]: + """Read the metadata document of the array or group at `path`. Raises + `zarr.errors.NodeNotFoundError` if no node exists there.""" + return await _resolve_backend(backend).read_metadata(store, path) + + +async def delete_node( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the node at `path` and everything under it. Raises + `zarr.errors.NodeNotFoundError` if absent. `path=""` clears the store.""" + await _resolve_backend(backend).delete_node(store, path) + + +async def list_children( + store: Store, + path: str, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> list[tuple[str, dict[str, JSON]]]: + """List the direct children of the group at `path` as + `(path, metadata_document)` pairs (store-relative, no leading `/`). Raises + `zarr.errors.NodeNotFoundError` if no group exists there.""" + return await _resolve_backend(backend).list_children(store, path) + + +# --- chunk I/O --- + + +async def read_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode the whole chunk at `chunk_coords`. The metadata document + is authoritative; missing chunks decode to the fill value. 
The result is a + read-only view (`.copy()` for a writable array).""" + be = _resolve_backend(backend) + raw = await be.read_chunk(store, path, metadata, tuple(chunk_coords)) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) + + +async def read_encoded_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> bytes | None: + """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or + `None` if absent. Pure store I/O (`store.get` on the chunk key): the + `backend` argument is accepted for signature uniformity but unused.""" + key = _chunk_key(metadata, path, tuple(chunk_coords)) + buf = await store.get(key, prototype=default_buffer_prototype()) + return None if buf is None else buf.to_bytes() + + +async def write_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + value: npt.ArrayLike, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Encode `value` with the codecs in `metadata` and store it as the chunk at + `chunk_coords`. `value` must match the chunk shape exactly.""" + be = _resolve_backend(backend) + dtype, chunk_shape = _chunk_dtype_and_shape(metadata) + arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) + if arr.shape != chunk_shape: + raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") + await be.write_chunk(store, path, metadata, tuple(chunk_coords), arr.tobytes()) + + +async def delete_chunk( + metadata: Mapping[str, JSON], + store: Store, + path: str, + chunk_coords: tuple[int, ...], + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> None: + """Delete the chunk at `chunk_coords`. 
Deleting a missing chunk is a no-op.""" + await _resolve_backend(backend).delete_chunk(store, path, metadata, tuple(chunk_coords)) + + +# --- region I/O --- + + +async def read_region( + metadata: Mapping[str, JSON], + store: Store, + path: str, + selection: BasicSelection, + *, + options: CrudOptions | None = None, + backend: CrudBackend | str | None = None, +) -> np.ndarray[Any, np.dtype[Any]]: + """Read and decode a region given by a numpy basic-indexing `selection` + (integers, slices with steps, `Ellipsis`). One backend call fetches the + step-1 bounding box; strides/reversals/integer-axis removal are applied as + numpy views. Missing chunks decode to the fill value. Fancy indexing raises + `TypeError`. The result is a read-only view. + + Note: a `slice(0, N, step)` reads `O(N)` bytes even though `O(N / step)` are + returned; for sparse selections over large arrays prefer `read_chunk`.""" + be = _resolve_backend(backend) + dtype, _ = _chunk_dtype_and_shape(metadata) + shape = _array_shape(metadata) + starts, lengths, post_index = _normalize_selection(selection, shape) + if 0 in lengths: + block = np.empty(lengths, dtype=dtype) + block.flags.writeable = False + else: + raw = await be.read_subset(store, path, metadata, tuple(starts), tuple(lengths)) + block = np.frombuffer(raw, dtype=dtype).reshape(lengths) + return cast("np.ndarray[Any, np.dtype[Any]]", block[post_index]) diff --git a/tests/crud/conftest.py b/tests/crud/conftest.py new file mode 100644 index 0000000000..fbf2cf9e02 --- /dev/null +++ b/tests/crud/conftest.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from zarr.storage import LocalStore, MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator + from pathlib import Path + + from zarr.abc.store import Store + + +def _zarrs_available() -> bool: + """Return True only if the zarrs CrudBackend is fully usable (registered).""" + 
try: + import _zarrs_bindings # noqa: F401 + except ImportError: + return False + try: + import zarr.zarrs + except ImportError: + return False + # The module might exist but not yet register the zarrs CrudBackend (e.g. + # Task 4 not yet merged). Verify registration before enabling the param. + try: + import zarr.crud + + zarr.crud.get_backend("zarrs") + except (ImportError, KeyError): + return False + return True + + +@pytest.fixture( + params=[ + "reference", + pytest.param( + "zarrs", + marks=pytest.mark.skipif( + not _zarrs_available(), reason="zarrs-bindings is not installed" + ), + ), + ] +) +def backend(request: pytest.FixtureRequest) -> str: + """A CRUD backend name. The zarrs param is skipped when the extension is absent.""" + import zarr.crud + + if request.param == "zarrs": + import zarr.zarrs # noqa: F401 (registers the zarrs backend) + return str(request.param) + + +@pytest.fixture(params=["memory", "local"]) +async def store(request: pytest.FixtureRequest, tmp_path: Path) -> AsyncIterator[Store]: + if request.param == "memory": + s: Store = await MemoryStore.open() + else: + s = await LocalStore.open(root=tmp_path / "store") + try: + yield s + finally: + s.close() + + +def array_metadata(**kwargs: Any) -> dict[str, Any]: + """An array metadata document built via zarr-python itself.""" + params: dict[str, Any] = { + "shape": (8, 8), + "chunks": (4, 4), + "dtype": "uint16", + "zarr_format": 3, + } | kwargs + arr = zarr.create_array(store=MemoryStore(), **params) + doc = dict(arr.metadata.to_dict()) + if params["zarr_format"] == 2: + doc.pop("attributes", None) + return doc + + +def filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: + """Create an 8x8 array 'a', fill it with a ramp, return (data, metadata).""" + params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs + arr = zarr.create_array(store=store, name="a", **params) + data = np.arange(64, 
dtype=params["dtype"]).reshape(8, 8) + arr[:, :] = data + doc = dict(arr.metadata.to_dict()) + if params.get("zarr_format") == 2: + doc.pop("attributes", None) + return data, doc diff --git a/tests/crud/test_crud.py b/tests/crud/test_crud.py new file mode 100644 index 0000000000..c301725e22 --- /dev/null +++ b/tests/crud/test_crud.py @@ -0,0 +1,254 @@ +from __future__ import annotations + +import copy +import json +from typing import TYPE_CHECKING, Any + +import numpy as np +import pytest + +import zarr +from tests.crud.conftest import array_metadata, filled +from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec +from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud import ( + NodeExistsError, + create_new_array, + create_new_group, + create_overwrite_array, + create_overwrite_group, + delete_chunk, + delete_node, + list_children, + read_chunk, + read_encoded_chunk, + read_metadata, + read_region, + write_chunk, +) +from zarr.errors import NodeNotFoundError + +if TYPE_CHECKING: + from zarr.abc.store import Store + +GROUP_META: dict[str, Any] = {"zarr_format": 3, "node_type": "group", "attributes": {"answer": 42}} + + +# --- node lifecycle --- + + +async def test_create_new_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} + + +async def test_create_new_group_existing_raises(backend: str, store: Store) -> None: + await create_new_group(GROUP_META, store, "foo", backend=backend) + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META, store, "foo", backend=backend) + + +async def test_create_overwrite_group_replaces_array(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await create_overwrite_group(GROUP_META, store, "foo", backend=backend) + assert dict(zarr.open_group(store=store, 
path="foo", mode="r").attrs) == {"answer": 42} + assert not await store.exists("foo/c/0") + + +async def test_create_new_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + a = zarr.open_array(store=store, path="arr", mode="r") + assert a.shape == (8, 8) + assert a.dtype == np.dtype("uint16") + + +async def test_create_new_array_v2(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").metadata.zarr_format == 2 + + +async def test_create_overwrite_array(backend: str, store: Store) -> None: + zarr.create_group(store=store, path="arr") + await create_overwrite_array(array_metadata(), store, "arr", backend=backend) + assert zarr.open_array(store=store, path="arr", mode="r").shape == (8, 8) + + +async def test_read_metadata(backend: str, store: Store) -> None: + await create_new_array(array_metadata(), store, "arr", backend=backend) + observed = await read_metadata(store, "arr", backend=backend) + raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) + assert raw is not None + assert observed == json.loads(raw.to_bytes()) + + +async def test_read_metadata_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "nope", backend=backend) + + +async def test_delete_node(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") + arr[:] = 1 + await delete_node(store, "doomed", backend=backend) + assert not await store.exists("doomed/zarr.json") + assert not await store.exists("doomed/c/0") + + +async def test_delete_node_missing(backend: str, store: Store) -> None: + with pytest.raises(NodeNotFoundError): + await delete_node(store, "nope", backend=backend) + + +async def test_list_children(backend: str, store: Store) -> None: + 
root = zarr.create_group(store=store) + root.create_group("sub_group", attributes={"kind": "group"}) + root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") + by_path = dict(await list_children(store, "", backend=backend)) + assert set(by_path) == {"sub_group", "sub_array"} + assert by_path["sub_group"]["node_type"] == "group" + assert by_path["sub_array"]["node_type"] == "array" + assert not any(p.startswith("/") for p in by_path) + + +# --- chunk I/O --- + + +@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64", "u2"]) +async def test_read_chunk_differential(backend: str, store: Store, dtype: str) -> None: + data, meta = filled(store, dtype=dtype) + observed = await read_chunk(meta, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4]) + + +@pytest.mark.parametrize( + "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] +) +async def test_read_chunk_codecs(backend: str, store: Store, compressors: Any) -> None: + data, meta = filled(store, compressors=compressors) + observed = await read_chunk(meta, store, "a", (0, 1), backend=backend) + np.testing.assert_array_equal(observed, data[0:4, 4:8]) + + +async def test_read_chunk_v2(backend: str, store: Store) -> None: + data, meta = filled(store, dtype=" None: + data, meta = filled(store, dtype="uint16", zarr_format=2, order="F") + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_chunk(meta, store, "a", (1, 1), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 4:8]) + + +async def test_read_chunk_missing_is_fill(backend: str, store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 + ) + meta = 
dict(arr.metadata.to_dict()) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) + + +async def test_read_chunk_metadata_view(backend: str, store: Store) -> None: + data, meta = filled(store, dtype="uint16", compressors=None) + view = copy.deepcopy(meta) + view["data_type"] = "uint8" + view["shape"] = [8, 16] + view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] + observed = await read_chunk(view, store, "a", (1, 0), backend=backend) + np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) + + +async def test_read_chunk_readonly(backend: str, store: Store) -> None: + _, meta = filled(store) + observed = await read_chunk(meta, store, "a", (0, 0), backend=backend) + assert not observed.flags.writeable + + +async def test_write_chunk_differential(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + value = np.arange(16, dtype="uint16").reshape(4, 4) + await write_chunk(meta, store, "a", (0, 1), value, backend=backend) + np.testing.assert_array_equal(zarr.open_array(store=store, path="a", mode="r")[0:4, 4:8], value) + + +async def test_write_chunk_shape_mismatch(backend: str, store: Store) -> None: + meta = array_metadata() + await create_new_array(meta, store, "a", backend=backend) + with pytest.raises(ValueError, match="chunk shape"): + await write_chunk( + meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16"), backend=backend + ) + + +async def test_delete_chunk(backend: str, store: Store) -> None: + _data, meta = filled(store) + assert await store.exists("a/c/0/0") + await delete_chunk(meta, store, "a", (0, 0), backend=backend) + assert not await store.exists("a/c/0/0") + + +async def test_read_encoded_chunk_matches_store(backend: str, store: Store) -> None: + _, meta = filled(store) + raw = await read_encoded_chunk(meta, store, "a", (0, 0), backend=backend) + expected 
= await store.get("a/c/0/0", prototype=default_buffer_prototype()) + assert expected is not None + assert raw == expected.to_bytes() + + +async def test_read_encoded_chunk_missing_is_none(backend: str, store: Store) -> None: + arr = zarr.create_array(store=store, name="e", shape=(8, 8), chunks=(4, 4), dtype="uint16") + meta = dict(arr.metadata.to_dict()) + assert await read_encoded_chunk(meta, store, "e", (0, 0), backend=backend) is None + + +# --- region I/O --- + +SELECTIONS: list[Any] = [ + (slice(None), slice(None)), + (slice(2, 7), slice(1, 5)), + (slice(None), 3), + (5, slice(None)), + (3, 4), + (slice(1, 8, 2), slice(None)), + (slice(None), slice(6, 1, -2)), + (slice(-3, None), slice(None, -1)), + ..., + (..., slice(2, 4)), + (slice(0, 0), slice(None)), + (slice(2, 6),), +] + + +@pytest.mark.parametrize("sel", SELECTIONS) +async def test_read_region_differential(backend: str, store: Store, sel: Any) -> None: + data, meta = filled(store) + observed = await read_region(meta, store, "a", sel, backend=backend) + np.testing.assert_array_equal(observed, data[sel]) + + +async def test_read_region_sharding(backend: str, store: Store) -> None: + data, meta = filled(store, chunks=(2, 2), shards=(4, 4)) + observed = await read_region(meta, store, "a", (slice(1, 7), slice(3, 8)), backend=backend) + np.testing.assert_array_equal(observed, data[1:7, 3:8]) + + +async def test_read_region_too_many_indices(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="too many indices"): + await read_region(meta, store, "a", (0, 0, 0), backend=backend) + + +async def test_read_region_fancy_rejected(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(TypeError, match="only integers, slices"): + await read_region(meta, store, "a", ([0, 1], slice(None)), backend=backend) # type: ignore[arg-type] From 9dd680abdb56095a074e3b55fefd7db11090da9c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 
2026 17:35:23 +0200 Subject: [PATCH 36/41] refactor: share parse_array_metadata; document bytes contract; OOB test Co-Authored-By: Claude Fable 5 --- src/zarr/crud/_api.py | 19 +++---------------- src/zarr/crud/_backend.py | 4 ++++ src/zarr/crud/_common.py | 21 +++++++++++++++++++++ src/zarr/crud/_reference.py | 21 ++++++--------------- tests/crud/test_crud.py | 6 ++++++ 5 files changed, 40 insertions(+), 31 deletions(-) create mode 100644 src/zarr/crud/_common.py diff --git a/src/zarr/crud/_api.py b/src/zarr/crud/_api.py index 5fdb8fe1d2..91aeef5007 100644 --- a/src/zarr/crud/_api.py +++ b/src/zarr/crud/_api.py @@ -9,6 +9,7 @@ import numpy as np from zarr.core.buffer.core import default_buffer_prototype +from zarr.crud._common import parse_array_metadata from zarr.crud._registry import get_backend if TYPE_CHECKING: @@ -18,8 +19,6 @@ from zarr.abc.store import Store from zarr.core.common import JSON - from zarr.core.metadata.v2 import ArrayV2Metadata - from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.crud._backend import CrudBackend @@ -42,18 +41,6 @@ def _resolve_backend(backend: CrudBackend | str | None) -> CrudBackend: return backend -def _parse_array_metadata( - metadata: Mapping[str, JSON], -) -> ArrayV3Metadata | ArrayV2Metadata: - from zarr.core.metadata.v2 import ArrayV2Metadata - from zarr.core.metadata.v3 import ArrayV3Metadata - - data = dict(metadata) - if data.get("zarr_format") == 3: - return ArrayV3Metadata.from_dict(data) - return ArrayV2Metadata.from_dict(data) - - def _chunk_dtype_and_shape( metadata: Mapping[str, JSON], ) -> tuple[np.dtype[Any], tuple[int, ...]]: @@ -64,7 +51,7 @@ def _chunk_dtype_and_shape( """ from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) if isinstance(meta_obj, ArrayV3Metadata): grid = meta_obj.chunk_grid if not isinstance(grid, RegularChunkGridMetadata): @@ -90,7 +77,7 @@ def _array_shape(metadata: 
Mapping[str, JSON]) -> tuple[int, ...]: def _chunk_key(metadata: Mapping[str, JSON], path: str, coords: tuple[int, ...]) -> str: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) rel = meta_obj.encode_chunk_key(coords) p = path.strip("/") return f"{p}/{rel}" if p else rel diff --git a/src/zarr/crud/_backend.py b/src/zarr/crud/_backend.py index 79fa1f49df..638dacf6f6 100644 --- a/src/zarr/crud/_backend.py +++ b/src/zarr/crud/_backend.py @@ -29,6 +29,10 @@ class CrudBackend(Protocol): Note: because this protocol is `runtime_checkable`, `isinstance` checks only verify that the method names exist, not their signatures or that they are async. Static type checking (mypy) is the authoritative conformance check. + + `read_chunk` and `read_subset` must return immutable `bytes` (not + `bytearray`): the facade wraps them with `numpy.frombuffer`, which yields a + read-only array only for immutable buffers. """ async def create_array( diff --git a/src/zarr/crud/_common.py b/src/zarr/crud/_common.py new file mode 100644 index 0000000000..4837edfa03 --- /dev/null +++ b/src/zarr/crud/_common.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v3 import ArrayV3Metadata + +if TYPE_CHECKING: + from collections.abc import Mapping + + from zarr.core.common import JSON + + +def parse_array_metadata( + metadata: Mapping[str, JSON], +) -> ArrayV3Metadata | ArrayV2Metadata: + """Parse a metadata document into a v2 or v3 array metadata object.""" + data = dict(metadata) + if data.get("zarr_format") == 3: + return ArrayV3Metadata.from_dict(data) + return ArrayV2Metadata.from_dict(data) diff --git a/src/zarr/crud/_reference.py b/src/zarr/crud/_reference.py index e8b76571bd..2b48186e56 100644 --- a/src/zarr/crud/_reference.py +++ b/src/zarr/crud/_reference.py @@ -13,6 +13,7 @@ from zarr.core.metadata.v2 import ArrayV2Metadata from 
zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata from zarr.crud._backend import NodeExistsError +from zarr.crud._common import parse_array_metadata from zarr.errors import NodeNotFoundError from zarr.storage._common import StorePath @@ -23,16 +24,6 @@ from zarr.core.common import JSON -def _parse_array_metadata( - metadata: Mapping[str, JSON], -) -> ArrayV3Metadata | ArrayV2Metadata: - """Parse a metadata document into a v2 or v3 array metadata object.""" - data = dict(metadata) - if data.get("zarr_format") == 3: - return ArrayV3Metadata.from_dict(data) - return ArrayV2Metadata.from_dict(data) - - def _native_dtype(meta_obj: ArrayV3Metadata | ArrayV2Metadata) -> np.dtype[Any]: """Numpy dtype in native byte order (zarrs and the facade assume native).""" return meta_obj.dtype.to_native_dtype().newbyteorder("=") @@ -75,7 +66,7 @@ class ReferenceBackend: async def create_array( self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool ) -> None: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) async def create_group( @@ -118,7 +109,7 @@ async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: async def read_chunk( self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
) -> bytes: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) shape = _chunk_shape(meta_obj) np_dtype = _native_dtype(meta_obj) sp = StorePath(store, path.strip("/")) @@ -145,7 +136,7 @@ async def read_subset( start: Sequence[int], shape: Sequence[int], ) -> bytes: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) np_dtype = _native_dtype(meta_obj) async_arr = AsyncArray(metadata=meta_obj, store_path=StorePath(store, path.strip("/"))) selection = tuple(slice(s, s + length) for s, length in zip(start, shape, strict=True)) @@ -160,7 +151,7 @@ async def write_chunk( coords: tuple[int, ...], data: bytes, ) -> None: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) shape = _chunk_shape(meta_obj) np_dtype = _native_dtype(meta_obj) sp = StorePath(store, path.strip("/")) @@ -178,7 +169,7 @@ async def write_chunk( async def delete_chunk( self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
) -> None: - meta_obj = _parse_array_metadata(metadata) + meta_obj = parse_array_metadata(metadata) sp = StorePath(store, path.strip("/")) await (sp / meta_obj.encode_chunk_key(coords)).delete() diff --git a/tests/crud/test_crud.py b/tests/crud/test_crud.py index c301725e22..2b406c0a5f 100644 --- a/tests/crud/test_crud.py +++ b/tests/crud/test_crud.py @@ -252,3 +252,9 @@ async def test_read_region_fancy_rejected(backend: str, store: Store) -> None: _, meta = filled(store) with pytest.raises(TypeError, match="only integers, slices"): await read_region(meta, store, "a", ([0, 1], slice(None)), backend=backend) # type: ignore[arg-type] + + +async def test_read_region_out_of_bounds(backend: str, store: Store) -> None: + _, meta = filled(store) + with pytest.raises(IndexError, match="out of bounds"): + await read_region(meta, store, "a", (8, slice(None)), backend=backend) From 26af3e4a015cf21cb8a4af1b6e344685aaba05fc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 17:41:50 +0200 Subject: [PATCH 37/41] feat: ZarrsBackend conforms to CrudBackend; zarr.zarrs is now a backend Replaces the old flat-function API in `zarr.zarrs._api` with `ZarrsBackend` (a `CrudBackend` implementation) in `zarr.zarrs._backend`. Importing `zarr.zarrs` now registers the backend under the key `"zarrs"` via `zarr.crud.register_backend`, enabling `backend="zarrs"` in all crud facade functions and activating the previously-skipped zarrs params in `tests/crud/test_crud.py`. Old tests that exercised the removed flat API are deleted; cache tests are migrated to use the `zarr.crud` facade. 
Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/__init__.py | 50 +---- src/zarr/zarrs/_api.py | 428 ------------------------------------- src/zarr/zarrs/_backend.py | 143 +++++++++++++ tests/zarrs/test_api.py | 13 -- tests/zarrs/test_cache.py | 23 +- tests/zarrs/test_chunk.py | 226 -------------------- tests/zarrs/test_node.py | 143 ------------- 7 files changed, 165 insertions(+), 861 deletions(-) delete mode 100644 src/zarr/zarrs/_api.py create mode 100644 src/zarr/zarrs/_backend.py delete mode 100644 tests/zarrs/test_api.py delete mode 100644 tests/zarrs/test_chunk.py delete mode 100644 tests/zarrs/test_node.py diff --git a/src/zarr/zarrs/__init__.py b/src/zarr/zarrs/__init__.py index 1e287b9cfa..bff68ade62 100644 --- a/src/zarr/zarrs/__init__.py +++ b/src/zarr/zarrs/__init__.py @@ -1,13 +1,11 @@ """ -Low-level functional API for zarr hierarchies, backed by the Rust +The zarrs CRUD backend for `zarr.crud`, backed by the Rust [`zarrs`](https://zarrs.dev) crate. -This subpackage is experimental. It requires the `zarrs-bindings` package -(in-repo Rust crate; install for development with `uv sync --group zarrs`). - -All array routines take an explicit metadata document (a `dict` matching the -`zarr.json` / `.zarray` document) rather than reading metadata from the store, -which makes read-only and virtual views possible. +Importing this module registers the `"zarrs"` backend. Requires the +`zarrs-bindings` extension (in-repo Rust crate; `uv sync --group zarrs`). Select +it with `zarr.config.set({"crud.backend": "zarrs"})` or per call via +`backend="zarrs"`. """ try: @@ -18,39 +16,11 @@ "It is built from the zarr-python repository: run `uv sync --group zarrs`." 
) from e +from zarr.crud import register_backend +from zarr.zarrs._backend import ZarrsBackend + __version__: str = _zarrs_bindings.version() -from zarr.zarrs._api import ( - NodeExistsError, - ZarrsOptions, - create_new_array, - create_new_group, - create_overwrite_array, - create_overwrite_group, - decode_chunk, - decode_region, - delete_node, - encode_chunk, - erase_chunk, - list_children, - read_encoded_chunk, - read_metadata, -) +register_backend("zarrs", ZarrsBackend()) -__all__ = [ - "NodeExistsError", - "ZarrsOptions", - "__version__", - "create_new_array", - "create_new_group", - "create_overwrite_array", - "create_overwrite_group", - "decode_chunk", - "decode_region", - "delete_node", - "encode_chunk", - "erase_chunk", - "list_children", - "read_encoded_chunk", - "read_metadata", -] +__all__ = ["ZarrsBackend", "__version__"] diff --git a/src/zarr/zarrs/_api.py b/src/zarr/zarrs/_api.py deleted file mode 100644 index df0804f719..0000000000 --- a/src/zarr/zarrs/_api.py +++ /dev/null @@ -1,428 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import operator -import types -from collections.abc import Sequence -from contextlib import contextmanager -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, cast - -import _zarrs_bindings as _zb -import numpy as np - -from zarr.errors import NodeNotFoundError -from zarr.zarrs._bridge import resolve_store - -if TYPE_CHECKING: - from collections.abc import Iterator, Mapping - - import numpy.typing as npt - - from zarr.abc.store import Store - from zarr.core.common import JSON - -NodeExistsError = _zb.NodeExistsError -"""Raised by `create_new_*` when a node already exists at the target path.""" - - -@dataclass(frozen=True, slots=True) -class ZarrsOptions: - """Options for zarrs-backed operations. - - Currently empty: fields (concurrency limits, checksum validation) arrive in - a later phase. Accepting it now keeps signatures stable. 
- """ - - -BasicIndex = int | slice | types.EllipsisType -BasicSelection = BasicIndex | tuple[BasicIndex, ...] - - -def _node_path(path: str) -> str: - """Convert a zarr-python node path (`""`, `"foo/bar"`) to a zarrs node path - (`"/"`, `"/foo/bar"`).""" - return f"/{path.strip('/')}" - - -@contextmanager -def _translate_errors() -> Iterator[None]: - try: - yield - except _zb.NodeNotFoundError as err: - raise NodeNotFoundError(str(err)) from err - - -async def create_new_group( - metadata: Mapping[str, JSON], - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> None: - """Create a group at `path` from a group metadata document. - - Raises `NodeExistsError` if any node already exists at `path`. - Creation is not atomic with respect to concurrent writers: a concurrent - creation at the same path can race the existence check. - """ - with _translate_errors(): - await asyncio.to_thread( - _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), False - ) - - -async def create_overwrite_group( - metadata: Mapping[str, JSON], - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> None: - """Create a group at `path`, deleting any existing node (and its children) first. - - Creation is not atomic with respect to concurrent writers: a concurrent - creation at the same path can race the existence check. - """ - with _translate_errors(): - await asyncio.to_thread( - _zb.create_group, resolve_store(store), _node_path(path), json.dumps(metadata), True - ) - - -async def create_new_array( - metadata: Mapping[str, JSON], - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> None: - """Create an array at `path` from a v2 or v3 array metadata document. - - Raises `NodeExistsError` if any node already exists at `path`. Creation is - not atomic with respect to concurrent writers: a concurrent creation at the - same path can race the existence check. 
- """ - with _translate_errors(): - await asyncio.to_thread( - _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), False - ) - - -async def create_overwrite_array( - metadata: Mapping[str, JSON], - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> None: - """Create an array at `path`, deleting any existing node (and its children) - first. The delete-then-create sequence is not atomic with respect to - concurrent writers. - """ - with _translate_errors(): - await asyncio.to_thread( - _zb.create_array, resolve_store(store), _node_path(path), json.dumps(metadata), True - ) - - -async def read_metadata( - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> dict[str, JSON]: - """Read the metadata document of the array or group at `path`. - - Raises `zarr.errors.NodeNotFoundError` if no node exists there. - """ - with _translate_errors(): - raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) - return cast("dict[str, JSON]", json.loads(raw)) - - -async def delete_node( - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> None: - """Delete the node at `path`, including all keys and child nodes under it. - - Raises `zarr.errors.NodeNotFoundError` if no node exists there. Deleting - the root node (`path=""`) clears the entire store. - """ - with _translate_errors(): - await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) - - -async def list_children( - store: Store, - path: str, - *, - options: ZarrsOptions | None = None, -) -> list[tuple[str, dict[str, JSON]]]: - """List the direct children of the group at `path` as - `(path, metadata_document)` pairs. Paths are store-relative (no leading - `/`). - - Raises `zarr.errors.NodeNotFoundError` if no *group* exists at `path` -- - including when `path` holds an array. 
- """ - with _translate_errors(): - raw: list[tuple[str, str]] = await asyncio.to_thread( - _zb.list_children, resolve_store(store), _node_path(path) - ) - return [ - (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) - for child_path, doc in raw - ] - - -def _array_shape(metadata: Mapping[str, JSON]) -> tuple[int, ...]: - """Resolve the array shape from a metadata document.""" - shape = metadata.get("shape") - if not isinstance(shape, Sequence) or isinstance(shape, str): - raise TypeError("metadata document has no valid 'shape'") - result: list[int] = [] - for s in shape: - if not isinstance(s, (int, float)): - raise TypeError(f"shape element {s!r} is not a number") - if isinstance(s, float) and not s.is_integer(): - raise TypeError(f"shape element {s!r} is not an integer") - result.append(int(s)) - return tuple(result) - - -def _normalize_selection( - selection: BasicSelection, shape: tuple[int, ...] -) -> tuple[list[int], list[int], tuple[slice | int, ...]]: - """Normalize a numpy-style basic-indexing selection against `shape`. - - Returns `(start, bounding_shape, post_index)`: the step-1 bounding box to - fetch (per-dimension start and length), and the numpy index to apply to - the fetched block to produce the final result (strides, reversals, and - integer-axis removal). Only integers, slices, and `Ellipsis` are - supported; fancy indexing raises `TypeError`. 
- """ - sel_tuple = selection if isinstance(selection, tuple) else (selection,) - - n_ellipsis = sum(1 for s in sel_tuple if s is Ellipsis) - if n_ellipsis > 1: - raise IndexError("an index can only have a single ellipsis ('...')") - if n_ellipsis == 1: - i = sel_tuple.index(Ellipsis) - n_fill = len(shape) - (len(sel_tuple) - 1) - if n_fill < 0: - raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") - sel_tuple = sel_tuple[:i] + (slice(None),) * n_fill + sel_tuple[i + 1 :] - if len(sel_tuple) > len(shape): - raise IndexError(f"too many indices for array: array is {len(shape)}-dimensional") - sel_tuple = sel_tuple + (slice(None),) * (len(shape) - len(sel_tuple)) - - starts: list[int] = [] - lengths: list[int] = [] - post: list[slice | int] = [] - for dim, (sel, size) in enumerate(zip(sel_tuple, shape, strict=True)): - if isinstance(sel, slice): - start, stop, step = sel.indices(size) - n = len(range(start, stop, step)) - if n == 0: - starts.append(0) - lengths.append(0) - post.append(slice(None)) - elif step > 0: - last = start + (n - 1) * step - starts.append(start) - lengths.append(last - start + 1) - post.append(slice(None, None, step)) - else: - # descending: bounding box is [last, start], ascending in store - # order; slice(None, None, step) over the block starts at its - # final element (global `start`) and lands exactly on index 0 - # (global `last`) because the block length is (n-1)*|step| + 1. 
- last = start + (n - 1) * step - starts.append(last) - lengths.append(start - last + 1) - post.append(slice(None, None, step)) - else: - assert not isinstance(sel, types.EllipsisType), "Ellipsis already expanded above" - try: - idx = operator.index(sel) - except TypeError: - raise TypeError( - "unsupported selection element " - f"{sel!r}: only integers, slices, and Ellipsis are supported" - ) from None - if idx < 0: - idx += size - if not 0 <= idx < size: - raise IndexError(f"index {sel} is out of bounds for axis {dim} with size {size}") - starts.append(idx) - lengths.append(1) - post.append(0) - return starts, lengths, tuple(post) - - -def _chunk_dtype_and_shape( - metadata: Mapping[str, JSON], -) -> tuple[np.dtype[Any], tuple[int, ...]]: - """Resolve the numpy dtype and chunk shape from a metadata document, using - zarr-python's own metadata parsing. - - The dtype is coerced to native byte order: zarrs always decodes to (and - encodes from) the native in-memory representation, applying any byte-order - codec itself. - """ - from zarr.core.metadata.v2 import ArrayV2Metadata - from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGridMetadata - - if metadata.get("zarr_format") == 3: - meta3 = ArrayV3Metadata.from_dict(dict(metadata)) - grid = meta3.chunk_grid - if not isinstance(grid, RegularChunkGridMetadata): - raise NotImplementedError("only regular chunk grids are supported") - return meta3.data_type.to_native_dtype().newbyteorder("="), grid.chunk_shape - meta2 = ArrayV2Metadata.from_dict(dict(metadata)) - return meta2.dtype.to_native_dtype().newbyteorder("="), meta2.chunks - - -async def decode_chunk( - metadata: Mapping[str, JSON], - store: Store, - path: str, - chunk_coords: tuple[int, ...], - *, - selection: tuple[slice | int, ...] | None = None, - options: ZarrsOptions | None = None, -) -> np.ndarray[Any, np.dtype[Any]]: - """Read and decode the chunk at `chunk_coords` of the array described by - `metadata`, located at `path` in `store`. 
- - The metadata document is authoritative: it is not read from the store. - Missing chunks decode to the fill value. `selection` (a chunk-relative - subset) is not implemented yet. - - The returned array is a read-only, zero-copy view over the decoded bytes; - call `.copy()` if you need a writable array. - """ - if selection is not None: - raise NotImplementedError("chunk subset selection is not implemented yet") - raw = await asyncio.to_thread( - _zb.retrieve_chunk, - resolve_store(store), - _node_path(path), - json.dumps(metadata), - list(chunk_coords), - ) - dtype, chunk_shape = _chunk_dtype_and_shape(metadata) - return np.frombuffer(raw, dtype=dtype).reshape(chunk_shape) - - -async def read_encoded_chunk( - metadata: Mapping[str, JSON], - store: Store, - path: str, - chunk_coords: tuple[int, ...], - *, - options: ZarrsOptions | None = None, -) -> bytes | None: - """Read the raw, still-encoded bytes of the chunk at `chunk_coords`, or - `None` if the chunk does not exist. No codecs are applied.""" - result: bytes | None = await asyncio.to_thread( - _zb.retrieve_encoded_chunk, - resolve_store(store), - _node_path(path), - json.dumps(metadata), - list(chunk_coords), - ) - return result - - -async def encode_chunk( - metadata: Mapping[str, JSON], - store: Store, - path: str, - chunk_coords: tuple[int, ...], - value: npt.ArrayLike, - *, - options: ZarrsOptions | None = None, -) -> None: - """Encode `value` with the codecs in `metadata` and store it as the chunk - at `chunk_coords`. 
`value` must match the chunk shape exactly.""" - dtype, chunk_shape = _chunk_dtype_and_shape(metadata) - arr = np.ascontiguousarray(np.asarray(value, dtype=dtype)) - if arr.shape != chunk_shape: - raise ValueError(f"value shape {arr.shape} does not match chunk shape {chunk_shape}") - await asyncio.to_thread( - _zb.store_chunk, - resolve_store(store), - _node_path(path), - json.dumps(metadata), - list(chunk_coords), - arr.tobytes(), - ) - - -async def erase_chunk( - metadata: Mapping[str, JSON], - store: Store, - path: str, - chunk_coords: tuple[int, ...], - *, - options: ZarrsOptions | None = None, -) -> None: - """Delete the chunk at `chunk_coords`. Deleting a missing chunk is a no-op.""" - await asyncio.to_thread( - _zb.erase_chunk, - resolve_store(store), - _node_path(path), - json.dumps(metadata), - list(chunk_coords), - ) - - -async def decode_region( - metadata: Mapping[str, JSON], - store: Store, - path: str, - selection: BasicSelection, - *, - options: ZarrsOptions | None = None, -) -> np.ndarray[Any, np.dtype[Any]]: - """Read and decode the region of the array described by `metadata` given - by a numpy-style basic-indexing `selection` (integers, slices including - steps, `Ellipsis`). - - The metadata document is authoritative: it is not read from the store. - One zarrs call fetches the step-1 bounding box of the selection (decoding - all overlapping chunks, in parallel for multi-chunk regions); strides, - reversals, and integer-axis removal are applied as numpy views on the - result. Missing chunks decode to the fill value. Fancy indexing (integer - or boolean arrays) is not supported and raises `TypeError`. The returned - array is a read-only view; call `.copy()` if you need a writable array. - - Note: zarrs fetches the step-1 bounding box of the selection. 
A selection - like `slice(0, N, step)` reads `O(N)` bytes from the store even though only - `O(N / step)` are returned; for sparse selections over large arrays, prefer - reading per-chunk with `decode_chunk`. - """ - dtype, _ = _chunk_dtype_and_shape(metadata) - shape = _array_shape(metadata) - starts, lengths, post_index = _normalize_selection(selection, shape) - if 0 in lengths: - block = np.empty(lengths, dtype=dtype) - block.flags.writeable = False - else: - raw = await asyncio.to_thread( - _zb.retrieve_array_subset, - resolve_store(store), - _node_path(path), - json.dumps(metadata), - starts, - lengths, - ) - block = np.frombuffer(raw, dtype=dtype).reshape(lengths) - result: np.ndarray[Any, np.dtype[Any]] = block[post_index] - return result diff --git a/src/zarr/zarrs/_backend.py b/src/zarr/zarrs/_backend.py new file mode 100644 index 0000000000..196cc10af0 --- /dev/null +++ b/src/zarr/zarrs/_backend.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import asyncio +import json +from contextlib import contextmanager +from typing import TYPE_CHECKING, cast + +import _zarrs_bindings as _zb + +from zarr.crud import NodeExistsError +from zarr.errors import NodeNotFoundError +from zarr.zarrs._bridge import resolve_store + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping, Sequence + + from zarr.abc.store import Store + from zarr.core.common import JSON + + +def _node_path(path: str) -> str: + """Convert a zarr path (`""`, `"foo/bar"`) to a zarrs node path (`"/"`, + `"/foo/bar"`).""" + return f"/{path.strip('/')}" + + +@contextmanager +def _translate_errors() -> Iterator[None]: + try: + yield + except _zb.NodeNotFoundError as err: + raise NodeNotFoundError(str(err)) from err + except _zb.NodeExistsError as err: + raise NodeExistsError(str(err)) from err + + +class ZarrsBackend: + """CRUD backend backed by the Rust `zarrs` crate via `_zarrs_bindings`. 
+ + Owns the zarrs-specific plumbing: JSON-serializing the metadata document, + the `/`-prefixed node-path form, store resolution, offloading the blocking + Rust calls to a worker thread, and translating binding exceptions to the + canonical `zarr.crud` / `zarr.errors` types. + """ + + async def create_array( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_array, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def create_group( + self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool + ) -> None: + with _translate_errors(): + await asyncio.to_thread( + _zb.create_group, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + overwrite, + ) + + async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: + with _translate_errors(): + raw = await asyncio.to_thread(_zb.read_metadata, resolve_store(store), _node_path(path)) + return cast("dict[str, JSON]", json.loads(raw)) + + async def read_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] 
+ ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def read_subset( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + start: Sequence[int], + shape: Sequence[int], + ) -> bytes: + return await asyncio.to_thread( + _zb.retrieve_array_subset, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(start), + list(shape), + ) + + async def write_chunk( + self, + store: Store, + path: str, + metadata: Mapping[str, JSON], + coords: tuple[int, ...], + data: bytes, + ) -> None: + await asyncio.to_thread( + _zb.store_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + data, + ) + + async def delete_chunk( + self, store: Store, path: str, metadata: Mapping[str, JSON], coords: tuple[int, ...] + ) -> None: + await asyncio.to_thread( + _zb.erase_chunk, + resolve_store(store), + _node_path(path), + json.dumps(metadata), + list(coords), + ) + + async def delete_node(self, store: Store, path: str) -> None: + with _translate_errors(): + await asyncio.to_thread(_zb.delete_node, resolve_store(store), _node_path(path)) + + async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: + with _translate_errors(): + raw: list[tuple[str, str]] = await asyncio.to_thread( + _zb.list_children, resolve_store(store), _node_path(path) + ) + return [ + (child_path.lstrip("/"), cast("dict[str, JSON]", json.loads(doc))) + for child_path, doc in raw + ] diff --git a/tests/zarrs/test_api.py b/tests/zarrs/test_api.py deleted file mode 100644 index 1a3e9005e2..0000000000 --- a/tests/zarrs/test_api.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -import pytest - -pytest.importorskip( - "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError -) - - -def test_import() -> None: - import zarr.zarrs - - assert 
isinstance(zarr.zarrs.__version__, str) diff --git a/tests/zarrs/test_cache.py b/tests/zarrs/test_cache.py index 41e3e24f29..9af72555d8 100644 --- a/tests/zarrs/test_cache.py +++ b/tests/zarrs/test_cache.py @@ -12,8 +12,9 @@ import _zarrs_bindings as zb import zarr +import zarr.zarrs # registers the "zarrs" CrudBackend +from zarr.crud import read_chunk, write_chunk from zarr.storage import LocalStore, MemoryStore -from zarr.zarrs import decode_chunk, encode_chunk if TYPE_CHECKING: from pathlib import Path @@ -34,17 +35,17 @@ async def test_localstore_populates_cache(tmp_path: Path) -> None: store = await LocalStore.open(root=tmp_path / "s") meta = _meta(store) assert zb.array_cache_len() == 0 - await decode_chunk(meta, store, "a", (0, 0)) + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") assert zb.array_cache_len() == 1 # second op on the SAME array reuses the entry, does not grow the cache - await decode_chunk(meta, store, "a", (1, 1)) + await read_chunk(meta, store, "a", (1, 1), backend="zarrs") assert zb.array_cache_len() == 1 async def test_memorystore_is_not_cached() -> None: store = MemoryStore() meta = _meta(store) - await decode_chunk(meta, store, "a", (0, 0)) + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") assert zb.array_cache_len() == 0 @@ -52,8 +53,8 @@ async def test_distinct_metadata_distinct_entries(tmp_path: Path) -> None: store = await LocalStore.open(root=tmp_path / "s") meta_a = _meta(store, "a") meta_b = _meta(store, "b") - await decode_chunk(meta_a, store, "a", (0, 0)) - await decode_chunk(meta_b, store, "b", (0, 0)) + await read_chunk(meta_a, store, "a", (0, 0), backend="zarrs") + await read_chunk(meta_b, store, "b", (0, 0), backend="zarrs") assert zb.array_cache_len() == 2 @@ -67,8 +68,8 @@ async def test_cache_keyed_on_root_not_just_metadata(tmp_path: Path) -> None: a2 = zarr.create_array(store=s2, name="a", shape=(4, 4), chunks=(4, 4), dtype="uint16") a2[:, :] = 2 meta = dict(a1.metadata.to_dict()) # identical 
metadata document - out1 = await decode_chunk(meta, s1, "a", (0, 0)) - out2 = await decode_chunk(meta, s2, "a", (0, 0)) + out1 = await read_chunk(meta, s1, "a", (0, 0), backend="zarrs") + out2 = await read_chunk(meta, s2, "a", (0, 0), backend="zarrs") np.testing.assert_array_equal(out1, np.full((4, 4), 1, dtype="uint16")) np.testing.assert_array_equal(out2, np.full((4, 4), 2, dtype="uint16")) assert zb.array_cache_len() == 2 @@ -79,8 +80,8 @@ async def test_cache_reflects_writes_through_store(tmp_path: Path) -> None: # a subsequent read (proves the cache does not stale-cache chunk data) store = await LocalStore.open(root=tmp_path / "s") meta = _meta(store) - await decode_chunk(meta, store, "a", (0, 0)) # caches the Array + await read_chunk(meta, store, "a", (0, 0), backend="zarrs") # caches the Array new = np.full((4, 4), 99, dtype="uint16") - await encode_chunk(meta, store, "a", (0, 0), new) # write via (cached) Array - out = await decode_chunk(meta, store, "a", (0, 0)) + await write_chunk(meta, store, "a", (0, 0), new, backend="zarrs") # write via (cached) Array + out = await read_chunk(meta, store, "a", (0, 0), backend="zarrs") np.testing.assert_array_equal(out, new) diff --git a/tests/zarrs/test_chunk.py b/tests/zarrs/test_chunk.py deleted file mode 100644 index 6b4964413c..0000000000 --- a/tests/zarrs/test_chunk.py +++ /dev/null @@ -1,226 +0,0 @@ -from __future__ import annotations - -import copy -from typing import TYPE_CHECKING, Any - -import numpy as np -import pytest - -pytest.importorskip( - "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError -) - -import zarr -from tests.zarrs.conftest import array_metadata -from zarr.codecs import BloscCodec, GzipCodec, ZstdCodec -from zarr.core.buffer.core import default_buffer_prototype -from zarr.zarrs import ( - create_new_array, - decode_chunk, - decode_region, - encode_chunk, - erase_chunk, - read_encoded_chunk, -) - -if TYPE_CHECKING: - from zarr.abc.store import Store - - -def 
_filled(store: Store, **kwargs: Any) -> tuple[np.ndarray[Any, np.dtype[Any]], dict[str, Any]]: - """Create an 8x8 array named 'a' via zarr-python, fill it with a ramp, and - return (data, metadata_document).""" - params: dict[str, Any] = {"shape": (8, 8), "chunks": (4, 4), "dtype": "uint16"} | kwargs - arr = zarr.create_array(store=store, name="a", **params) - data = np.arange(64, dtype=params["dtype"]).reshape(8, 8) - arr[:, :] = data - doc = dict(arr.metadata.to_dict()) - if params.get("zarr_format") == 2: - # v2 attributes live in .zattrs, not in the .zarray document - doc.pop("attributes", None) - return data, doc - - -@pytest.mark.parametrize("dtype", ["uint8", "int32", "float64"]) -async def test_decode_chunk_differential(store: Store, dtype: str) -> None: - data, meta = _filled(store, dtype=dtype) - observed = await decode_chunk(meta, store, "a", (1, 0)) - np.testing.assert_array_equal(observed, data[4:8, 0:4]) - - -@pytest.mark.parametrize( - "compressors", [None, (GzipCodec(),), (ZstdCodec(),), (BloscCodec(cname="lz4"),)] -) -async def test_decode_chunk_codecs(store: Store, compressors: Any) -> None: - data, meta = _filled(store, compressors=compressors) - observed = await decode_chunk(meta, store, "a", (0, 1)) - np.testing.assert_array_equal(observed, data[0:4, 4:8]) - - -async def test_decode_chunk_v2(store: Store) -> None: - data, meta = _filled(store, zarr_format=2) - observed = await decode_chunk(meta, store, "a", (1, 1)) - np.testing.assert_array_equal(observed, data[4:8, 4:8]) - - -async def test_decode_chunk_v2_big_endian(store: Store) -> None: - data, meta = _filled(store, dtype=">u2", zarr_format=2) - observed = await decode_chunk(meta, store, "a", (1, 1)) - np.testing.assert_array_equal(observed, data[4:8, 4:8]) - - -async def test_encode_chunk_v2_big_endian(store: Store) -> None: - meta = array_metadata(dtype=">u2", zarr_format=2) - await create_new_array(meta, store, "a") - value = np.arange(16, dtype="uint16").reshape(4, 4) - await 
encode_chunk(meta, store, "a", (0, 1), value) - arr = zarr.open_array(store=store, path="a", mode="r") - np.testing.assert_array_equal(arr[0:4, 4:8], value) - - -async def test_decode_chunk_readonly(store: Store) -> None: - _, meta = _filled(store) - observed = await decode_chunk(meta, store, "a", (0, 0)) - assert not observed.flags.writeable - - -async def test_decode_chunk_sharding(store: Store) -> None: - # with sharding, the metadata chunk grid is the shard grid - data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) - observed = await decode_chunk(meta, store, "a", (1, 1)) - np.testing.assert_array_equal(observed, data[4:8, 4:8]) - - -async def test_decode_chunk_missing_returns_fill_value(store: Store) -> None: - arr = zarr.create_array( - store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 - ) - meta = dict(arr.metadata.to_dict()) - observed = await decode_chunk(meta, store, "a", (0, 0)) - np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) - - -async def test_decode_chunk_selection_not_implemented(store: Store) -> None: - _, meta = _filled(store) - with pytest.raises(NotImplementedError): - await decode_chunk(meta, store, "a", (0, 0), selection=(slice(0, 2), slice(0, 2))) - - -async def test_decode_chunk_metadata_view(store: Store) -> None: - # the read-only-view case: decode with a metadata document the store never saw - data, meta = _filled(store, dtype="uint16", compressors=None) - view = copy.deepcopy(meta) - view["data_type"] = "uint8" - view["shape"] = [8, 16] - view["chunk_grid"]["configuration"]["chunk_shape"] = [4, 8] - observed = await decode_chunk(view, store, "a", (1, 0)) - np.testing.assert_array_equal(observed, data[4:8, 0:4].view("uint8")) - - -async def test_encode_chunk_differential(store: Store) -> None: - meta = array_metadata() - await create_new_array(meta, store, "a") - value = np.arange(16, dtype="uint16").reshape(4, 4) - await encode_chunk(meta, store, "a", (0, 1), value) - arr 
= zarr.open_array(store=store, path="a", mode="r") - np.testing.assert_array_equal(arr[0:4, 4:8], value) - - -async def test_encode_chunk_shape_mismatch(store: Store) -> None: - meta = array_metadata() - await create_new_array(meta, store, "a") - with pytest.raises(ValueError, match="chunk shape"): - await encode_chunk(meta, store, "a", (0, 0), np.zeros((2, 2), dtype="uint16")) - - -async def test_read_encoded_chunk_matches_store(store: Store) -> None: - _, meta = _filled(store) - raw = await read_encoded_chunk(meta, store, "a", (0, 0)) - expected = await store.get("a/c/0/0", prototype=default_buffer_prototype()) - assert expected is not None - assert raw == expected.to_bytes() - - -async def test_read_encoded_chunk_missing_returns_none(store: Store) -> None: - arr = zarr.create_array(store=store, name="empty", shape=(8, 8), chunks=(4, 4), dtype="uint16") - meta = dict(arr.metadata.to_dict()) - assert await read_encoded_chunk(meta, store, "empty", (0, 0)) is None - - -async def test_erase_chunk(store: Store) -> None: - _, meta = _filled(store) - assert await store.exists("a/c/0/0") - await erase_chunk(meta, store, "a", (0, 0)) - assert not await store.exists("a/c/0/0") - arr = zarr.open_array(store=store, path="a", mode="r") - np.testing.assert_array_equal(arr[0:4, 0:4], np.zeros((4, 4), dtype="uint16")) - - -SELECTIONS: list[Any] = [ - (slice(None), slice(None)), - (slice(2, 7), slice(1, 5)), # crosses chunk boundaries - (slice(None), 3), - (5, slice(None)), - (3, 4), # fully scalar -> 0-d result - (slice(1, 8, 2), slice(None)), - (slice(None), slice(6, 1, -2)), # negative step - (slice(-3, None), slice(None, -1)), # negative bounds - ..., # Ellipsis alone - (..., slice(2, 4)), - (slice(0, 0), slice(None)), # empty - (slice(2, 6),), # partial selection, missing trailing dims -] - - -@pytest.mark.parametrize("sel", SELECTIONS) -async def test_decode_region_differential(store: Store, sel: Any) -> None: - data, meta = _filled(store) - observed = await 
decode_region(meta, store, "a", sel) - np.testing.assert_array_equal(observed, data[sel]) - - -async def test_decode_region_sharding(store: Store) -> None: - data, meta = _filled(store, chunks=(2, 2), shards=(4, 4)) - observed = await decode_region(meta, store, "a", (slice(1, 7), slice(3, 8))) - np.testing.assert_array_equal(observed, data[1:7, 3:8]) - - -async def test_decode_region_v2(store: Store) -> None: - data, meta = _filled(store, zarr_format=2) - observed = await decode_region(meta, store, "a", (slice(2, 7), slice(None, None, 3))) - np.testing.assert_array_equal(observed, data[2:7, ::3]) - - -async def test_decode_region_missing_chunks_fill_value(store: Store) -> None: - arr = zarr.create_array( - store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=7 - ) - meta = dict(arr.metadata.to_dict()) - observed = await decode_region(meta, store, "a", (slice(2, 6), slice(2, 6))) - np.testing.assert_array_equal(observed, np.full((4, 4), 7, dtype="uint16")) - - -async def test_decode_region_out_of_bounds(store: Store) -> None: - _, meta = _filled(store) - with pytest.raises(IndexError, match="out of bounds"): - await decode_region(meta, store, "a", (8, slice(None))) - - -async def test_decode_region_too_many_indices(store: Store) -> None: - _, meta = _filled(store) - with pytest.raises(IndexError, match="too many indices"): - await decode_region(meta, store, "a", (0, 0, 0)) - - -async def test_decode_region_fancy_indexing_rejected(store: Store) -> None: - _, meta = _filled(store) - with pytest.raises(TypeError, match="only integers, slices"): - await decode_region(meta, store, "a", ([0, 1], slice(None))) # type: ignore[arg-type] - - -async def test_decode_region_readonly(store: Store) -> None: - _, meta = _filled(store) - observed = await decode_region(meta, store, "a", (slice(0, 4), slice(0, 4))) - assert not observed.flags.writeable - empty = await decode_region(meta, store, "a", (slice(0, 0), slice(None))) - assert not 
empty.flags.writeable diff --git a/tests/zarrs/test_node.py b/tests/zarrs/test_node.py deleted file mode 100644 index eefb4cf137..0000000000 --- a/tests/zarrs/test_node.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import annotations - -import json -from typing import TYPE_CHECKING, Any - -import numpy as np -import pytest - -pytest.importorskip( - "_zarrs_bindings", reason="zarrs-bindings is not installed", exc_type=ImportError -) - -import zarr -from tests.zarrs.conftest import array_metadata -from zarr.core.buffer.core import default_buffer_prototype -from zarr.errors import NodeNotFoundError -from zarr.zarrs import ( - NodeExistsError, - create_new_array, - create_new_group, - create_overwrite_array, - create_overwrite_group, - delete_node, - list_children, - read_metadata, -) - -if TYPE_CHECKING: - from zarr.abc.store import Store - -GROUP_META: dict[str, Any] = { - "zarr_format": 3, - "node_type": "group", - "attributes": {"answer": 42}, -} - - -async def test_create_new_group(store: Store) -> None: - await create_new_group(GROUP_META, store, "foo") - group = zarr.open_group(store=store, path="foo", mode="r") - assert dict(group.attrs) == {"answer": 42} - - -async def test_create_new_group_at_root(store: Store) -> None: - await create_new_group(GROUP_META, store, "") - group = zarr.open_group(store=store, mode="r") - assert dict(group.attrs) == {"answer": 42} - - -async def test_create_new_group_existing_node(store: Store) -> None: - await create_new_group(GROUP_META, store, "foo") - with pytest.raises(NodeExistsError): - await create_new_group(GROUP_META, store, "foo") - - -async def test_create_overwrite_group(store: Store) -> None: - # an array and its chunks previously occupied the path; overwrite removes both - arr = zarr.create_array(store=store, name="foo", shape=(4,), chunks=(2,), dtype="uint8") - arr[:] = 1 - assert await store.exists("foo/c/0") - await create_overwrite_group(GROUP_META, store, "foo") - group = zarr.open_group(store=store, 
path="foo", mode="r") - assert dict(group.attrs) == {"answer": 42} - assert not await store.exists("foo/c/0") - assert await store.get("foo/zarr.json", prototype=default_buffer_prototype()) is not None - - -async def test_create_new_array(store: Store) -> None: - await create_new_array(array_metadata(), store, "arr") - arr = zarr.open_array(store=store, path="arr", mode="r") - assert arr.shape == (8, 8) - assert arr.chunks == (4, 4) - assert arr.dtype == np.dtype("uint16") - - -async def test_create_new_array_existing_node(store: Store) -> None: - await create_new_array(array_metadata(), store, "arr") - with pytest.raises(NodeExistsError): - await create_new_array(array_metadata(), store, "arr") - - -async def test_create_overwrite_array(store: Store) -> None: - zarr.create_group(store=store, path="arr") - await create_overwrite_array(array_metadata(), store, "arr") - arr = zarr.open_array(store=store, path="arr", mode="r") - assert arr.shape == (8, 8) - - -async def test_create_new_array_v2(store: Store) -> None: - await create_new_array(array_metadata(zarr_format=2), store, "arr") - arr = zarr.open_array(store=store, path="arr", mode="r") - assert arr.metadata.zarr_format == 2 - assert arr.shape == (8, 8) - - -async def test_read_metadata_matches_stored_document(store: Store) -> None: - await create_new_array(array_metadata(), store, "arr") - observed = await read_metadata(store, "arr") - raw = await store.get("arr/zarr.json", prototype=default_buffer_prototype()) - assert raw is not None - assert observed == json.loads(raw.to_bytes()) - - -async def test_read_metadata_zarr_python_group(store: Store) -> None: - zarr.create_group(store=store, path="g", attributes={"a": 1}) - observed = await read_metadata(store, "g") - assert observed["node_type"] == "group" - assert observed["attributes"] == {"a": 1} - - -async def test_read_metadata_missing(store: Store) -> None: - with pytest.raises(NodeNotFoundError): - await read_metadata(store, "nope") - - -async def 
test_delete_node(store: Store) -> None: - arr = zarr.create_array(store=store, name="doomed", shape=(4,), chunks=(2,), dtype="uint8") - arr[:] = 1 - await delete_node(store, "doomed") - assert not await store.exists("doomed/zarr.json") - assert not await store.exists("doomed/c/0") - - -async def test_delete_node_missing(store: Store) -> None: - with pytest.raises(NodeNotFoundError): - await delete_node(store, "nope") - - -async def test_list_children(store: Store) -> None: - root = zarr.create_group(store=store) - root.create_group("sub_group", attributes={"kind": "group"}) - root.create_array("sub_array", shape=(4,), chunks=(2,), dtype="uint8") - children = await list_children(store, "") - by_path = dict(children) - assert set(by_path) == {"sub_group", "sub_array"} - assert not any(p.startswith("/") for p in by_path) - assert by_path["sub_group"]["node_type"] == "group" - assert by_path["sub_array"]["node_type"] == "array" - - -async def test_list_children_missing(store: Store) -> None: - with pytest.raises(NodeNotFoundError): - await list_children(store, "nope") From 03c80d743c820071fab3d4e47210f5b5836c8201 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 18:44:21 +0200 Subject: [PATCH 38/41] docs/ci: zarr.crud changelog and CI coverage Co-Authored-By: Claude Fable 5 --- .github/workflows/zarrs.yml | 2 +- changes/+zarrs-bindings.feature.md | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/zarrs.yml b/.github/workflows/zarrs.yml index 954dcda79b..b17cfb3a3b 100644 --- a/.github/workflows/zarrs.yml +++ b/.github/workflows/zarrs.yml @@ -31,4 +31,4 @@ jobs: - name: Run zarrs bindings tests # the ubuntu runner image ships a Rust toolchain; the maturin build # backend is fetched by uv on demand - run: uv run --group zarrs pytest tests/zarrs -v + run: uv run --group zarrs pytest tests/crud tests/zarrs -v diff --git a/changes/+zarrs-bindings.feature.md b/changes/+zarrs-bindings.feature.md index 
0b0e7ee384..26216a71ae 100644 --- a/changes/+zarrs-bindings.feature.md +++ b/changes/+zarrs-bindings.feature.md @@ -1,6 +1,9 @@ -Added `zarr.zarrs`, an experimental low-level functional API for zarr hierarchy -CRUD backed by the Rust [zarrs](https://zarrs.dev) crate via the new in-repo -`zarrs-bindings` PyO3 crate. Array routines take an explicit metadata document, -enabling read-only views such as decoding chunks with externally supplied -metadata or reading raw encoded chunk bytes. Build for development with -`uv sync --group zarrs`. +Added `zarr.crud`, an experimental backend-agnostic low-level functional API for +zarr hierarchy CRUD (`create_*`, `read_chunk`, `read_region`, `read_encoded_chunk`, +`write_chunk`, `delete_chunk`, `read_metadata`, `delete_node`, `list_children`). +Array routines take an explicit metadata document, enabling read-only views. +Operations delegate to a pluggable `CrudBackend`: a pure-Python reference backend +(the default) or the zarrs-accelerated backend in `zarr.zarrs`, backed by the Rust +[zarrs](https://zarrs.dev) crate via the in-repo `zarrs-bindings` PyO3 crate. +Select a backend with the `crud.backend` config key or a per-call `backend=` +argument. Build the zarrs backend for development with `uv sync --group zarrs`. From c8ce00e5e84bdf2b7efdd4d73dee8216b6d69ba6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 19:19:38 +0200 Subject: [PATCH 39/41] fix: reference backend matches zarrs on v2 groups and all-fill chunk drops - Add ZGROUP_JSON + a _node_exists() helper probing all three meta-key variants (zarr.json / .zarray / .zgroup) so v2 groups are visible to every CRUD operation; rewrite _create/read_metadata/delete_node/list_children to use it; remove dead _meta_key(). - Add _is_all_fill_value() and drop all-fill chunks in write_chunk to match zarrs's sparse-storage convention. - Add differential tests for v2 groups, v2 array read_metadata, and all-fill chunk writes. 
Co-Authored-By: Claude Fable 5 --- src/zarr/crud/_reference.py | 84 ++++++++++++++++++------------------- tests/crud/test_crud.py | 38 +++++++++++++++++ 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/src/zarr/crud/_reference.py b/src/zarr/crud/_reference.py index 2b48186e56..78db4c6e04 100644 --- a/src/zarr/crud/_reference.py +++ b/src/zarr/crud/_reference.py @@ -7,7 +7,7 @@ from zarr.core.array import AsyncArray, create_codec_pipeline from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.buffer.core import NDBuffer, default_buffer_prototype -from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON from zarr.core.group import GroupMetadata from zarr.core.metadata.io import save_metadata from zarr.core.metadata.v2 import ArrayV2Metadata @@ -49,10 +49,16 @@ def _array_spec(meta_obj: ArrayV3Metadata | ArrayV2Metadata, shape: tuple[int, . ) -def _meta_key(path: str, zarr_format: int) -> str: - fname = ZARR_JSON if zarr_format == 3 else ZARRAY_JSON - p = path.strip("/") - return f"{p}/{fname}" if p else fname +def _is_all_fill_value( + arr: np.ndarray[Any, np.dtype[Any]], fill_value: Any, dtype: np.dtype[Any] +) -> bool: + """Whether every element of `arr` equals the fill value (NaN-aware for floats).""" + if fill_value is None: + return False + fill = np.asarray(fill_value, dtype=dtype) + if np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.complexfloating): + return bool(np.array_equal(arr, np.broadcast_to(fill, arr.shape), equal_nan=True)) + return bool(np.all(arr == fill)) class ReferenceBackend: @@ -63,47 +69,50 @@ class ReferenceBackend: subset reads, which is exactly the `BasicIndexer` + codec-pipeline read path. 
""" + async def _node_exists(self, store: Store, path: str) -> bool: + proto = default_buffer_prototype() + sp = StorePath(store, path.strip("/")) + for meta_key in (ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON): + if await (sp / meta_key).get(prototype=proto) is not None: + return True + return False + async def create_array( self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool ) -> None: meta_obj = parse_array_metadata(metadata) - await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + await self._create(store, path, meta_obj, overwrite=overwrite) async def create_group( self, store: Store, path: str, metadata: Mapping[str, JSON], *, overwrite: bool ) -> None: meta_obj = GroupMetadata.from_dict(dict(metadata)) - await self._create(store, path, meta_obj, meta_obj.zarr_format, overwrite=overwrite) + await self._create(store, path, meta_obj, overwrite=overwrite) - async def _create( - self, store: Store, path: str, meta_obj: Any, zarr_format: int, *, overwrite: bool - ) -> None: + async def _create(self, store: Store, path: str, meta_obj: Any, *, overwrite: bool) -> None: sp = StorePath(store, path.strip("/")) - proto = default_buffer_prototype() if overwrite: await store.delete_dir(path.strip("/")) - else: - key = _meta_key(path, zarr_format) - if await store.get(key, prototype=proto) is not None: - raise NodeExistsError(f"a node already exists at path {path!r}") + elif await self._node_exists(store, path): + raise NodeExistsError(f"a node already exists at path {path!r}") await save_metadata(sp, meta_obj, ensure_parents=True) async def read_metadata(self, store: Store, path: str) -> dict[str, JSON]: from zarr.core._json import buffer_to_json_object proto = default_buffer_prototype() - p = path.strip("/") - sp = StorePath(store, p) + sp = StorePath(store, path.strip("/")) buf = await (sp / ZARR_JSON).get(prototype=proto) if buf is not None: return buffer_to_json_object(buf) - buf2 = await (sp / 
ZARRAY_JSON).get(prototype=proto) - if buf2 is not None: - doc = buffer_to_json_object(buf2) - zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) - if zattrs is not None: - doc["attributes"] = buffer_to_json_object(zattrs) - return doc + for meta_key in (ZARRAY_JSON, ZGROUP_JSON): + b = await (sp / meta_key).get(prototype=proto) + if b is not None: + doc = buffer_to_json_object(b) + zattrs = await (sp / ZATTRS_JSON).get(prototype=proto) + if zattrs is not None: + doc["attributes"] = buffer_to_json_object(zattrs) + return doc raise NodeNotFoundError(f"no node found at path {path!r}") async def read_chunk( @@ -157,6 +166,9 @@ async def write_chunk( sp = StorePath(store, path.strip("/")) chunk_key = meta_obj.encode_chunk_key(coords) arr = np.frombuffer(data, dtype=np_dtype).reshape(shape) + if _is_all_fill_value(arr, meta_obj.fill_value, np_dtype): + await (sp / chunk_key).delete() + return pipeline = create_codec_pipeline(meta_obj) spec = _array_spec(meta_obj, shape) encoded = list(await pipeline.encode([(NDBuffer.from_ndarray_like(arr), spec)])) @@ -174,34 +186,18 @@ async def delete_chunk( await (sp / meta_obj.encode_chunk_key(coords)).delete() async def delete_node(self, store: Store, path: str) -> None: - proto = default_buffer_prototype() - p = path.strip("/") - sp = StorePath(store, p) - present = ( - await (sp / ZARR_JSON).get(prototype=proto) is not None - or await (sp / ZARRAY_JSON).get(prototype=proto) is not None - ) - if not present: + if not await self._node_exists(store, path): raise NodeNotFoundError(f"no node found at path {path!r}") - await store.delete_dir(p) + await store.delete_dir(path.strip("/")) async def list_children(self, store: Store, path: str) -> list[tuple[str, dict[str, JSON]]]: - proto = default_buffer_prototype() p = path.strip("/") - sp = StorePath(store, p) - if ( - await (sp / ZARR_JSON).get(prototype=proto) is None - and await (sp / ZARRAY_JSON).get(prototype=proto) is None - ): + if not await self._node_exists(store, path): 
raise NodeNotFoundError(f"no node found at path {path!r}") prefix = f"{p}/" if p else "" children: list[tuple[str, dict[str, JSON]]] = [] async for name in store.list_dir(prefix): child_path = f"{p}/{name}" if p else name - child_sp = StorePath(store, child_path) - if ( - await (child_sp / ZARR_JSON).get(prototype=proto) is not None - or await (child_sp / ZARRAY_JSON).get(prototype=proto) is not None - ): + if await self._node_exists(store, child_path): children.append((name, await self.read_metadata(store, child_path))) return children diff --git a/tests/crud/test_crud.py b/tests/crud/test_crud.py index 2b406c0a5f..d58256c1d7 100644 --- a/tests/crud/test_crud.py +++ b/tests/crud/test_crud.py @@ -32,6 +32,7 @@ from zarr.abc.store import Store GROUP_META: dict[str, Any] = {"zarr_format": 3, "node_type": "group", "attributes": {"answer": 42}} +GROUP_META_V2: dict[str, Any] = {"zarr_format": 2, "attributes": {"answer": 42}} # --- node lifecycle --- @@ -111,6 +112,23 @@ async def test_list_children(backend: str, store: Store) -> None: assert not any(p.startswith("/") for p in by_path) +async def test_create_read_delete_v2_group(backend: str, store: Store) -> None: + await create_new_group(GROUP_META_V2, store, "g2", backend=backend) + meta = await read_metadata(store, "g2", backend=backend) + assert meta["zarr_format"] == 2 + with pytest.raises(NodeExistsError): + await create_new_group(GROUP_META_V2, store, "g2", backend=backend) + await delete_node(store, "g2", backend=backend) + with pytest.raises(NodeNotFoundError): + await read_metadata(store, "g2", backend=backend) + + +async def test_read_metadata_v2_array(backend: str, store: Store) -> None: + await create_new_array(array_metadata(zarr_format=2), store, "arr", backend=backend) + meta = await read_metadata(store, "arr", backend=backend) + assert meta["zarr_format"] == 2 + + # --- chunk I/O --- @@ -197,6 +215,26 @@ async def test_delete_chunk(backend: str, store: Store) -> None: assert not await 
store.exists("a/c/0/0") +async def test_write_all_fill_chunk_is_dropped(backend: str, store: Store) -> None: + arr = zarr.create_array( + store=store, name="a", shape=(8, 8), chunks=(4, 4), dtype="uint16", fill_value=0 + ) + meta = dict(arr.metadata.to_dict()) + await write_chunk(meta, store, "a", (0, 0), np.zeros((4, 4), dtype="uint16"), backend=backend) + assert not await store.exists("a/c/0/0") + np.testing.assert_array_equal( + await read_chunk(meta, store, "a", (0, 0), backend=backend), + np.zeros((4, 4), dtype="uint16"), + ) + + +async def test_overwrite_chunk_with_fill_removes_it(backend: str, store: Store) -> None: + _data, meta = filled(store) # chunk (0,0) exists with nonzero data, fill_value default 0 + assert await store.exists("a/c/0/0") + await write_chunk(meta, store, "a", (0, 0), np.zeros((4, 4), dtype="uint16"), backend=backend) + assert not await store.exists("a/c/0/0") + + async def test_read_encoded_chunk_matches_store(backend: str, store: Store) -> None: _, meta = filled(store) raw = await read_encoded_chunk(meta, store, "a", (0, 0), backend=backend) From 75d44aec5e0b35580b2a8a802640157d08423294 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 15 Jun 2026 19:27:04 +0200 Subject: [PATCH 40/41] test: track zarrs v2-group attribute divergence as strict xfail; document it Co-Authored-By: Claude Fable 5 --- src/zarr/zarrs/_backend.py | 7 +++++++ tests/crud/test_crud.py | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/zarr/zarrs/_backend.py b/src/zarr/zarrs/_backend.py index 196cc10af0..e95759660b 100644 --- a/src/zarr/zarrs/_backend.py +++ b/src/zarr/zarrs/_backend.py @@ -41,6 +41,13 @@ class ZarrsBackend: the `/`-prefixed node-path form, store resolution, offloading the blocking Rust calls to a worker thread, and translating binding exceptions to the canonical `zarr.crud` / `zarr.errors` types. 
+ + Known limitation: creating a Zarr v2 *group* with attributes writes a + non-standard `.zattrs` (the attributes nested under an ``"attributes"`` key) + that zarr-python and other readers interpret incorrectly. This is a + zarrs-crate behavior; the pure-Python reference backend writes the standard + layout. Prefer the reference backend for writing v2 groups until the zarrs + crate is fixed. """ async def create_array( diff --git a/tests/crud/test_crud.py b/tests/crud/test_crud.py index d58256c1d7..d4aa79e334 100644 --- a/tests/crud/test_crud.py +++ b/tests/crud/test_crud.py @@ -43,6 +43,27 @@ async def test_create_new_group(backend: str, store: Store) -> None: assert dict(zarr.open_group(store=store, path="foo", mode="r").attrs) == {"answer": 42} +async def test_v2_group_attrs_zarr_python_compatible_reference(store: Store) -> None: + # The reference backend writes standard v2 `.zattrs` (the bare attributes + # dict), so zarr-python and other readers see the right attributes. + await create_new_group(GROUP_META_V2, store, "g2", backend="reference") + assert dict(zarr.open_group(store=store, path="g2", mode="r").attrs) == {"answer": 42} + + +@pytest.mark.xfail( + reason="the zarrs backend writes v2 group attributes in a non-standard `.zattrs` " + "layout (nested under an 'attributes' key) that zarr-python reads back wrong; " + "tracked zarrs-crate limitation", + strict=True, +) +async def test_v2_group_attrs_zarr_python_compatible_zarrs(store: Store) -> None: + pytest.importorskip("_zarrs_bindings", reason="zarrs-bindings is not installed") + import zarr.zarrs + + await create_new_group(GROUP_META_V2, store, "g2", backend="zarrs") + assert dict(zarr.open_group(store=store, path="g2", mode="r").attrs) == {"answer": 42} + + async def test_create_new_group_existing_raises(backend: str, store: Store) -> None: await create_new_group(GROUP_META, store, "foo", backend=backend) with pytest.raises(NodeExistsError): From 0094048efca9e71af8a6fae1a4d0f95c962eb7d8 Mon Sep 17 
00:00:00 2001 From: Davis Vann Bennett Date: Tue, 16 Jun 2026 09:07:58 +0200 Subject: [PATCH 41/41] refactor: move zarrs-bindings crate under packages/ (repo convention) Matches the existing packages/zarr-metadata subpackage layout. Updates the uv source path, sdist exclude, gitignore, and design doc accordingly. Co-Authored-By: Claude Fable 5 --- .gitignore | 2 +- .../specs/2026-06-11-zarrs-functional-api-design.md | 3 ++- {zarrs-bindings => packages/zarrs-bindings}/Cargo.lock | 0 {zarrs-bindings => packages/zarrs-bindings}/Cargo.toml | 0 {zarrs-bindings => packages/zarrs-bindings}/pyproject.toml | 0 {zarrs-bindings => packages/zarrs-bindings}/src/chunk.rs | 0 {zarrs-bindings => packages/zarrs-bindings}/src/lib.rs | 0 {zarrs-bindings => packages/zarrs-bindings}/src/node.rs | 0 {zarrs-bindings => packages/zarrs-bindings}/src/store.rs | 0 pyproject.toml | 4 ++-- uv.lock | 4 ++-- 11 files changed, 7 insertions(+), 6 deletions(-) rename {zarrs-bindings => packages/zarrs-bindings}/Cargo.lock (100%) rename {zarrs-bindings => packages/zarrs-bindings}/Cargo.toml (100%) rename {zarrs-bindings => packages/zarrs-bindings}/pyproject.toml (100%) rename {zarrs-bindings => packages/zarrs-bindings}/src/chunk.rs (100%) rename {zarrs-bindings => packages/zarrs-bindings}/src/lib.rs (100%) rename {zarrs-bindings => packages/zarrs-bindings}/src/node.rs (100%) rename {zarrs-bindings => packages/zarrs-bindings}/src/store.rs (100%) diff --git a/.gitignore b/.gitignore index e5474b7c1c..ae184fa731 100644 --- a/.gitignore +++ b/.gitignore @@ -96,4 +96,4 @@ zarr.egg-info/ packages/zarr-metadata/uv.lock # zarrs-bindings Rust build artifacts -zarrs-bindings/target/ +packages/zarrs-bindings/target/ diff --git a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md index 0c7deb8eac..22cb0b5785 100644 --- a/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md +++ 
b/docs/superpowers/specs/2026-06-11-zarrs-functional-api-design.md @@ -39,7 +39,8 @@ codec-pipeline registry through this API (possible later), fancy Two distributions in this repo, hard boundary between them: -1. **Rust crate `zarrs-bindings`** at the repo root (`zarrs-bindings/`), +1. **Rust crate `zarrs-bindings`** under `packages/` (`packages/zarrs-bindings/`, + alongside the existing `zarr-metadata` subpackage), built with maturin (PyO3, `abi3-py312`), publishing wheel `zarrs-bindings` with native module `_zarrs_bindings`. It is a thin, mechanical binding over `zarrs`: functions/pyclasses take metadata as a **JSON string**, a diff --git a/zarrs-bindings/Cargo.lock b/packages/zarrs-bindings/Cargo.lock similarity index 100% rename from zarrs-bindings/Cargo.lock rename to packages/zarrs-bindings/Cargo.lock diff --git a/zarrs-bindings/Cargo.toml b/packages/zarrs-bindings/Cargo.toml similarity index 100% rename from zarrs-bindings/Cargo.toml rename to packages/zarrs-bindings/Cargo.toml diff --git a/zarrs-bindings/pyproject.toml b/packages/zarrs-bindings/pyproject.toml similarity index 100% rename from zarrs-bindings/pyproject.toml rename to packages/zarrs-bindings/pyproject.toml diff --git a/zarrs-bindings/src/chunk.rs b/packages/zarrs-bindings/src/chunk.rs similarity index 100% rename from zarrs-bindings/src/chunk.rs rename to packages/zarrs-bindings/src/chunk.rs diff --git a/zarrs-bindings/src/lib.rs b/packages/zarrs-bindings/src/lib.rs similarity index 100% rename from zarrs-bindings/src/lib.rs rename to packages/zarrs-bindings/src/lib.rs diff --git a/zarrs-bindings/src/node.rs b/packages/zarrs-bindings/src/node.rs similarity index 100% rename from zarrs-bindings/src/node.rs rename to packages/zarrs-bindings/src/node.rs diff --git a/zarrs-bindings/src/store.rs b/packages/zarrs-bindings/src/store.rs similarity index 100% rename from zarrs-bindings/src/store.rs rename to packages/zarrs-bindings/src/store.rs diff --git a/pyproject.toml b/pyproject.toml index 
d24917bc5f..89e05068ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ exclude = [ "/.github", "/bench", "/docs", - "/zarrs-bindings", + "/packages/zarrs-bindings", ] [project] @@ -499,4 +499,4 @@ ignore-words-list = "astroid" zarr = "zarr.testing" [tool.uv.sources] -zarrs-bindings = { path = "zarrs-bindings" } +zarrs-bindings = { path = "packages/zarrs-bindings" } diff --git a/uv.lock b/uv.lock index a1c94a2d4b..838feef237 100644 --- a/uv.lock +++ b/uv.lock @@ -4190,9 +4190,9 @@ zarrs = [ { name = "pytest-xdist" }, { name = "tomlkit" }, { name = "uv" }, - { name = "zarrs-bindings", directory = "zarrs-bindings" }, + { name = "zarrs-bindings", directory = "packages/zarrs-bindings" }, ] [[package]] name = "zarrs-bindings" -source = { directory = "zarrs-bindings" } +source = { directory = "packages/zarrs-bindings" }