Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3285.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `zarr.core.json_parse.parse_json`, a unified runtime type checker that validates JSON-decoded metadata values against a type annotation and returns data assignable to it (coercing sequences to tuples), or raises a clear error. It handles the JSON-relevant type categories used by array metadata: primitives (`None`, `str`, `int`, `float`, `bool`), `Literal`, unions/`Optional`, fixed and variadic `tuple`, `Sequence`/`list`, `Mapping`/`dict`, and `TypedDict`. This begins de-duplicating the scattered per-field `parse_*` helpers into a single shared validation path (see #3285).
40 changes: 26 additions & 14 deletions src/zarr/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,27 +104,39 @@ class BloscCname(metaclass=_DeprecatedStrEnumMeta):


def parse_typesize(data: JSON) -> int:
    """Validate a blosc ``typesize`` value and return it as an ``int``.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    int
        The parsed value, which is strictly greater than 0.

    Raises
    ------
    TypeError
        If ``parse_json(data, int)`` raises ``ValueError`` or ``TypeError``.
    ValueError
        If the parsed value is less than or equal to 0.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): a plain isinstance(data, int) check also accepts bool;
    # confirm parse_json(data, int) treats bool the way callers expect.
    try:
        parsed: int = parse_json(data, int)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Value must be an int. Got {type(data)} instead.") from exc
    if parsed > 0:
        return parsed
    raise ValueError(
        f"Value must be greater than 0. Got {parsed}, which is less or equal to 0."
    )


# todo: real validation
def parse_clevel(data: JSON) -> int:
    """Check that a blosc ``clevel`` value is an ``int`` and return it.

    Only the type is checked; no range validation is applied.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    int
        The value returned by ``parse_json(data, int)``.

    Raises
    ------
    TypeError
        If ``parse_json(data, int)`` raises ``ValueError`` or ``TypeError``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): a plain isinstance(data, int) check also accepts bool;
    # confirm parse_json(data, int) treats bool the way callers expect.
    try:
        parsed: int = parse_json(data, int)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Value should be an int. Got {type(data)} instead.") from exc
    return parsed


def parse_blocksize(data: JSON) -> int:
    """Check that a blosc ``blocksize`` value is an ``int`` and return it.

    Only the type is checked; no range validation is applied.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    int
        The value returned by ``parse_json(data, int)``.

    Raises
    ------
    TypeError
        If ``parse_json(data, int)`` raises ``ValueError`` or ``TypeError``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): a plain isinstance(data, int) check also accepts bool;
    # confirm parse_json(data, int) treats bool the way callers expect.
    try:
        parsed: int = parse_json(data, int)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Value should be an int. Got {type(data)} instead.") from exc
    return parsed


def _parse_cname(data: object) -> BloscCnameLiteral:
Expand Down
14 changes: 9 additions & 5 deletions src/zarr/codecs/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@


def parse_gzip_level(data: JSON) -> int:
    """Validate a gzip compression level and return it as an ``int``.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    int
        The parsed level, a member of ``range(10)`` (0 through 9).

    Raises
    ------
    TypeError
        If ``parse_json(data, int)`` raises ``ValueError`` or ``TypeError``.
    ValueError
        If the parsed value is not in ``range(10)``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): a plain isinstance(data, int) check also accepts bool;
    # confirm parse_json(data, int) treats bool the way callers expect.
    try:
        parsed: int = parse_json(data, int)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Expected int, got {type(data)}") from exc
    if parsed not in range(10):
        raise ValueError(
            f"Expected an integer from the inclusive range (0, 9). Got {parsed} instead."
        )
    return parsed


@dataclass(frozen=True)
Expand Down
24 changes: 16 additions & 8 deletions src/zarr/codecs/zstd.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,25 @@


def parse_zstd_level(data: JSON) -> int:
    """Validate a zstd compression level and return it as an ``int``.

    Only an upper bound is enforced; no lower bound is checked here.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    int
        The parsed level, which is less than 23.

    Raises
    ------
    TypeError
        If ``parse_json(data, int)`` raises ``ValueError`` or ``TypeError``.
    ValueError
        If the parsed value is 23 or greater.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): a plain isinstance(data, int) check also accepts bool;
    # confirm parse_json(data, int) treats bool the way callers expect.
    try:
        parsed: int = parse_json(data, int)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Got value with type {type(data)}, but expected an int.") from exc
    if parsed >= 23:
        raise ValueError(f"Value must be less than or equal to 22. Got {parsed} instead.")
    return parsed


def parse_checksum(data: JSON) -> bool:
    """Check that a zstd ``checksum`` value is a ``bool`` and return it.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.

    Returns
    -------
    bool
        The value returned by ``parse_json(data, bool)``.

    Raises
    ------
    TypeError
        If ``parse_json(data, bool)`` raises ``ValueError`` or ``TypeError``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        parsed: bool = parse_json(data, bool)
    except (ValueError, TypeError) as exc:
        # Keep this helper's TypeError contract regardless of what parse_json raised.
        raise TypeError(f"Expected bool. Got {type(data)}.") from exc
    return parsed


@dataclass(frozen=True)
Expand Down
9 changes: 6 additions & 3 deletions src/zarr/core/chunk_key_encodings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@


def parse_separator(data: JSON) -> SeparatorLiteral:
    """Validate a chunk-key separator and return it.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check against ``Literal[".", "/"]``.

    Returns
    -------
    SeparatorLiteral
        The value returned by ``parse_json``, cast to ``SeparatorLiteral``.

    Raises
    ------
    ValueError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        return cast("SeparatorLiteral", parse_json(data, Literal[".", "/"]))
    except (ValueError, TypeError) as exc:
        # Both failure types collapse to ValueError, the type this helper raises.
        raise ValueError(f"Expected an '.' or '/' separator. Got {data} instead.") from exc


class ChunkKeyEncodingParams(TypedDict):
Expand Down
27 changes: 15 additions & 12 deletions src/zarr/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,15 @@ def parse_enum[E: Enum](data: object, cls: type[E]) -> E:


def parse_name(data: JSON, expected: str | None = None) -> str:
    """Check that ``data`` is a string, optionally equal to ``expected``.

    Parameters
    ----------
    data : JSON
        The JSON-decoded value to check.
    expected : str or None, optional
        When not ``None``, the exact string the parsed value must equal.

    Returns
    -------
    str
        The parsed string.

    Raises
    ------
    TypeError
        If ``parse_json(data, str)`` raises ``ValueError`` or ``TypeError``.
    ValueError
        If ``expected`` is given and the parsed string differs from it.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        # A separate local keeps the raw input intact for the error message below.
        name = cast("str", parse_json(data, str))
    except (ValueError, TypeError) as exc:
        raise TypeError(f"Expected a string, got an instance of {type(data)}.") from exc
    if expected is None or name == expected:
        return name
    raise ValueError(f"Expected '{expected}'. Got {name} instead.")


def parse_configuration(data: JSON) -> JSON:
Expand Down Expand Up @@ -204,15 +207,15 @@ def parse_fill_value(data: Any) -> Any:


def parse_order(data: Any) -> Literal["C", "F"]:
    """Validate a memory-order flag and return it.

    Parameters
    ----------
    data : Any
        The value to check against ``Literal["C", "F"]``.

    Returns
    -------
    Literal["C", "F"]
        The value returned by ``parse_json``, cast to the literal type.

    Raises
    ------
    ValueError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        return cast("Literal['C', 'F']", parse_json(data, Literal["C", "F"]))
    except (ValueError, TypeError) as exc:
        # Re-raise as ValueError with this helper's own message so callers keep
        # seeing the same exception type and text whatever parse_json raises.
        raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") from exc


def parse_bool(data: Any) -> bool:
    """Check that ``data`` is a ``bool`` and return it.

    Parameters
    ----------
    data : Any
        The value to check.

    Returns
    -------
    bool
        The value returned by ``parse_json(data, bool)``.

    Raises
    ------
    ValueError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        return cast("bool", parse_json(data, bool))
    except (ValueError, TypeError) as exc:
        # Re-raise as ValueError with this helper's own message so callers keep
        # seeing the same exception type and text whatever parse_json raises.
        raise ValueError(f"Expected bool, got {data} instead.") from exc


def parse_int(data: Any) -> int:
Expand Down
7 changes: 3 additions & 4 deletions src/zarr/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ def enable_gpu(self) -> ConfigSet:


def parse_indexing_order(data: Any) -> Literal["C", "F"]:
    """Validate an indexing-order flag and return it.

    Parameters
    ----------
    data : Any
        The value to check against ``Literal["C", "F"]``.

    Returns
    -------
    Literal["C", "F"]
        The value returned by ``parse_json``, cast to the literal type.

    Raises
    ------
    ValueError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        return cast("Literal['C', 'F']", parse_json(data, Literal["C", "F"]))
    except (ValueError, TypeError) as exc:
        # Re-raise as ValueError with this helper's own message so callers keep
        # seeing the same exception type and text whatever parse_json raises.
        msg = f"Expected one of ('C', 'F'), got {data} instead."
        raise ValueError(msg) from exc
22 changes: 14 additions & 8 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,24 @@

def parse_zarr_format(data: Any) -> ZarrFormat:
    """Parse the zarr_format field from metadata.

    Parameters
    ----------
    data : Any
        The value to check against ``Literal[2, 3]``.

    Returns
    -------
    ZarrFormat
        The value returned by ``parse_json``, cast to ``ZarrFormat``.

    Raises
    ------
    ValueError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    # NOTE(review): an equality-based membership test such as `data in (2, 3)`
    # also matches 2.0 and 3.0; confirm how parse_json treats float input here.
    try:
        return cast("ZarrFormat", parse_json(data, Literal[2, 3]))
    except (ValueError, TypeError) as exc:
        # Both failure types collapse to ValueError, the type this helper raises.
        msg = f"Invalid zarr_format. Expected one of 2 or 3. Got {data}."
        raise ValueError(msg) from exc


def parse_node_type(data: Any) -> NodeType:
    """Parse the node_type field from metadata.

    Parameters
    ----------
    data : Any
        The value to check against ``Literal["array", "group"]``.

    Returns
    -------
    NodeType
        The value returned by ``parse_json``, cast to the literal type.

    Raises
    ------
    MetadataValidationError
        If ``parse_json`` raises ``ValueError`` or ``TypeError`` for ``data``.
    """
    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from zarr.core.json_parse import parse_json

    try:
        return cast("Literal['array', 'group']", parse_json(data, Literal["array", "group"]))
    except (ValueError, TypeError) as exc:
        # Translate the generic parse failure into the metadata-specific error type.
        msg = f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{data}'."
        raise MetadataValidationError(msg) from exc


# todo: convert None to empty dict
Expand Down
Loading
Loading