Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,27 @@ uv run ome-iris verify
uv run ome-iris export-rocrate --dataset nf1-cellpainting-shrunken
```

Download a reproducible subset for local development or benchmarking:

```bash
uv run ome-iris download nf1 \
--output .benchmark-data/ome-iris/nf1 \
--preset tiny \
--channel DAPI
```

Python API:

```python
from ome_iris import datasets

datasets.download(
"nf1",
output_dir=".benchmark-data/ome-iris/nf1",
subset={"images": 20, "channels": ["DAPI"]},
)
```

Fetch output modes:

```bash
Expand Down Expand Up @@ -82,6 +103,25 @@ uv run ome-iris fetch --data-dir /tmp/ome-iris-data
uv run ome-iris verify --data-dir /tmp/ome-iris-data
```

## What `download` does

`ome-iris download` writes a small, reproducible subset directly into the
directory given by `--output`. It accepts named dataset aliases such as `nf1`,
preset sizes (`--preset tiny`, `small`, or `benchmark`), an image limit
(`--limit-images`), channel filters (`--channel`), plate/well/site filters
(`--plate`, `--well`, `--site`), and Z/T/C ranges (`--z-range`, `--t-range`,
`--c-range`) where filenames expose those values.

Each downloaded subset includes a `manifest.json` that records the source
dataset, the selected subset options, downloaded file paths, source URLs,
SHA-256 checksums, file sizes, image shapes, dtypes, and file metadata.
Existing files are reused instead of being downloaded again, and they are still
included in the manifest. Use `--validate-only` to verify an existing subset
cache against its manifest without downloading any data:

```bash
uv run ome-iris download nf1 \
--output .benchmark-data/ome-iris/nf1 \
--validate-only
```

## Add a dataset

1. Add or update a dataset manifest and catalog metadata.
Expand Down
8 changes: 8 additions & 0 deletions docs/src/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
:undoc-members:
```

## Dataset subsets

```{eval-rst}
.. automodule:: OME_IRIS.datasets
:members:
:undoc-members:
```

## Verify

```{eval-rst}
Expand Down
10 changes: 7 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,19 @@ docs = [
]

[tool.setuptools]
package-dir = { "" = "src" }
package-dir = { "" = "src", ome_iris = "src/OME_IRIS" }
include-package-data = true
package-data.OME_IRIS = [
"data/*.yaml",
"data/*.csv",
"data/datasets/*.yaml",
]
packages.find.where = [ "src" ]
packages.find.include = [ "OME_IRIS*" ]
package-data.ome_iris = [
"data/*.yaml",
"data/*.csv",
"data/datasets/*.yaml",
]
packages = [ "OME_IRIS", "ome_iris" ]

[tool.setuptools_scm]
write_to = "src/OME_IRIS/_version.py"
Expand Down
11 changes: 10 additions & 1 deletion src/OME_IRIS/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
"""OME-IRIS package."""

__all__ = ["__version__"]
from __future__ import annotations

import sys

from . import datasets

__all__ = ["__version__", "datasets"]

try:
from ._version import version as __version__
except ImportError: # pragma: no cover
__version__ = "0+unknown"

sys.modules.setdefault("ome_iris", sys.modules[__name__])
sys.modules.setdefault("ome_iris.datasets", datasets)
51 changes: 51 additions & 0 deletions src/OME_IRIS/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

from OME_IRIS.clean import clean_local_data
from OME_IRIS.datasets import download
from OME_IRIS.fetch import fetch_datasets
from OME_IRIS.rocrate import export_rocrate_metadata
from OME_IRIS.scaffold import scaffold_dataset_manifest
Expand All @@ -16,6 +17,24 @@ def build_parser() -> argparse.ArgumentParser:
)
sub = parser.add_subparsers(dest="command", required=True)

download_cmd = sub.add_parser(
"download", help="Download a reproducible dataset subset"
)
download_cmd.add_argument("dataset")
download_cmd.add_argument("--output", required=True)
download_cmd.add_argument("--preset", choices=["tiny", "small", "benchmark"])
download_cmd.add_argument("--limit-images", type=int)
download_cmd.add_argument("--channel", dest="channels", action="append")
download_cmd.add_argument("--plate", action="append")
download_cmd.add_argument("--well", action="append")
download_cmd.add_argument("--site", action="append")
download_cmd.add_argument("--z-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--t-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--c-range", nargs=2, type=int, metavar=("START", "STOP"))
download_cmd.add_argument("--validate-only", action="store_true")
download_cmd.add_argument("--manifests-dir", default="src/OME_IRIS/data/datasets")
download_cmd.add_argument("--silent", action="store_true")

fetch_cmd = sub.add_parser("fetch", help="Fetch dataset files")
fetch_cmd.add_argument("--dataset", dest="dataset_id")
fetch_cmd.add_argument("--tier", choices=["tiny", "small", "realistic"])
Expand Down Expand Up @@ -68,6 +87,38 @@ def main() -> int:
parser = build_parser()
args = parser.parse_args()

if args.command == "download":
subset = {
"images": args.limit_images,
"channels": args.channels,
"plate": args.plate,
"well": args.well,
"site": args.site,
"z": tuple(args.z_range) if args.z_range else None,
"t": tuple(args.t_range) if args.t_range else None,
"c": tuple(args.c_range) if args.c_range else None,
}
result = download(
args.dataset,
output_dir=Path(args.output),
subset=subset,
preset=args.preset,
manifests_dir=Path(args.manifests_dir),
validate_only=args.validate_only,
silent=args.silent,
)
print(f"Downloaded: {result.downloaded}")
print(f"Skipped: {result.skipped}")
print(f"Validated: {result.validated}")
if result.manifest_path:
print(f"Manifest: {result.manifest_path}")
if result.failed:
print("Failed:")
for item in result.failed:
print(f"- {item}")
return 1
return 0

if args.command == "fetch":
result = fetch_datasets(
manifests_dir=Path(args.manifests_dir),
Expand Down
Loading
Loading