From 3de9c302ce18ac1769e2f48530e8afc03ea4b4d6 Mon Sep 17 00:00:00 2001
From: scawful
Date: Tue, 30 Dec 2025 13:21:56 -0500
Subject: [PATCH] test: add pytest coverage for plugin utilities

---
Notes (ignored by git am; not part of the commit message):
* Line breaks and indentation were rebuilt from a whitespace-collapsed copy
  of this patch. Confirm the indentation of the pyproject.toml context line
  against the target tree before applying.
* tests/test_config.py now writes tmp_path.as_posix() into the TOML fixture,
  because a TOML basic string treats backslashes in a Windows path as escapes.
* The four new Python files gained docstrings and comments, so their "index"
  lines were dropped and the hunk counts and diffstat were recomputed.

 AGENTS.md                    |  2 +-
 pyproject.toml               |  8 ++++++++
 tests/conftest.py            | 13 +++++++++++++
 tests/test_config.py         | 39 +++++++++++++++++++++++++++++++++++++++
 tests/test_registry.py       | 27 +++++++++++++++++++++++++++
 tests/test_resource_index.py | 31 +++++++++++++++++++++++++++++++
 6 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_config.py
 create mode 100644 tests/test_registry.py
 create mode 100644 tests/test_resource_index.py

diff --git a/AGENTS.md b/AGENTS.md
index f4ab9c1..f5a1f7d 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -19,4 +19,4 @@
 - Concise, engineering notebook tone.
 
 ## How to verify (tests/commands)
-- Unknown / needs verification (no test harness yet).
+- `pytest`
diff --git a/pyproject.toml b/pyproject.toml
index 0c9b87d..1edf202 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,14 @@ authors = [
     {name = "scawful"}
 ]
 
+[project.optional-dependencies]
+test = [
+    "pytest>=7.4"
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+
 [build-system]
 requires = ["setuptools>=68"]
 build-backend = "setuptools.build_meta"
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,13 @@
+"""Pytest bootstrap: put the repository's ``src`` directory on ``sys.path``."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# tests/conftest.py -> parents[1] is the directory that contains tests/.
+ROOT = Path(__file__).resolve().parents[1]
+SRC = ROOT / "src"
+# Prepend once so imports resolve against SRC before any other sys.path entry.
+if str(SRC) not in sys.path:
+    sys.path.insert(0, str(SRC))
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,39 @@
+"""Tests for the TOML loaders in ``afs_scawful.config``."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from afs_scawful.config import load_training_paths, load_training_resources
+
+
+def test_load_training_paths_expands_paths(tmp_path: Path) -> None:
+    """``~``-prefixed entries under ``[paths]`` load as resolved home paths."""
+    config_path = tmp_path / "training_paths.toml"
+    config_path.write_text(
+        "[paths]\n"
+        "training_root = \"~/training\"\n"
+        "datasets = \"~/training/datasets\"\n",
+        encoding="utf-8",
+    )
+
+    data = load_training_paths(config_path=config_path)
+    paths = data["paths"]
+    assert paths["training_root"] == (Path.home() / "training").resolve()
+    assert paths["datasets"] == (Path.home() / "training" / "datasets").resolve()
+
+
+def test_load_training_resources_expands_roots(tmp_path: Path) -> None:
+    """A root listed under ``[resource_discovery]`` loads as a resolved path."""
+    config_path = tmp_path / "training_resources.toml"
+    # as_posix(): a TOML basic string treats "\" as an escape character, so a
+    # native Windows path (C:\Users\...) would be rejected or mis-decoded.
+    config_path.write_text(
+        "[resource_discovery]\n"
+        f"resource_roots = [\"{tmp_path.as_posix()}\"]\n",
+        encoding="utf-8",
+    )
+
+    data = load_training_resources(config_path=config_path)
+    roots = data["resource_discovery"]["resource_roots"]
+    assert roots == [tmp_path.resolve()]
diff --git a/tests/test_registry.py b/tests/test_registry.py
new file mode 100644
--- /dev/null
+++ b/tests/test_registry.py
@@ -0,0 +1,27 @@
+"""Tests for ``afs_scawful.registry.build_dataset_registry``."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from afs_scawful.registry import build_dataset_registry
+
+
+def test_build_dataset_registry(tmp_path: Path) -> None:
+    """One dataset directory produces one entry with its stats and metadata."""
+    datasets_root = tmp_path / "datasets"
+    dataset_dir = datasets_root / "alpha"
+    dataset_dir.mkdir(parents=True)
+
+    (dataset_dir / "train.jsonl").write_text("{}\n", encoding="utf-8")
+    (dataset_dir / "stats.json").write_text("{\"samples\": 1}\n", encoding="utf-8")
+    (dataset_dir / "metadata.json").write_text("{\"source\": \"test\"}\n", encoding="utf-8")
+
+    registry = build_dataset_registry(datasets_root)
+    datasets = registry["datasets"]
+
+    assert len(datasets) == 1
+    entry = datasets[0]
+    assert entry["name"] == "alpha"
+    assert entry["stats"]["samples"] == 1
+    assert entry["metadata"]["source"] == "test"
diff --git a/tests/test_resource_index.py b/tests/test_resource_index.py
new file mode 100644
--- /dev/null
+++ b/tests/test_resource_index.py
@@ -0,0 +1,31 @@
+"""Tests for ``afs_scawful.resource_index.ResourceIndexer``."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from afs_scawful.resource_index import ResourceIndexer
+
+
+def test_resource_indexer_dedupes(tmp_path: Path) -> None:
+    """Files with identical content are indexed once and counted as duplicates."""
+    root = tmp_path / "resources"
+    root.mkdir()
+
+    # a.txt and b.txt share the same content; c.md differs.
+    (root / "a.txt").write_text("same\n", encoding="utf-8")
+    (root / "b.txt").write_text("same\n", encoding="utf-8")
+    (root / "c.md").write_text("diff\n", encoding="utf-8")
+
+    indexer = ResourceIndexer(
+        resource_roots=[root],
+        search_patterns=["**/*.txt", "**/*.md"],
+        exclude_patterns=[],
+        index_path=tmp_path / "index.json",
+    )
+
+    result = indexer.build_index()
+    assert result.total_files == 2
+    assert result.duplicates_found == 1
+    assert result.by_type.get("txt", 0) == 1
+    assert result.by_type.get("md", 0) == 1