# Source code for pgsi_analyzer.benchmarks.discovery

"""
Benchmark discovery helpers for built-in and user-defined benchmarks.

User-defined benchmarks are discovered from a folder with this layout:
    <benchmarks_dir>/<algorithm>/<method>/main.py

Methods should match pgsi execution methods (cpython, pypy, cython, ctypes, py_compile).
"""

import json
from pathlib import Path
from typing import Dict, List, Optional

from .registry import BENCHMARKS as BUILTIN_BENCHMARKS
from .registry import VALID_METHODS


# Registry shape used throughout this module:
# algorithm name -> {method name -> path string}.
RegistryMap = Dict[str, Dict[str, str]]
# File name of the optional user registry looked up inside the benchmarks directory.
USER_REGISTRY_FILENAME = "pgsi_registry.json"


def discover_user_benchmarks(benchmarks_dir: Path) -> RegistryMap:
    """
    Scan ``benchmarks_dir`` for user-defined benchmarks.

    Every immediate sub-directory is treated as an algorithm, named after
    the directory. For each method in ``VALID_METHODS`` the sub-directory
    ``<algorithm>/<method>`` is inspected:

    * ``cython`` / ``ctypes``: registered as the resolved method directory
      when that directory exists.
    * any other method: registered as the resolved ``main.py`` inside the
      method directory when that file exists.

    Algorithms for which no method was found are left out of the result.

    Args:
        benchmarks_dir: Root folder laid out as ``<algorithm>/<method>/main.py``.

    Returns:
        Mapping of algorithm name to ``{method: resolved path string}``,
        with algorithms visited in sorted directory order.

    Raises:
        ValueError: If ``benchmarks_dir`` does not exist or is not a directory.
    """
    base_dir = Path(benchmarks_dir)
    if not (base_dir.exists() and base_dir.is_dir()):
        raise ValueError(f"Benchmarks directory does not exist or is not a directory: {base_dir}")

    # Build-based methods must point at the directory (not main.py) so the
    # build step can find setup.py / *.c sitting next to main.py.
    build_based = ("cython", "ctypes")

    found: RegistryMap = {}
    for entry in sorted(base_dir.iterdir()):
        if not entry.is_dir():
            continue

        per_method: Dict[str, str] = {}
        for method_name in VALID_METHODS:
            variant_dir = entry / method_name
            if method_name in build_based:
                target = variant_dir
                usable = target.exists() and target.is_dir()
            else:
                target = variant_dir / "main.py"
                usable = target.exists() and target.is_file()
            if usable:
                per_method[method_name] = str(target.resolve())

        if per_method:
            found[entry.name] = per_method

    return found


def load_user_registry(benchmarks_dir: Path) -> RegistryMap:
    """
    Load the optional user registry file ``<benchmarks_dir>/pgsi_registry.json``.

    Expected JSON structure::

        {"benchmarks": {"algo-name": {"cpython": "...", ...}}}

    Relative paths are interpreted relative to ``benchmarks_dir``; every
    returned path is resolved. Individual entries are skipped (not
    reported) when the per-algorithm value is not an object, the method
    name is not in ``VALID_METHODS``, or the path is not a string.
    Algorithms left without any usable method are omitted.

    Args:
        benchmarks_dir: Directory that may contain the registry file.

    Returns:
        Mapping of algorithm name to ``{method: resolved path string}``;
        empty when the registry file does not exist.

    Raises:
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass), or if the top-level value or the
            ``"benchmarks"`` value is not a JSON object.
    """
    root = Path(benchmarks_dir)
    registry_file = root / USER_REGISTRY_FILENAME
    if not registry_file.exists():
        return {}

    data = json.loads(registry_file.read_text(encoding="utf-8"))
    # A structurally wrong file used to fail with an opaque AttributeError on
    # .get()/.items(); report it as a ValueError like malformed JSON instead.
    if not isinstance(data, dict):
        raise ValueError(
            f"User registry file must contain a JSON object at the top level: {registry_file}"
        )
    raw = data.get("benchmarks", {})
    if not isinstance(raw, dict):
        raise ValueError(
            f"'benchmarks' in user registry file must be a JSON object: {registry_file}"
        )

    loaded: RegistryMap = {}
    for algorithm, methods in raw.items():
        if not isinstance(methods, dict):
            continue

        normalized_methods: Dict[str, str] = {}
        for method, relative_or_abs in methods.items():
            if method not in VALID_METHODS or not isinstance(relative_or_abs, str):
                continue
            candidate = Path(relative_or_abs)
            # Relative entries are anchored at the benchmarks directory.
            full = candidate if candidate.is_absolute() else (root / candidate)
            normalized_methods[method] = str(full.resolve())

        if normalized_methods:
            loaded[algorithm] = normalized_methods

    return loaded


def build_registry(benchmarks_dir: Optional[Path] = None) -> RegistryMap:
    """
    Assemble the effective benchmark registry.

    The result always starts from a per-algorithm copy of the built-in
    benchmarks. When ``benchmarks_dir`` is given, benchmarks discovered in
    that folder are layered on top, and entries from the folder's registry
    file are layered on top of those. A colliding algorithm name replaces
    the earlier entry as a whole (its method map is not merged).

    Args:
        benchmarks_dir: Optional folder with user-defined benchmarks.

    Returns:
        Mapping of algorithm name to ``{method: path string}``.
    """
    effective: RegistryMap = {}
    for name, builtin_methods in BUILTIN_BENCHMARKS.items():
        # Copy so later changes to the result never touch the built-in table.
        effective[name] = builtin_methods.copy()

    if benchmarks_dir is None:
        return effective

    # Precedence, lowest to highest: built-ins, directory scan, registry file.
    user_defined = discover_user_benchmarks(benchmarks_dir)
    user_defined.update(load_user_registry(benchmarks_dir))
    effective.update(user_defined)
    return effective


def list_algorithms_from_registry(registry: RegistryMap) -> List[str]:
    """Return the algorithm names of ``registry`` in sorted order."""
    names = list(registry)
    names.sort()
    return names


def list_methods_from_registry(registry: RegistryMap, algorithm: Optional[str] = None) -> List[str]:
    """
    List execution methods, optionally restricted to one algorithm.

    Args:
        registry: Mapping of algorithm name to ``{method: path string}``.
        algorithm: Algorithm to inspect; ``None`` means "all valid methods".

    Returns:
        A copy of ``VALID_METHODS`` when ``algorithm`` is ``None``; otherwise
        the members of ``VALID_METHODS`` registered for ``algorithm``, in
        ``VALID_METHODS`` order.

    Raises:
        ValueError: If ``algorithm`` is not present in ``registry``.
    """
    if algorithm is None:
        return VALID_METHODS.copy()

    if algorithm not in registry:
        raise ValueError(f"Unknown algorithm: {algorithm}. Available: {list_algorithms_from_registry(registry)}")

    registered = registry[algorithm]
    supported: List[str] = []
    for candidate in VALID_METHODS:
        if candidate in registered:
            supported.append(candidate)
    return supported


def get_benchmark_path_from_registry(registry: RegistryMap, algorithm: str, method: str) -> Path:
    """
    Look up the path registered for ``algorithm`` / ``method``.

    Args:
        registry: Mapping of algorithm name to ``{method: path string}``.
        algorithm: Algorithm name to look up.
        method: Method name to look up for that algorithm.

    Returns:
        The registered path string wrapped in a ``Path``.

    Raises:
        ValueError: If ``algorithm`` is not in ``registry``, or ``method`` is
            not registered for it. The message lists the available choices.
    """
    if algorithm not in registry:
        # Same "Available: [...]" suffix as list_methods_from_registry uses.
        raise ValueError(f"Unknown algorithm: {algorithm}. Available: {sorted(registry)}")

    methods = registry[algorithm]
    if method not in methods:
        raise ValueError(
            f"Unknown method '{method}' for algorithm '{algorithm}'. "
            f"Available: {sorted(methods)}"
        )
    return Path(methods[method])