"""
Scaffolding helpers for user benchmark template generation.
"""
import json
import re
import shutil
from pathlib import Path
from typing import Iterable, List
from .registry import list_algorithms
from .discovery import USER_REGISTRY_FILENAME
# Fallback ``main.py`` source for one benchmark method. It is rendered with
# ``str.format(algorithm=..., method=...)``, so the only braces allowed in the
# text are the ``{algorithm}`` / ``{method}`` placeholders. Function bodies are
# indented so the rendered file is syntactically valid Python.
PYTHON_MAIN_TEMPLATE = '''"""
PGSI benchmark template for: {algorithm} / {method}
How to use:
1) Implement your workload inside `run_workload(...)`.
2) Keep both decorators below; PGSI reads these CSV outputs.
3) Run from CLI with:
pgsi-analyzer benchmark run --algorithms {algorithm} --methods {method} --benchmarks-dir <this-folder>
"""
from pgsi_analyzer.measurement import measure_energy_to_csv, measure_time_to_csv
from pgsi_analyzer.config import get_measurement_runs
def run_workload() -> None:
    # TODO: Replace this with your actual algorithm implementation.
    # Keep it deterministic where possible for stable benchmark measurements.
    total = 0
    for i in range(10_000):
        total += i * i
    _ = total
@measure_energy_to_csv(n=get_measurement_runs("{algorithm}"), csv_filename="{algorithm}_{method}")
def run_energy_benchmark() -> None:
    run_workload()
@measure_time_to_csv(n=get_measurement_runs("{algorithm}"), csv_filename="{algorithm}_{method}")
def run_time_benchmark() -> None:
    run_workload()
if __name__ == "__main__":
    run_energy_benchmark()
    run_time_benchmark()
'''
# Placeholder ``raw.pyx`` written for the cython method. Everything after the
# language-level directive is commented out; the example keeps its body
# indentation so that uncommenting it yields a well-formed Cython function.
CYTHON_RAW_TEMPLATE = '''# cython: language_level=3
# TODO: Move performance-critical parts from main.py here.
# Example signature:
# def work(int n):
#     cdef int i
#     cdef long total = 0
#     for i in range(n):
#         total += i * i
#     return total
'''
# Contents of the ``setup.py`` that the scaffold functions in this module write
# next to ``raw.pyx`` in the cython method directory; it passes ``raw.pyx`` to
# ``cythonize``.
CYTHON_SETUP_TEMPLATE = '''from setuptools import setup
from Cython.Build import cythonize
# TODO: Update extension module name/path if you rename raw.pyx.
setup(
ext_modules=cythonize("raw.pyx", language_level=3),
)
'''
# C source that the scaffold functions in this module write as ``raw.c`` in the
# ctypes method directory. It defines an ``API`` export macro and one sample
# function, ``work``, that sums squares below ``n``.
CTYPES_C_TEMPLATE = r'''#include <stdint.h>
// TODO: Add your native function(s) and call from main.py via ctypes.
// Example export for Windows/Linux:
// __declspec(dllexport) on Windows, default visibility elsewhere.
#ifdef _WIN32
#define API __declspec(dllexport)
#else
#define API
#endif
API int64_t work(int32_t n) {
int64_t total = 0;
for (int32_t i = 0; i < n; i++) {
total += (int64_t)i * (int64_t)i;
}
return total;
}
'''
# Markdown text written verbatim (no formatting placeholders) to ``README.md``
# at the root of a generated benchmark project by
# ``generate_benchmark_template``.
ROOT_README_TEMPLATE = """# PGSI User Benchmark Template
This folder was generated by:
`pgsi-analyzer benchmark init-template`
## Structure
- `<algorithm>/<method>/main.py` for all methods
- `<algorithm>/cython/raw.pyx` + `setup.py`
- `<algorithm>/ctypes/raw.c`
## Run
```bash
pgsi-analyzer benchmark list --algorithms --benchmarks-dir .
pgsi-analyzer benchmark run --algorithms all --methods all --benchmarks-dir . --runs 5
```
"""
def _validate_algorithms(algorithms: Iterable[str]) -> List[str]:
    """Check requested algorithm names against the built-in registry.

    Duplicates are collapsed and the result is sorted, so callers receive a
    stable, unique list. Nothing is written to disk here, which lets callers
    reject a bad request before creating any files.

    Args:
        algorithms: Algorithm identifiers requested by the user.

    Returns:
        List[str]: The requested identifiers, de-duplicated and sorted.

    Raises:
        ValueError: If at least one identifier is not returned by
            ``list_algorithms()``. The message lists the offending names and
            the available ones.

    Examples:
        >>> _validate_algorithms(["hanoi", "hanoi"])
        ['hanoi']
    """
    unique_names = list(set(algorithms))
    unique_names.sort()
    valid = set(list_algorithms())
    invalid = [candidate for candidate in unique_names if candidate not in valid]
    if not invalid:
        return unique_names
    raise ValueError(f"Invalid algorithms for template generation: {invalid}. Available: {sorted(valid)}")
def generate_benchmark_template(output_dir: Path, algorithms: Iterable[str], force: bool = False) -> Path:
    """Generate a benchmark project tree for an external user project.

    For every selected algorithm and every supported method, source files
    shipped next to this module (``<package>/<algorithm>/<method>/``) are copied
    into ``<output_dir>/<algorithm>/<method>/``. When a method directory is
    still empty after copying, runnable placeholder templates are written
    instead.

    Args:
        output_dir: Target root directory for the generated benchmark project.
        algorithms: Algorithm names to include in the scaffold.
        force: Whether to allow generation inside a non-empty directory.

    Returns:
        Path: Path to the generated benchmark project root.

    Raises:
        ValueError: If the algorithm list is invalid, or the target directory
            is non-empty and ``force`` is ``False``.
        OSError: If files/directories cannot be created or copied.

    Examples:
        >>> from pathlib import Path
        >>> root = generate_benchmark_template(
        ...     output_dir=Path("my-benchmarks"),
        ...     algorithms=["hanoi", "fasta"],
        ...     force=True,
        ... )
        >>> root.name
        'my-benchmarks'
    """
    root = Path(output_dir)
    # Validate first so a typo cannot leave a partially generated tree behind.
    selected_algorithms = _validate_algorithms(algorithms)
    # ``force`` is tested first so the directory is only scanned when needed.
    if not force and root.exists() and any(root.iterdir()):
        raise ValueError(
            f"Output directory '{root}' is not empty. Use --force to scaffold anyway."
        )
    root.mkdir(parents=True, exist_ok=True)
    (root / "README.md").write_text(ROOT_README_TEMPLATE, encoding="utf-8")

    package_benchmarks_root = Path(__file__).resolve().parent
    methods = ("cpython", "pypy", "cython", "ctypes", "py_compile")
    # Copy key source files only (avoid build artifacts / result CSVs).
    source_patterns = ("*.py", "*.pyx", "*.pxd", "*.c", "*.h")

    for algorithm in selected_algorithms:
        source_algorithm_dir = package_benchmarks_root / algorithm
        for method in methods:
            method_source_dir = source_algorithm_dir / method
            method_target_dir = root / algorithm / method
            method_target_dir.mkdir(parents=True, exist_ok=True)

            for pattern in source_patterns:
                for source_file in method_source_dir.glob(pattern):
                    if source_file.is_file():
                        shutil.copy2(source_file, method_target_dir / source_file.name)

            # Fall back to placeholder templates only when the target directory
            # is still empty, so files already present there are not replaced.
            if any(method_target_dir.iterdir()):
                continue
            (method_target_dir / "main.py").write_text(
                PYTHON_MAIN_TEMPLATE.format(algorithm=algorithm, method=method),
                encoding="utf-8",
            )
            if method == "cython":
                (method_target_dir / "raw.pyx").write_text(CYTHON_RAW_TEMPLATE, encoding="utf-8")
                (method_target_dir / "setup.py").write_text(CYTHON_SETUP_TEMPLATE, encoding="utf-8")
            elif method == "ctypes":
                (method_target_dir / "raw.c").write_text(CTYPES_C_TEMPLATE, encoding="utf-8")
    return root
def _validate_benchmark_name(name: str) -> str:
"""Validate and normalize a custom benchmark identifier.
Enforcing a conservative naming pattern prevents invalid paths on different
operating systems and keeps registry references stable across CLI operations.
Args:
name: User-provided benchmark name.
Returns:
str: Trimmed benchmark name that passed validation.
Raises:
ValueError: If the name is empty or contains unsupported characters.
Examples:
>>> _validate_benchmark_name("my_algo-1")
'my_algo-1'
"""
clean = name.strip()
if not clean:
raise ValueError("Benchmark name cannot be empty.")
if not re.match(r"^[A-Za-z0-9][A-Za-z0-9_-]*$", clean):
raise ValueError(
"Invalid benchmark name. Use letters, numbers, '-' or '_' (no spaces/symbols)."
)
return clean
def _upsert_user_registry(benchmarks_dir: Path, benchmark_name: str) -> None:
    """Insert or replace a benchmark entry in the user registry file.

    The registry is the JSON file named ``USER_REGISTRY_FILENAME`` inside
    ``benchmarks_dir``. The entry for ``benchmark_name`` is overwritten on each
    call, so re-running for the same name does not create duplicates. Other
    keys already present in the file are kept.

    Args:
        benchmarks_dir: Root directory that contains the registry file.
        benchmark_name: Benchmark identifier to register.

    Returns:
        None

    Raises:
        ValueError: If the existing registry file is not valid JSON, its
            top-level value is not an object, or its ``"benchmarks"`` value is
            not an object.
        OSError: If the registry file cannot be read or written.

    Examples:
        >>> from pathlib import Path
        >>> _upsert_user_registry(Path("benchmarks"), "my_algo")
    """
    registry_path = benchmarks_dir / USER_REGISTRY_FILENAME
    try:
        raw_text = registry_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # No registry yet: start a fresh one.
        data = {"benchmarks": {}}
    else:
        try:
            data = json.loads(raw_text)
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid JSON in {registry_path}: {exc}") from exc
        # Valid JSON can still have the wrong shape (e.g. a list); reject it
        # explicitly instead of failing later with AttributeError/TypeError.
        if not isinstance(data, dict):
            raise ValueError(
                f"Invalid registry in {registry_path}: top-level value must be a JSON object."
            )

    benchmarks = data.setdefault("benchmarks", {})
    if not isinstance(benchmarks, dict):
        raise ValueError(
            f"Invalid registry in {registry_path}: 'benchmarks' must be a JSON object."
        )

    # Paths are relative to ``benchmarks_dir``; cython and ctypes point at the
    # method directory, the other methods at their ``main.py``.
    benchmarks[benchmark_name] = {
        "cpython": f"{benchmark_name}/cpython/main.py",
        "pypy": f"{benchmark_name}/pypy/main.py",
        "cython": f"{benchmark_name}/cython",
        "ctypes": f"{benchmark_name}/ctypes",
        "py_compile": f"{benchmark_name}/py_compile/main.py",
    }
    registry_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
def create_benchmark_scaffold(
    benchmarks_dir: Path,
    benchmark_name: str,
    force: bool = False,
    register: bool = True,
) -> Path:
    """Create a single benchmark scaffold under ``benchmarks_dir``.

    One sub-directory per supported method is created below
    ``<benchmarks_dir>/<benchmark_name>/`` and filled with placeholder
    templates. The benchmark is optionally recorded in the user registry file.

    Args:
        benchmarks_dir: Benchmark project root where the scaffold is created.
        benchmark_name: Name of the new benchmark folder/key.
        force: Whether to allow writing into an existing non-empty benchmark
            folder. Template files with the same names are overwritten.
        register: Whether to update the user registry with the new benchmark.

    Returns:
        Path: Path to the created benchmark directory.

    Raises:
        ValueError: If the benchmark name is invalid, or the target directory
            exists and is non-empty while ``force`` is ``False``.
        OSError: If files/directories cannot be created or written.

    Examples:
        >>> from pathlib import Path
        >>> created = create_benchmark_scaffold(
        ...     benchmarks_dir=Path("benchmarks"),
        ...     benchmark_name="my_algo",
        ...     force=True,
        ...     register=True,
        ... )
        >>> created.name
        'my_algo'
    """
    # Validate before any filesystem write so an invalid name does not leave an
    # empty benchmarks directory behind.
    name = _validate_benchmark_name(benchmark_name)
    root = Path(benchmarks_dir)
    target = root / name
    if not force and target.exists() and any(target.rglob("*")):
        raise ValueError(
            f"Benchmark directory '{target}' already exists and is not empty. Use --force to overwrite files."
        )
    root.mkdir(parents=True, exist_ok=True)

    methods = ("cpython", "pypy", "cython", "ctypes", "py_compile")
    for method in methods:
        method_dir = target / method
        method_dir.mkdir(parents=True, exist_ok=True)
        (method_dir / "main.py").write_text(
            PYTHON_MAIN_TEMPLATE.format(algorithm=name, method=method),
            encoding="utf-8",
        )
        # cython and ctypes need extra native-source placeholders.
        if method == "cython":
            (method_dir / "raw.pyx").write_text(CYTHON_RAW_TEMPLATE, encoding="utf-8")
            (method_dir / "setup.py").write_text(CYTHON_SETUP_TEMPLATE, encoding="utf-8")
        elif method == "ctypes":
            (method_dir / "raw.c").write_text(CTYPES_C_TEMPLATE, encoding="utf-8")

    if register:
        _upsert_user_registry(root, name)
    return target