# Source code for pgsi_analyzer.benchmarks.template

"""
Scaffolding helpers for user benchmark template generation.
"""

import json
import re
import shutil
from pathlib import Path
from typing import Iterable, List

from .registry import list_algorithms
from .discovery import USER_REGISTRY_FILENAME

# Skeleton ``main.py`` written into a benchmark method folder by the scaffold
# functions in this module. It is rendered with
# ``str.format(algorithm=..., method=...)``, so ``{algorithm}`` and ``{method}``
# are the only placeholders; any other literal brace added to this text must be
# doubled (``{{`` / ``}}``) or formatting will fail.
PYTHON_MAIN_TEMPLATE = '''"""
PGSI benchmark template for: {algorithm} / {method}

How to use:
1) Implement your workload inside `run_workload(...)`.
2) Keep both decorators below; PGSI reads these CSV outputs.
3) Run from CLI with:
   pgsi-analyzer benchmark run --algorithms {algorithm} --methods {method} --benchmarks-dir <this-folder>
"""

from pgsi_analyzer.measurement import measure_energy_to_csv, measure_time_to_csv
from pgsi_analyzer.config import get_measurement_runs


def run_workload() -> None:
    # TODO: Replace this with your actual algorithm implementation.
    # Keep it deterministic where possible for stable benchmark measurements.
    total = 0
    for i in range(10_000):
        total += i * i
    _ = total


@measure_energy_to_csv(n=get_measurement_runs("{algorithm}"), csv_filename="{algorithm}_{method}")
def run_energy_benchmark() -> None:
    run_workload()


@measure_time_to_csv(n=get_measurement_runs("{algorithm}"), csv_filename="{algorithm}_{method}")
def run_time_benchmark() -> None:
    run_workload()


if __name__ == "__main__":
    run_energy_benchmark()
    run_time_benchmark()
'''

# Placeholder Cython source written verbatim (no ``str.format`` pass) to
# ``<benchmark>/cython/raw.pyx``. Apart from the language-level directive it
# contains only comments, including a commented-out example function.
CYTHON_RAW_TEMPLATE = '''# cython: language_level=3

# TODO: Move performance-critical parts from main.py here.
# Example signature:
# def work(int n):
#     cdef int i
#     cdef long total = 0
#     for i in range(n):
#         total += i * i
#     return total
'''

# Build script written verbatim to ``<benchmark>/cython/setup.py`` next to
# ``raw.pyx``; it cythonizes the file named ``raw.pyx`` in that folder.
CYTHON_SETUP_TEMPLATE = '''from setuptools import setup
from Cython.Build import cythonize

# TODO: Update extension module name/path if you rename raw.pyx.
setup(
    ext_modules=cythonize("raw.pyx", language_level=3),
)
'''

# Example C source written verbatim to ``<benchmark>/ctypes/raw.c``. Declared
# as a raw string so the C text is stored exactly as typed, and it is never
# passed through ``str.format``, so the C braces need no escaping.
CTYPES_C_TEMPLATE = r'''#include <stdint.h>

// TODO: Add your native function(s) and call from main.py via ctypes.
// Example export for Windows/Linux:
//   __declspec(dllexport) on Windows, default visibility elsewhere.
#ifdef _WIN32
#define API __declspec(dllexport)
#else
#define API
#endif

API int64_t work(int32_t n) {
    int64_t total = 0;
    for (int32_t i = 0; i < n; i++) {
        total += (int64_t)i * (int64_t)i;
    }
    return total;
}
'''

# Markdown written verbatim to ``README.md`` in the root of a generated
# benchmark project by ``generate_benchmark_template``.
ROOT_README_TEMPLATE = """# PGSI User Benchmark Template

This folder was generated by:
`pgsi-analyzer benchmark init-template`

## Structure

- `<algorithm>/<method>/main.py` for all methods
- `<algorithm>/cython/raw.pyx` + `setup.py`
- `<algorithm>/ctypes/raw.c`

## Run

```bash
pgsi-analyzer benchmark list --algorithms --benchmarks-dir .
pgsi-analyzer benchmark run --algorithms all --methods all --benchmarks-dir . --runs 5
```
"""


def _validate_algorithms(algorithms: Iterable[str]) -> List[str]:
    """Check requested algorithm names against the built-in registry.

    The check runs before any scaffold files are written, so a misspelled
    algorithm name is reported up front instead of leaving a half-generated
    project tree behind.

    Args:
        algorithms: Algorithm identifiers requested by the user; duplicates
            are collapsed.

    Returns:
        List[str]: The requested identifiers, de-duplicated and sorted.

    Raises:
        ValueError: If at least one requested identifier is not returned by
            ``list_algorithms()``.

    Examples:
        >>> _validate_algorithms(["hanoi", "hanoi"])
        ['hanoi']
    """
    unique_names = list(set(algorithms))
    unique_names.sort()
    known = set(list_algorithms())

    # The set difference of unique names, re-sorted, lists the unknown names
    # in the same order as filtering the sorted request would.
    unknown = sorted(set(unique_names) - known)
    if not unknown:
        return unique_names
    raise ValueError(f"Invalid algorithms for template generation: {unknown}. Available: {sorted(known)}")



# [docs]
def generate_benchmark_template(output_dir: Path, algorithms: Iterable[str], force: bool = False) -> Path:
    """
    Generate a benchmark project tree for external user projects using
    pre-implemented built-in source files.

    The generator prefers copying shipped benchmark source files so users start from
    realistic implementations. If no source file for a method is shipped with the
    package, it falls back to templates that are still runnable and
    instrumentation-ready.

    The fallback decision is based on whether anything was copied from the package,
    not on whether the target folder happens to be empty. With ``force=True`` a
    target folder may already hold unrelated files (for example result CSVs); those
    must not suppress the template and leave the method without a ``main.py``.
    Fallback files are only written where no file of that name exists yet, so
    files already present in the target folder are never replaced by a template.

    Args:
        output_dir: Target root directory for generated benchmark project.
        algorithms: Algorithm names to include in the scaffold.
        force: Whether to allow generation inside a non-empty directory.

    Returns:
        Path: Path to the generated benchmark project root.

    Raises:
        ValueError: If algorithm list is invalid or target directory is non-empty
            and ``force`` is ``False``.
        OSError: If files/directories cannot be created or copied.

    Examples:
        >>> from pathlib import Path
        >>> root = generate_benchmark_template(
        ...     output_dir=Path("my-benchmarks"),
        ...     algorithms=["hanoi", "fasta"],
        ...     force=True,
        ... )
        >>> root.name
        'my-benchmarks'
    """
    root = Path(output_dir)
    # Validate first so an invalid algorithm name never leaves a partial tree.
    selected_algorithms = _validate_algorithms(algorithms)

    if root.exists() and any(root.iterdir()) and not force:
        raise ValueError(
            f"Output directory '{root}' is not empty. Use --force to scaffold anyway."
        )
    root.mkdir(parents=True, exist_ok=True)

    (root / "README.md").write_text(ROOT_README_TEMPLATE, encoding="utf-8")

    # Shipped benchmark sources live next to this module: <package>/<algorithm>/<method>/.
    package_benchmarks_root = Path(__file__).resolve().parent
    methods = ("cpython", "pypy", "cython", "ctypes", "py_compile")
    # Copy key source files only (avoid build artifacts / result CSVs).
    source_patterns = ("*.py", "*.pyx", "*.pxd", "*.c", "*.h")

    for algorithm in selected_algorithms:
        source_algorithm_dir = package_benchmarks_root / algorithm
        for method in methods:
            method_source_dir = source_algorithm_dir / method
            method_target_dir = root / algorithm / method
            method_target_dir.mkdir(parents=True, exist_ok=True)

            copied_any = False
            for pattern in source_patterns:
                for source_file in method_source_dir.glob(pattern):
                    if source_file.is_file():
                        shutil.copy2(source_file, method_target_dir / source_file.name)
                        copied_any = True

            # Fall back to templates only when the package shipped nothing for this
            # method and the target does not already provide an entry point.
            if copied_any or (method_target_dir / "main.py").exists():
                continue

            fallback_files = {
                "main.py": PYTHON_MAIN_TEMPLATE.format(algorithm=algorithm, method=method),
            }
            if method == "cython":
                fallback_files["raw.pyx"] = CYTHON_RAW_TEMPLATE
                fallback_files["setup.py"] = CYTHON_SETUP_TEMPLATE
            elif method == "ctypes":
                fallback_files["raw.c"] = CTYPES_C_TEMPLATE

            for filename, content in fallback_files.items():
                destination = method_target_dir / filename
                # Never replace a file that already exists in a forced re-run.
                if not destination.exists():
                    destination.write_text(content, encoding="utf-8")

    return root



def _validate_benchmark_name(name: str) -> str:
    """Validate and normalize a custom benchmark identifier.

    Enforcing a conservative naming pattern prevents invalid paths on different
    operating systems and keeps registry references stable across CLI operations.

    Args:
        name: User-provided benchmark name.

    Returns:
        str: Trimmed benchmark name that passed validation.

    Raises:
        ValueError: If the name is empty or contains unsupported characters.

    Examples:
        >>> _validate_benchmark_name("my_algo-1")
        'my_algo-1'
    """
    clean = name.strip()
    if not clean:
        raise ValueError("Benchmark name cannot be empty.")
    if not re.match(r"^[A-Za-z0-9][A-Za-z0-9_-]*$", clean):
        raise ValueError(
            "Invalid benchmark name. Use letters, numbers, '-' or '_' (no spaces/symbols)."
        )
    return clean


def _upsert_user_registry(benchmarks_dir: Path, benchmark_name: str) -> None:
    """Insert or update a benchmark entry in the user registry file.

    Registry upsert keeps `create benchmark` idempotent: reruns update paths for the
    same benchmark key instead of producing duplicate entries. Other keys already
    present in the registry file are preserved.

    Args:
        benchmarks_dir: Root directory that contains the user registry file
            (named by ``USER_REGISTRY_FILENAME``).
        benchmark_name: Benchmark identifier to register.

    Returns:
        None

    Raises:
        ValueError: If the existing registry file contains invalid JSON, if its
            top-level value is not a JSON object, or if its ``"benchmarks"``
            entry is not a JSON object.
        OSError: If registry file cannot be read or written.

    Examples:
        >>> from pathlib import Path
        >>> _upsert_user_registry(Path("benchmarks"), "my_algo")
    """
    registry_path = benchmarks_dir / USER_REGISTRY_FILENAME
    if registry_path.exists():
        try:
            data = json.loads(registry_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid JSON in {registry_path}: {exc}") from exc
        # json.loads accepts any JSON value (list, string, null, ...); only an
        # object can hold the "benchmarks" mapping, so reject anything else
        # with a clear error instead of failing later with AttributeError.
        if not isinstance(data, dict):
            raise ValueError(
                f"Invalid registry in {registry_path}: top-level JSON value must be an object."
            )
    else:
        data = {"benchmarks": {}}

    benchmarks = data.setdefault("benchmarks", {})
    if not isinstance(benchmarks, dict):
        raise ValueError(
            f"Invalid registry in {registry_path}: 'benchmarks' must be an object."
        )

    # Paths are stored relative to the benchmarks directory; cython/ctypes
    # entries point at the method folder, the others at its main.py.
    benchmarks[benchmark_name] = {
        "cpython": f"{benchmark_name}/cpython/main.py",
        "pypy": f"{benchmark_name}/pypy/main.py",
        "cython": f"{benchmark_name}/cython",
        "ctypes": f"{benchmark_name}/ctypes",
        "py_compile": f"{benchmark_name}/py_compile/main.py",
    }
    registry_path.write_text(json.dumps(data, indent=2), encoding="utf-8")



# [docs]
def create_benchmark_scaffold(
    benchmarks_dir: Path,
    benchmark_name: str,
    force: bool = False,
    register: bool = True,
) -> Path:
    """
    Create a single benchmark scaffold under benchmarks_dir and register it.

    This function is optimized for incremental project growth: users can add one
    algorithm at a time while preserving the same folder contract expected by
    discovery and orchestrator modules.

    The benchmark name is validated and the non-empty check is performed before
    anything is created on disk, so a rejected call leaves the filesystem
    untouched.

    Args:
        benchmarks_dir: Benchmark project root where scaffold will be created.
        benchmark_name: Name of the new benchmark folder/key.
        force: Whether to allow writing into an existing non-empty benchmark folder.
        register: Whether to update ``pgsi_registry.json`` with the new benchmark.

    Returns:
        Path: Path to the created benchmark directory.

    Raises:
        ValueError: If benchmark name is invalid or target directory exists and is
            non-empty while ``force`` is ``False``.
        OSError: If files/directories cannot be created or written.

    Examples:
        >>> from pathlib import Path
        >>> created = create_benchmark_scaffold(
        ...     benchmarks_dir=Path("benchmarks"),
        ...     benchmark_name="my_algo",
        ...     force=True,
        ...     register=True,
        ... )
        >>> created.name
        'my_algo'
    """
    # Validate before any filesystem write so a bad name does not leave an
    # empty benchmarks directory behind.
    name = _validate_benchmark_name(benchmark_name)
    root = Path(benchmarks_dir)
    target = root / name

    # A direct-children check is enough to detect a non-empty directory.
    if not force and target.is_dir() and any(target.iterdir()):
        raise ValueError(
            f"Benchmark directory '{target}' already exists and is not empty. Use --force to overwrite files."
        )

    root.mkdir(parents=True, exist_ok=True)

    methods = ("cpython", "pypy", "cython", "ctypes", "py_compile")
    for method in methods:
        method_dir = target / method
        method_dir.mkdir(parents=True, exist_ok=True)
        (method_dir / "main.py").write_text(
            PYTHON_MAIN_TEMPLATE.format(algorithm=name, method=method),
            encoding="utf-8",
        )
        # Native methods additionally get their companion source/build files.
        if method == "cython":
            (method_dir / "raw.pyx").write_text(CYTHON_RAW_TEMPLATE, encoding="utf-8")
            (method_dir / "setup.py").write_text(CYTHON_SETUP_TEMPLATE, encoding="utf-8")
        elif method == "ctypes":
            (method_dir / "raw.c").write_text(CTYPES_C_TEMPLATE, encoding="utf-8")

    if register:
        _upsert_user_registry(root, name)
    return target