# FacereDataset/tools/epro2/__main__.py

"""CLI: replay every document in a Pro 3.x project and print a summary.

Usage:
    uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
    uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>
    uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --relations

Designed for sanity-checking the parser/replay against ESP-VoCat first;
later we'll diff replayed state against the editor-rendered ground truth.
"""

from __future__ import annotations

import argparse
import json
import sys
from collections import Counter
from pathlib import Path

from .relations import Relations
from .replay import Project, replay_project


def _print_summary(proj: Project) -> None:
    """Print a project-level overview of the replayed documents to stdout.

    Output, in order: project identity (uuid, editor version, document and
    parse-error counts), one table row per docType with summed object / op /
    delete / untyped-op counts, the 25 most common op types, and finally a
    capped listing of parse errors (only when there are any).

    Args:
        proj: Replayed project. This function reads ``project_uuid``,
            ``editor_version``, ``documents``, ``parse_errors`` and calls
            ``by_doc_type()`` and ``aggregate_op_counts()``.
    """
    max_errors_shown = 10

    print(f"Project:        {proj.project_uuid}")
    print(f"Editor version: {proj.editor_version}")
    print(f"Documents:      {len(proj.documents)}  (parse_errors={len(proj.parse_errors)})")
    print()

    by_type = proj.by_doc_type()
    print(f"{'count':>6}  {'docType':<14s}  {'objects':>10s}  {'ops':>10s}  {'deletes':>8s}  {'untyped_ops':>11s}")
    # Largest groups first; sorted() is stable, so ties keep the mapping's order.
    for t in sorted(by_type, key=lambda k: -len(by_type[k])):
        ds = by_type[t]
        objs = sum(len(d.objects) for d in ds)
        # sum(values()) instead of Counter.total() so this also runs before Python 3.10.
        ops = sum(sum(d.op_counts.values()) for d in ds)
        dels = sum(d.deletes for d in ds)
        untyped = sum(d.untyped_ops for d in ds)
        # A None/empty docType would make the ``s`` format spec raise TypeError;
        # label it "?" the same way _print_relations does.
        label = t or "?"
        print(f"{len(ds):>6d}  {label:<14s}  {objs:>10d}  {ops:>10d}  {dels:>8d}  {untyped:>11d}")
    print()

    print("Top 25 op types across project:")
    for t, n in proj.aggregate_op_counts().most_common(25):
        print(f"  {n:>9d}  {t}")

    if proj.parse_errors:
        print()
        print(f"Parse errors ({len(proj.parse_errors)}):")
        for u, e in proj.parse_errors[:max_errors_shown]:
            print(f"  {u[:32]}  {e}")
        # Make the truncation visible so the header count and the listing agree.
        hidden = len(proj.parse_errors) - max_errors_shown
        if hidden > 0:
            print(f"  ... ({hidden} more not shown)")


def _dump_doc(proj: Project, doc_uuid: str, n_objects: int = 5) -> None:
    """Print the replayed state of a single document to stdout.

    Args:
        proj: Replayed project; only ``proj.documents`` (uuid -> document) is read.
        doc_uuid: Full document uuid, or a prefix matching exactly one uuid
            in ``proj.documents``.
        n_objects: Maximum number of objects to print. Values below zero are
            treated as zero.

    When ``doc_uuid`` is neither an exact key nor a unique prefix, a message
    listing up to five candidates is written to stderr and nothing is dumped.
    """
    from itertools import islice  # local import: only this function needs it

    if doc_uuid not in proj.documents:
        # try prefix match
        candidates = [u for u in proj.documents if u.startswith(doc_uuid)]
        if len(candidates) != 1:
            print(f"  no unique match for {doc_uuid!r} (matches: {candidates[:5]})", file=sys.stderr)
            return
        doc_uuid = candidates[0]
    d = proj.documents[doc_uuid]
    print()
    print("=" * 72)
    print(f"Document: {d.doc_uuid}")
    print(f"docType:  {d.doc_type}")
    # head is JSON-encoded and cut to 200 characters to keep the dump readable.
    print(f"head:     {json.dumps(d.head, ensure_ascii=False)[:200]}")
    print("op_counts (top 15):")
    for t, n in d.op_counts.most_common(15):
        print(f"  {n:>7d}  {t}")
    print(f"objects:  {len(d.objects)}  deletes:  {d.deletes}  untyped_ops:  {d.untyped_ops}")

    # Report how many objects are actually shown, not how many were requested.
    shown = min(max(n_objects, 0), len(d.objects))
    if shown:
        print(f"\nFirst {shown} objects:")
        # islice avoids copying every object just to look at the first few.
        for k, v in islice(d.objects.items(), shown):
            print(f"  {k}  →  {json.dumps(v, ensure_ascii=False)[:240]}")


def _print_relations(proj: Project) -> None:
    """Print per-docType totals of ``Relations`` summary statistics.

    For every document in ``proj.documents`` a ``Relations`` index is built
    and its ``summary()`` values are summed into a per-docType counter.
    Documents whose ``doc_type`` is falsy are grouped under ``"?"``. One table
    row is printed per docType, ordered by number of documents (descending);
    only the columns listed in ``cols`` are shown, and a missing one prints 0.

    Args:
        proj: Replayed project; only ``proj.documents`` is read.
    """
    print()
    print("=" * 72)
    print("Relations (per docType, summed)")
    print("-" * 72)

    # Single pass: sum the relation stats and count documents per docType.
    aggregated: dict[str, Counter[str]] = {}
    doc_counts: Counter[str] = Counter()
    for d in proj.documents.values():
        rel = Relations.build(d)
        doc_type = d.doc_type or "?"
        doc_counts[doc_type] += 1
        agg = aggregated.setdefault(doc_type, Counter())
        for k, v in rel.summary().items():
            agg[k] += v

    if not aggregated:
        print("  (no documents)")
        return

    # Ordered by doc count desc, using the counts gathered above rather than
    # rescanning every document per docType. sorted() is stable, so ties keep
    # first-seen order.
    order = sorted(aggregated, key=lambda t: -doc_counts[t])
    cols = [
        "parts", "components", "pins", "pads", "wires", "nets", "layers", "rules",
        "lines_grouped", "attrs_attached", "pad_nets",
        "unresolved_parents", "unresolved_wires", "unresolved_layers",
    ]
    print(f"  {'docType':<12s}  " + "  ".join(f"{c:>16s}" for c in cols))
    for t in order:
        row = aggregated[t]
        print(
            f"  {t:<12s}  "
            + "  ".join(f"{row.get(c, 0):>16d}" for c in cols)
        )


def main(argv: list[str] | None = None) -> int:
    """Parse the command line, replay the project and print the requested reports.

    Args:
        argv: Argument list to parse; ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Always 0. Argument errors and ``--help`` exit through argparse's own
        ``SystemExit``.
    """
    parser = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
    parser.add_argument("project_dir", type=Path, help="data/raw/oshwhub/<project_uuid>/")
    parser.add_argument(
        "--dump-doc",
        action="append",
        default=[],
        help="dump replayed state of one document (uuid or unique prefix); repeatable",
    )
    parser.add_argument(
        "--relations",
        action="store_true",
        help="build cross-object indices and print per-docType summary",
    )
    opts = parser.parse_args(argv)

    project = replay_project(opts.project_dir)

    # The summary is always printed; the remaining reports are opt-in.
    _print_summary(project)
    for wanted in opts.dump_doc:
        _dump_doc(project, wanted)
    if opts.relations:
        _print_relations(project)
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())