FacereDataset/tools/epro2/__main__.py

"""CLI: replay every document in a Pro 3.x project and print a summary.

Usage:
    uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
    uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>

Designed for sanity-checking the parser/replay against ESP-VoCat first;
later we'll diff replayed state against the editor-rendered ground truth.
"""

from __future__ import annotations

import argparse
import json
import sys
from collections import Counter
from pathlib import Path

from .project_relations import ProjectRelations
from .relations import Relations
from .replay import Project, replay_project


def _print_summary(proj: Project) -> None:
    """Print a project-wide overview of the replayed documents.

    Output, in order: a header (project uuid, editor version, document and
    parse-error counts), one table row per docType sorted by document count
    (descending), the 25 most common op types across the project, and the
    first few parse errors (with a trailing note when more were omitted).

    Args:
        proj: Replayed project. Reads ``project_uuid``, ``editor_version``,
            ``documents``, ``parse_errors`` and calls ``by_doc_type()`` and
            ``aggregate_op_counts()``.
    """
    max_errors_shown = 10

    print(f"Project:        {proj.project_uuid}")
    print(f"Editor version: {proj.editor_version}")
    print(f"Documents:      {len(proj.documents)}  (parse_errors={len(proj.parse_errors)})")
    print()

    by_type = proj.by_doc_type()
    print(f"{'count':>6}  {'docType':<14s}  {'objects':>10s}  {'ops':>10s}  {'deletes':>8s}  {'untyped_ops':>11s}")
    # Most populous docTypes first; the sort is stable, so ties keep the
    # order in which by_doc_type() yielded them.
    for t in sorted(by_type, key=lambda k: -len(by_type[k])):
        ds = by_type[t]
        objs = sum(len(d.objects) for d in ds)
        # sum(values()) equals Counter.total() but also works before 3.10.
        ops = sum(sum(d.op_counts.values()) for d in ds)
        dels = sum(d.deletes for d in ds)
        untyped = sum(d.untyped_ops for d in ds)
        # A missing docType (None / empty) would make the ``s`` format spec
        # raise TypeError; show it as "?" instead of crashing the summary.
        label = str(t or "?")
        print(f"{len(ds):>6d}  {label:<14s}  {objs:>10d}  {ops:>10d}  {dels:>8d}  {untyped:>11d}")
    print()

    print("Top 25 op types across project:")
    for t, n in proj.aggregate_op_counts().most_common(25):
        print(f"  {n:>9d}  {t}")

    if proj.parse_errors:
        print()
        print(f"Parse errors ({len(proj.parse_errors)}):")
        for u, e in proj.parse_errors[:max_errors_shown]:
            print(f"  {u[:32]}  {e}")
        # Make the truncation explicit so the list is not mistaken for complete.
        hidden = len(proj.parse_errors) - max_errors_shown
        if hidden > 0:
            print(f"  ... and {hidden} more")


def _dump_doc(proj: Project, doc_uuid: str, n_objects: int = 5) -> None:
    """Print the replayed state of one document.

    Args:
        proj: Replayed project whose ``documents`` mapping is searched.
        doc_uuid: Full document uuid, or a prefix that matches exactly one
            document uuid in the project.
        n_objects: Maximum number of replayed objects to print; negative
            values are treated as 0.

    When ``doc_uuid`` is neither an exact key nor a unique prefix, a
    diagnostic is written to stderr and nothing is dumped.
    """
    from itertools import islice

    if doc_uuid not in proj.documents:
        # try prefix match
        candidates = [u for u in proj.documents if u.startswith(doc_uuid)]
        if len(candidates) != 1:
            print(f"  no unique match for {doc_uuid!r} (matches: {candidates[:5]})", file=sys.stderr)
            return
        doc_uuid = candidates[0]
    d = proj.documents[doc_uuid]
    print()
    print("=" * 72)
    print(f"Document: {d.doc_uuid}")
    print(f"docType:  {d.doc_type}")
    # head and object payloads are truncated so one huge record cannot
    # flood the terminal.
    print(f"head:     {json.dumps(d.head, ensure_ascii=False)[:200]}")
    print("op_counts (top 15):")
    for t, n in d.op_counts.most_common(15):
        print(f"  {n:>7d}  {t}")
    print(f"objects:  {len(d.objects)}  deletes:  {d.deletes}  untyped_ops:  {d.untyped_ops}")
    if d.objects:
        # Report how many objects are actually shown, and take them lazily
        # instead of copying every item of a potentially large document.
        shown = min(max(n_objects, 0), len(d.objects))
        print(f"\nFirst {shown} objects:")
        for k, v in islice(d.objects.items(), shown):
            print(f"  {k}  →  {json.dumps(v, ensure_ascii=False)[:240]}")


def _print_relations(proj: Project) -> None:
    """Print a per-docType table of relation statistics summed over the project.

    Builds ``Relations`` for every document, adds each document's
    ``summary()`` counters into a per-docType total, and prints one row per
    docType ordered by number of documents (descending). Documents without a
    docType are grouped under ``"?"``.

    Args:
        proj: Replayed project whose ``documents`` are summarised.
    """
    print()
    print("=" * 72)
    print("Relations (per docType, summed)")
    print("-" * 72)

    # group docs by docType, build relations, sum stats
    aggregated: dict[str, Counter[str]] = {}
    # Documents per docType, tallied in the same pass so the ordering below
    # does not have to rescan every document for every docType.
    doc_counts: Counter[str] = Counter()
    for d in proj.documents.values():
        rel = Relations.build(d)
        doc_type = d.doc_type or "?"
        doc_counts[doc_type] += 1
        agg = aggregated.setdefault(doc_type, Counter())
        for k, v in rel.summary().items():
            agg[k] += v

    if not aggregated:
        print("  (no documents)")
        return

    # ordered by doc count desc; stable sort keeps first-seen order on ties
    order = sorted(aggregated, key=lambda t: -doc_counts[t])
    cols = [
        "parts", "components", "pins", "pads", "wires", "nets", "layers", "rules",
        "lines_grouped", "attrs_attached", "pad_nets",
        "unresolved_parents", "unresolved_wires", "unresolved_layers",
    ]
    print(f"  {'docType':<12s}  " + "  ".join(f"{c:>16s}" for c in cols))
    for t in order:
        row = aggregated[t]
        # Stats that a docType never reported are shown as 0.
        print(
            f"  {t:<12s}  "
            + "  ".join(f"{row.get(c, 0):>16d}" for c in cols)
        )


def _first_resolved_pad_net(pr, rel, pcb_uuid, cid):
    """Return ``(record, resolution)`` for the first PAD_NET record of ``cid``
    that resolves cross-doc, or ``None`` when no record resolves."""
    for _pad_id, records in rel.pad_nets_by_pad.items():
        for rec in records:
            if rec["comp"] != cid:
                continue
            resolved = pr.resolve_pcb_pad_net(pcb_uuid, cid, rec["pin"], rec["pad"])
            if resolved:
                return rec, resolved
    return None


def _print_project_relations(proj: Project) -> None:
    """Print cross-document resolution stats followed by a few sample lookups.

    Sections printed: every key/value of the ``ProjectRelations`` summary, up
    to three SCH_PAGE component → symbol resolutions, and up to three PCB
    component → footprint resolutions, each followed by the first PAD_NET
    record of that component that resolves across documents (if any).
    """
    sample_limit = 3

    relations = ProjectRelations.build(proj)
    stats = relations.summary()
    print()
    print("=" * 72)
    print("Project Relations (cross-doc)")
    print("-" * 72)
    for label, value in stats.items():
        print(f"  {label:<40s}  {value}")

    # Schematic side: components that resolve to at least one symbol doc.
    print()
    print("Sample sch component → symbol resolutions:")
    shown = 0
    for sch_uuid in relations.docs_by_type.get("SCH_PAGE", []):
        for cid in relations.per_doc[sch_uuid].components:
            symbols = relations.resolve_symbol_docs(sch_uuid, cid)
            pid = relations.component_to_partid.get((sch_uuid, cid))
            if not symbols:
                continue
            print(f"  sch={sch_uuid[:12]} comp={cid}  partId={pid!r}  →  symbol={symbols[0][:12]} (+{len(symbols) - 1})")
            shown += 1
            if shown >= sample_limit:
                break
        if shown >= sample_limit:
            break

    # PCB side: components with a footprint, plus one resolved PAD_NET each.
    print()
    print("Sample pcb component → footprint + first PAD_NET cross-doc:")
    shown = 0
    for pcb_uuid in relations.docs_by_type.get("PCB", []):
        rel = relations.per_doc[pcb_uuid]
        for cid in rel.components:
            fp = relations.resolve_footprint_doc(pcb_uuid, cid)
            if not fp:
                continue
            attrs = relations.attrs_for_pcb_component(pcb_uuid, cid)
            print(f"  pcb={pcb_uuid[:12]} comp={cid}  →  fp={fp[:12]}  Designator={attrs.get('Designator')!r}  Value={attrs.get('Value')!r}")
            hit = _first_resolved_pad_net(relations, rel, pcb_uuid, cid)
            if hit is not None:
                rec, resolved = hit
                pad = resolved["pad"]
                print(f"    PAD_NET pin={rec['pin']} pad={rec['pad']} net={rec['net_name']}  →  pad@({pad.get('centerX')},{pad.get('centerY')})")
            # The component counts as a sample whether or not a PAD_NET resolved.
            shown += 1
            if shown >= sample_limit:
                break
        if shown >= sample_limit:
            break


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: replay a project and print the requested reports.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        0 after all requested reports have been printed.

    Raises:
        SystemExit: Raised by argparse (status 2) for invalid arguments or
            when the given project path does not exist.
    """
    ap = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
    ap.add_argument("project_dir", type=Path, help="data/raw/oshwhub/<project_uuid>/")
    ap.add_argument(
        "--dump-doc",
        action="append",
        default=[],
        help="dump replayed state of one document (uuid or unique prefix); repeatable",
    )
    ap.add_argument(
        "--relations",
        action="store_true",
        help="build cross-object indices and print per-docType summary",
    )
    ap.add_argument(
        "--project-relations",
        action="store_true",
        help="build cross-document indices (partId → SYMBOL, comp → FOOTPRINT, PAD_NET cross-doc)",
    )
    args = ap.parse_args(argv)

    # Fail fast with a usage error on a mistyped path rather than handing a
    # nonexistent location to the replay step.
    if not args.project_dir.exists():
        ap.error(f"project directory not found: {args.project_dir}")

    proj = replay_project(args.project_dir)
    _print_summary(proj)
    for doc_id in args.dump_doc:
        _dump_doc(proj, doc_id)
    if args.relations:
        _print_relations(proj)
    if args.project_relations:
        _print_project_relations(proj)
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())