"""CLI: replay every document in a Pro 3.x project and print a summary.

Usage:
    uv run python -m tools.epro2 data/raw/oshwhub/
    uv run python -m tools.epro2 data/raw/oshwhub/ --dump-doc <uuid-or-prefix>

Designed for sanity-checking the parser/replay against ESP-VoCat first;
later we'll diff replayed state against the editor-rendered ground truth.
"""

from __future__ import annotations

import argparse
import json
import sys
from collections import Counter
from itertools import islice
from pathlib import Path

from .replay import Project, replay_project


def _print_summary(proj: Project) -> None:
    """Print a project-wide overview of the replayed documents to stdout.

    The report has four parts: project header, one table row per docType
    (most documents first), the 25 most common op types across the project,
    and, when present, the first 10 parse errors.
    """
    print(f"Project: {proj.project_uuid}")
    print(f"Editor version: {proj.editor_version}")
    print(f"Documents: {len(proj.documents)} (parse_errors={len(proj.parse_errors)})")
    print()

    by_type = proj.by_doc_type()
    print(f"{'count':>6} {'docType':<14s} {'objects':>10s} {'ops':>10s} {'deletes':>8s} {'untyped_ops':>11s}")
    # sorted() is stable even with reverse=True, so docTypes with equal
    # document counts keep the order in which by_doc_type() yields them.
    for doc_type, docs in sorted(by_type.items(), key=lambda kv: len(kv[1]), reverse=True):
        objs = sum(len(d.objects) for d in docs)
        ops = sum(d.op_counts.total() for d in docs)
        dels = sum(d.deletes for d in docs)
        untyped = sum(d.untyped_ops for d in docs)
        print(f"{len(docs):>6d} {doc_type:<14s} {objs:>10d} {ops:>10d} {dels:>8d} {untyped:>11d}")

    print()
    print("Top 25 op types across project:")
    for op_type, count in proj.aggregate_op_counts().most_common(25):
        print(f" {count:>9d} {op_type}")

    if proj.parse_errors:
        print()
        print(f"Parse errors ({len(proj.parse_errors)}):")
        # Only the first 10 are listed; the header carries the full count.
        for uuid, err in proj.parse_errors[:10]:
            print(f" {uuid[:32]} {err}")


def _dump_doc(proj: Project, doc_uuid: str, n_objects: int = 5) -> bool:
    """Print the replayed state of a single document to stdout.

    Args:
        proj: The replayed project to look the document up in.
        doc_uuid: Exact document uuid, or a prefix matching exactly one
            document in ``proj.documents``.
        n_objects: Maximum number of replayed objects to print.

    Returns:
        True if the document was resolved and dumped; False if ``doc_uuid``
        matched no document or was an ambiguous prefix (a diagnostic is
        written to stderr in that case).
    """
    if doc_uuid not in proj.documents:
        # Not an exact key: fall back to a unique-prefix match.
        candidates = [u for u in proj.documents if u.startswith(doc_uuid)]
        if len(candidates) != 1:
            print(f" no unique match for {doc_uuid!r} (matches: {candidates[:5]})", file=sys.stderr)
            return False
        doc_uuid = candidates[0]

    d = proj.documents[doc_uuid]
    print()
    print("=" * 72)
    print(f"Document: {d.doc_uuid}")
    print(f"docType: {d.doc_type}")
    # Serialized head/object JSON is truncated to keep the output readable.
    print(f"head: {json.dumps(d.head, ensure_ascii=False)[:200]}")
    print("op_counts (top 15):")
    for op_type, count in d.op_counts.most_common(15):
        print(f" {count:>7d} {op_type}")
    print(f"objects: {len(d.objects)} deletes: {d.deletes} untyped_ops: {d.untyped_ops}")

    if d.objects:
        print(f"\nFirst {n_objects} objects:")
        # islice avoids copying every object just to show the first few.
        for key, value in islice(d.objects.items(), max(n_objects, 0)):
            print(f" {key} → {json.dumps(value, ensure_ascii=False)[:240]}")
    return True


def main(argv: list[str] | None = None) -> int:
    """Replay a project directory, print its summary, and dump requested docs.

    Args:
        argv: Command-line arguments excluding the program name; ``None``
            makes argparse read ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 if any ``--dump-doc`` value
        could not be resolved to exactly one document.
    """
    ap = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
    # NOTE(review): the help path below looks like it lost a placeholder
    # between the two slashes — confirm the intended text.
    ap.add_argument("project_dir", type=Path, help="data/raw/oshwhub//")
    ap.add_argument(
        "--dump-doc",
        action="append",
        default=[],
        help="dump replayed state of one document (uuid or unique prefix); repeatable",
    )
    args = ap.parse_args(argv)

    proj = replay_project(args.project_dir)
    _print_summary(proj)

    # Attempt every requested dump even after a failure, so one bad id does
    # not hide the others; the exit status reports whether any lookup failed.
    any_failed = False
    for doc_id in args.dump_doc:
        if not _dump_doc(proj, doc_id):
            any_failed = True
    return 1 if any_failed else 0


if __name__ == "__main__":
    raise SystemExit(main())