Add tools/epro2 — EPRO2 parser + replay prototype

为 Pro 3.x .epro2 工程源数据写解析骨架，作为下游做 EPRO2→KiCad 转换器之前的基础设施。
在 ESP-VoCat (278 docs / 7.5 MB) + 220V 桌面电源 (771 docs / 26 MB) 上端到端跑通，0 parse errors。

模块结构：
- tools/epro2/parser.py：单行 → Op：rstrip("|") + split("||") + json.loads
- tools/epro2/replay.py：state-machine：DOCHEAD 设头；其它 op 按 id 做 upsert（payload=None 当 delete）；EDIT_HEAD/META/CANVAS/PREFERENCE/PANELIZE 当 doc 级单例存
- tools/epro2/__main__.py：CLI：传项目目录，走 manifest.json 重放每个 doc，按 docType 聚合输出；可选 --dump-doc 看单文档详情
- tools/epro2/tests/：6 个单测，pin 死 trailing-pipe / 三段消息 / id-only-no-payload / 嵌入管道符等坑

ESP-VoCat 输出示例：

    Documents: 278 (parse_errors=0)

    count  docType    objects   ops  deletes  untyped_ops
      105  SYMBOL        4124  4439        0            0
       88  DEVICE          88   264        0            0
       55  FOOTPRINT     4641  4855        0            0
        9  SCH_PAGE      7982  8167       42            0
        6  PCB           8428  8547       38            0
        6  BOARD            9    18        0            0
        6  SCH              9    26        0            0
        1  BLOB             4     8        0            0
        1  FONT            16    28        0            0
        1  CONFIG           2     3        0            0

    Top ops: ATTR 7035 / ELE_PLACEHOLDER 4225 / LINE 3005 / LAYER 2318 ...

PCB 文档单 dump 验证语义正确：META 含 title (PCB-EchoEar-CoreBoard-V1_0) + board 引用；CANVAS 含 origin/grid/unit (mm)；LAYER 1/2/3 = TOP/BOTTOM/TOP_SILK，配色齐全。

跑法：

    uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
    uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>

下一步（不在本 commit）：
1. 把对象间关系建起来（COMPONENT.partId → PART；LINE.lineGroup → WIRE；PAD_NET id → PAD + NET 三方关联）—— 当前 replay 只做扁平 dict
2. EPRO2 → KiCad 序列化层（Forge 投影硬门槛）
3. 在 Pro 3.x 三个项目做整体回归（X86 主板 7374 docs 可作压力测试）

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 22:10:27 +08:00
parent c721e08c93
commit 3c57e75d51
7 changed files with 377 additions and 0 deletions
--- a/tools/epro2/main.py
+++ b/tools/epro2/main.py
@@ -0,0 +1,93 @@
+"""CLI: replay every document in a Pro 3.x project and print a summary.
+
+Usage:
+    uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
+    uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>
+
+Designed for sanity-checking the parser/replay against ESP-VoCat first;
+later we'll diff replayed state against the editor-rendered ground truth.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import Counter
+from pathlib import Path
+
+from .replay import Project, replay_project
+
+
+def _print_summary(proj: Project) -> None:
+    """Print a project-level report to stdout.
+
+    The report has a header (uuid, editor version, document and parse-error
+    counts), one table row per docType, the 25 most common op types and, if
+    any exist, the first 10 parse errors.
+    """
+    print(f"Project:        {proj.project_uuid}")
+    print(f"Editor version: {proj.editor_version}")
+    errors = proj.parse_errors
+    print(f"Documents:      {len(proj.documents)}  (parse_errors={len(errors)})")
+    print()
+
+    groups = proj.by_doc_type()
+    print(f"{'count':>6}  {'docType':<14s}  {'objects':>10s}  {'ops':>10s}  {'deletes':>8s}  {'untyped_ops':>11s}")
+    # Biggest groups first; sorted() stays stable under reverse=True, so equally
+    # sized groups keep the order in which `groups` iterates them.
+    for doc_type in sorted(groups, key=lambda name: len(groups[name]), reverse=True):
+        docs = groups[doc_type]
+        n_objects = sum(len(doc.objects) for doc in docs)
+        n_ops = sum(doc.op_counts.total() for doc in docs)
+        n_deletes = sum(doc.deletes for doc in docs)
+        n_untyped = sum(doc.untyped_ops for doc in docs)
+        print(f"{len(docs):>6d}  {doc_type:<14s}  {n_objects:>10d}  {n_ops:>10d}  {n_deletes:>8d}  {n_untyped:>11d}")
+    print()
+
+    print("Top 25 op types across project:")
+    for op_type, count in proj.aggregate_op_counts().most_common(25):
+        print(f"  {count:>9d}  {op_type}")
+
+    # Nothing more to report when every document parsed cleanly.
+    if not errors:
+        return
+    print()
+    print(f"Parse errors ({len(errors)}):")
+    for doc_uuid, message in errors[:10]:
+        print(f"  {doc_uuid[:32]}  {message}")
+
+
+def _dump_doc(proj: Project, doc_uuid: str, n_objects: int = 5) -> None:
+    """Print the replayed state of a single document to stdout.
+
+    ``doc_uuid`` is first looked up as an exact key of ``proj.documents`` and
+    then as a key prefix. When the prefix does not select exactly one
+    document, a note is written to stderr and nothing else is printed.
+    The object listing is the ``[:n_objects]`` slice of the document's objects.
+    """
+    documents = proj.documents
+    if doc_uuid not in documents:
+        # Fall back to prefix matching; only an unambiguous prefix is accepted.
+        matches = [key for key in documents if key.startswith(doc_uuid)]
+        if len(matches) != 1:
+            print(f"  no unique match for {doc_uuid!r} (matches: {matches[:5]})", file=sys.stderr)
+            return
+        (doc_uuid,) = matches
+
+    doc = documents[doc_uuid]
+    print()
+    print("=" * 72)
+    print(f"Document: {doc.doc_uuid}")
+    print(f"docType:  {doc.doc_type}")
+    head_json = json.dumps(doc.head, ensure_ascii=False)
+    print(f"head:     {head_json[:200]}")
+    print("op_counts (top 15):")
+    for op_type, count in doc.op_counts.most_common(15):
+        print(f"  {count:>7d}  {op_type}")
+    print(f"objects:  {len(doc.objects)}  deletes:  {doc.deletes}  untyped_ops:  {doc.untyped_ops}")
+
+    if not doc.objects:
+        return
+    print(f"\nFirst {n_objects} objects:")
+    preview = list(doc.objects.items())[:n_objects]
+    for obj_id, payload in preview:
+        payload_json = json.dumps(payload, ensure_ascii=False)
+        print(f"  {obj_id}  →  {payload_json[:240]}")
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the CLI: replay a project, print its summary, then any requested dumps.
+
+    ``argv`` is handed to ``ArgumentParser.parse_args`` unchanged. The function
+    returns 0 whenever it returns normally.
+    """
+    parser = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
+    parser.add_argument("project_dir", type=Path, help="data/raw/oshwhub/<project_uuid>/")
+    parser.add_argument(
+        "--dump-doc",
+        action="append",
+        default=[],
+        help="dump replayed state of one document (uuid or unique prefix); repeatable",
+    )
+    options = parser.parse_args(argv)
+
+    project = replay_project(options.project_dir)
+    _print_summary(project)
+    # --dump-doc is repeatable, so dump the requested documents in the order given.
+    for requested in options.dump_doc:
+        _dump_doc(project, requested)
+    return 0
+
+
+if __name__ == "__main__":
+    # Use main()'s return value as the process exit status.
+    sys.exit(main())