Files
FacereDataset/tools/epro2/replay.py
Knowit 3c57e75d51 Add tools/epro2 — EPRO2 parser + replay prototype
为 Pro 3.x .epro2 工程源数据写解析骨架,下游做 EPRO2→KiCad 转换器
前的基础设施。在 ESP-VoCat (278 docs / 7.5 MB) + 220V 桌面电源
(771 docs / 26 MB) 端到端跑通,0 parse errors。

模块结构:
  tools/epro2/parser.py    单行 → Op:rstrip("|") + split("||") + json.loads
  tools/epro2/replay.py    state-machine:DOCHEAD 设头;其它 op 按 id 做
                           upsert(payload=None 当 delete);EDIT_HEAD/
                           META/CANVAS/PREFERENCE/PANELIZE 当 doc 级单
                           例存
  tools/epro2/__main__.py  CLI:传项目目录走 manifest.json 重放每个 doc,
                           按 docType 聚合输出 + 可选 --dump-doc 看单文
                           档详情
  tools/epro2/tests/       6 个单测 pin 死 trailing-pipe / 三段消息 /
                           id-only-no-payload / 嵌入管道符等坑

ESP-VoCat 输出示例:
  Documents: 278  (parse_errors=0)
   count  docType         objects        ops  deletes  untyped_ops
     105  SYMBOL             4124       4439        0            0
      88  DEVICE               88        264        0            0
      55  FOOTPRINT          4641       4855        0            0
       9  SCH_PAGE           7982       8167       42            0
       6  PCB                8428       8547       38            0
       6  BOARD                 9         18        0            0
       6  SCH                   9         26        0            0
       1  BLOB                  4          8        0            0
       1  FONT                 16         28        0            0
       1  CONFIG                2          3        0            0
  Top ops: ATTR 7035 / ELE_PLACEHOLDER 4225 / LINE 3005 / LAYER 2318 ...

PCB 文档单 dump 验证语义正确:META 含 title (PCB-EchoEar-CoreBoard-V1_0)
+ board 引用;CANVAS 含 origin/grid/unit (mm);LAYER 1/2/3 = TOP/BOTTOM/
TOP_SILK 配色齐全。

跑法:
  uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
  uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>

下一步(不在本 commit):
1. 把对象间关系建起来(COMPONENT.partId → PART;LINE.lineGroup → WIRE;
   PAD_NET id → PAD + NET 三方关联)—— 当前 replay 只做扁平 dict
2. EPRO2 → KiCad 序列化层(Forge 投影硬门槛)
3. 在 Pro 3.x 三个项目做整体回归(X86 主板 7374 docs 可作压力测试)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 22:10:27 +08:00

123 lines
4.2 KiB
Python

"""EPRO2 state-machine replay.
Each ``.epro2`` file is a per-document op stream (already partitioned by
DOCHEAD during crawl). Replaying that stream yields the document's final
state — a dict keyed by object id with the latest payload.
This is a *prototype*. Semantics intentionally minimal:
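- DOCHEAD sets the document head (docType, uuid, editVersion, ...).
- EDIT_HEAD / META / CANVAS / PREFERENCE / PANELIZE are document-level
singletons: each is stored in ``objects`` under its own op type.
- Any other op with an ``id`` upserts ``objects[id]`` with its payload.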
- A ``null`` / missing payload on a normally-payloaded op is treated as
a deletion. (Empirically uncommon — flagged for review when seen.)
- We do *not* yet model relationships (lineGroup → WIRE, NET ↔ PAD_NET, ...).
Those belong in a higher-level translator, not the raw replay.
"""
from __future__ import annotations
import json
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from .parser import Op, iter_ops
@dataclass
class Document:
    """Final state of one Pro document after its op stream has been replayed.

    ``objects`` maps an object id — or, for document-level singleton ops, the
    op type itself — to the most recent payload, with the op type stored
    under the ``"_type"`` key.
    """

    doc_uuid: str
    doc_type: str | None = None
    head: dict = field(default_factory=dict)
    objects: dict[str, dict] = field(default_factory=dict)
    op_counts: Counter[str] = field(default_factory=Counter)
    # Number of payload-less, id-keyed ops that removed an existing object.
    deletes: int = 0
    # Ops that carry no `id` and are neither DOCHEAD nor one of the singleton
    # types (EDIT_HEAD / META / CANVAS / PREFERENCE / PANELIZE).
    untyped_ops: int = 0

    def apply(self, op: Op) -> None:
        """Fold a single op into this document's state.

        Every op is tallied in ``op_counts`` by type before being dispatched.
        """
        kind, body = op.type, op.payload
        self.op_counts[kind] += 1

        if kind == "DOCHEAD":
            # A DOCHEAD with an empty or missing payload leaves the head as-is.
            if body:
                self.head = body
                self.doc_type = body.get("docType")
        elif kind in ("EDIT_HEAD", "META", "CANVAS", "PREFERENCE", "PANELIZE"):
            # Singletons have no `id`; the op type doubles as the key.
            if body is not None:
                self.objects[kind] = {"_type": kind, **body}
        elif op.id is None:
            # Nothing addressable here — tally it so a missing category in
            # the model shows up in the stats.
            self.untyped_ops += 1
        elif body is not None:
            # Upsert: the latest payload for an id wins.
            self.objects[op.id] = {"_type": kind, **body}
        elif op.id in self.objects:
            # No payload on an id-keyed op means the object was deleted.
            del self.objects[op.id]
            self.deletes += 1
@dataclass
class Project:
    """Replayed documents of one project, plus per-document replay failures."""

    project_uuid: str
    editor_version: str | None = None
    documents: dict[str, Document] = field(default_factory=dict)
    # (document identifier, error description) pairs.
    parse_errors: list[tuple[str, str]] = field(default_factory=list)

    def by_doc_type(self) -> dict[str, list[Document]]:
        """Group documents by ``doc_type``; a falsy type is filed under ``"?"``."""
        grouped: dict[str, list[Document]] = {}
        for doc in self.documents.values():
            key = doc.doc_type or "?"
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(doc)
        return grouped

    def aggregate_op_counts(self) -> Counter[str]:
        """Sum the per-document op tallies into one project-wide counter."""
        total: Counter[str] = Counter()
        for doc in self.documents.values():
            total += doc.op_counts
        return total
def replay_document(epro2_path: Path | str) -> Document:
    """Replay one ``.epro2`` file into a :class:`Document`.

    The document UUID is the file name without its extension; every op
    yielded by ``iter_ops`` is applied in order.
    """
    source = Path(epro2_path)
    # crawler writes <doc_uuid>.epro2
    doc = Document(doc_uuid=source.stem)
    for op in iter_ops(source):
        doc.apply(op)
    return doc
def replay_project(project_dir: Path | str) -> Project:
    """Replay every document listed in ``<project_dir>/source/manifest.json``.

    Each manifest entry's ``path`` is resolved relative to ``project_dir`` and
    replayed with ``replay_document``. A document that fails — including one
    whose manifest entry is malformed — is recorded in ``Project.parse_errors``
    and skipped, so one bad entry never aborts the rest of the project.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
        KeyError: if the manifest has no ``project_uuid``.
    """
    pdir = Path(project_dir)
    manifest_path = pdir / "source" / "manifest.json"
    if not manifest_path.exists():
        raise FileNotFoundError(f"missing {manifest_path}")
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    proj = Project(
        project_uuid=manifest["project_uuid"],
        editor_version=manifest.get("editor_version"),
    )
    for entry in manifest.get("documents", []):
        try:
            # The path lookup lives inside the try so that an entry without a
            # "path" is reported like any other per-document failure.
            d = replay_document(pdir / entry["path"])
        except Exception as e:  # noqa: BLE001 — surface as parse_errors
            # Build the label with tolerant lookups: indexing entry["doc_uuid"]
            # here could raise again and hide the error being reported.
            label = None
            if isinstance(entry, dict):
                label = entry.get("doc_uuid") or entry.get("path")
            proj.parse_errors.append((str(label or "?"), f"{type(e).__name__}: {e}"))
            continue
        proj.documents[d.doc_uuid] = d
    return proj