Files
FacereDataset/tools/epro2/__main__.py
Knowit 7f9e2fad73 tools/epro2: add Relations layer for cross-object navigation
在 replay 的扁平 objects[id] -> payload 之上盖一层 Relations,建索引和
反向引用,把孤立对象拼成可遍历的图,是后续 EPRO2 → KiCad 转换器的
中间表示前置。

Relations.build(doc) 单遍扫所有对象,得到:

主集合(按类型分桶):
  parts / components / pins / pads / wires / nets / layers / rules

复合 ID 解析(关键):
  '["LAYER",1]'                          → layers[1]
  '["NET","GND"]'                        → nets["GND"]
  '["PAD_NET","e0","1","e7"]'            → pad_nets_by_pad/by_net
  '["RULE","SAFE","copperThickness1oz"]' → rules[("RULE","SAFE",...)]

反向引用:
  obj_ids_by_part         partId            → 引用对象 ids(lib 内 RECT/TEXT/PIN 都带 partId)
  components_by_part      partId            → component ids
  attrs_by_parent         parentId          → ATTR ids
  lines_by_wire           WIRE.id           → LINE ids(wire 由若干 LINE 段组成)
  pad_nets_by_pad         PAD.id            → PAD_NET 记录
  pad_nets_by_net         net name          → PAD_NET 记录
  objects_on_layer / objects_in_net  字段反查

便捷 accessor:
  attrs_dict(parent_id)   折叠所有 ATTR ops 到 {key: value} dict(last
                          write wins),KiCad 转换时按 component 拿
                          Designator/Value/Footprint 的常用入口

ATTR.parentId 解析(实测发现的两种坑):
1. 不仅指向 COMPONENT/PART —— 也大量指向 WIRE(schematic 上的网络
   标签 / 网络属性)。原查重函数漏算,636 个 false positive
   unresolved;改为"任意 doc.objects[parentId] 命中即算 resolved"
2. 复合形式 `<comp_id>-<pin_id>` 用于把 ATTR 挂在某 component 的某个
   pin 上(如 PinName)。`_resolve_parent()` 用 split("-",1) 兜底

CLI 加 --relations,按 docType 聚合 stats:
  uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --relations

ESP-VoCat 验证:
  SCH_PAGE 9 docs : 572 components, 563 wires, 934 lines_grouped,
                    4111 attrs_attached, 0 unresolved_parents
  PCB      6 docs : 206 components, 807 pad_nets, 173 nets, 544 layers
  SYMBOL 105 docs : 106 parts, 560 pins, 1680 attrs_attached
  FOOTPRINT 55 docs: 496 pads, 9 nets, 1771 layers, 140 rules

注:PCB 内 pads=6 vs pad_nets=807 不矛盾 —— PAD 实例存在 FOOTPRINT
文档里,PCB stream 用 ["PAD_NET",comp,pin,pad] 复合 id 跨文档引用;
解析"comp 的某 pin 通过哪个 footprint 的哪个 pad"需要 project-级
Relations 聚合(下个 task)。

测试:tools/epro2/tests/test_relations.py 9 个单测覆盖复合 id 解析、
lineGroup 链接、parentId 直/复合解析、partId 反查、attrs 折叠。
parser + relations 共 15/15 通过。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 22:17:28 +08:00

142 lines
4.8 KiB
Python

"""CLI: replay every document in a Pro 3.x project and print a summary.
Usage:
uv run python -m tools.epro2 data/raw/oshwhub/<project_uuid>
uv run python -m tools.epro2 data/raw/oshwhub/<uuid> --dump-doc <doc_uuid>
Designed for sanity-checking the parser/replay against ESP-VoCat first;
later we'll diff replayed state against the editor-rendered ground truth.
"""
from __future__ import annotations
import argparse
import json
import sys
from collections import Counter
from pathlib import Path
from .relations import Relations
from .replay import Project, replay_project
def _print_summary(proj: Project) -> None:
print(f"Project: {proj.project_uuid}")
print(f"Editor version: {proj.editor_version}")
print(f"Documents: {len(proj.documents)} (parse_errors={len(proj.parse_errors)})")
print()
by_type = proj.by_doc_type()
print(f"{'count':>6} {'docType':<14s} {'objects':>10s} {'ops':>10s} {'deletes':>8s} {'untyped_ops':>11s}")
for t in sorted(by_type, key=lambda k: -len(by_type[k])):
ds = by_type[t]
objs = sum(len(d.objects) for d in ds)
ops = sum(d.op_counts.total() for d in ds)
dels = sum(d.deletes for d in ds)
untyped = sum(d.untyped_ops for d in ds)
print(f"{len(ds):>6d} {t:<14s} {objs:>10d} {ops:>10d} {dels:>8d} {untyped:>11d}")
print()
print(f"Top 25 op types across project:")
for t, n in proj.aggregate_op_counts().most_common(25):
print(f" {n:>9d} {t}")
if proj.parse_errors:
print()
print(f"Parse errors ({len(proj.parse_errors)}):")
for u, e in proj.parse_errors[:10]:
print(f" {u[:32]} {e}")
def _dump_doc(proj: Project, doc_uuid: str, n_objects: int = 5) -> None:
    """Print the replayed state of a single document.

    Args:
        proj: Replayed project; only ``proj.documents`` is read.
        doc_uuid: Exact key of ``proj.documents``, or a prefix that matches
            exactly one key. Anything else prints a "no unique match" message
            to stderr and returns without dumping.
        n_objects: Upper bound on how many objects are printed.
    """
    documents = proj.documents
    if doc_uuid not in documents:
        # Not an exact key: accept it only as an unambiguous prefix.
        matches = [key for key in documents if key.startswith(doc_uuid)]
        if len(matches) != 1:
            print(f" no unique match for {doc_uuid!r} (matches: {matches[:5]})", file=sys.stderr)
            return
        (doc_uuid,) = matches

    doc = documents[doc_uuid]
    print()
    print("=" * 72)
    print(f"Document: {doc.doc_uuid}")
    print(f"docType: {doc.doc_type}")
    # The head is clipped to 200 characters of JSON to keep the dump short.
    print(f"head: {json.dumps(doc.head, ensure_ascii=False)[:200]}")

    print("op_counts (top 15):")
    for op_type, count in doc.op_counts.most_common(15):
        print(f" {count:>7d} {op_type}")

    print(f"objects: {len(doc.objects)} deletes: {doc.deletes} untyped_ops: {doc.untyped_ops}")
    if not doc.objects:
        return

    print(f"\nFirst {n_objects} objects:")
    leading = list(doc.objects.items())[:n_objects]
    for obj_id, payload in leading:
        # Each payload is clipped to 240 characters of JSON.
        print(f" {obj_id}{json.dumps(payload, ensure_ascii=False)[:240]}")
def _print_relations(proj: Project) -> None:
    """Print Relations summary counters, summed per docType across the project.

    Builds a ``Relations`` index for every document in ``proj.documents``,
    adds each document's ``summary()`` counters into a per-docType total, and
    prints one table row per docType. Rows are ordered by number of documents
    of that type, descending; ties keep first-seen order. Documents with a
    falsy ``doc_type`` are grouped under ``"?"``.

    Args:
        proj: Replayed project; only ``proj.documents`` is read.
    """
    print()
    print("=" * 72)
    print("Relations (per docType, summed)")
    print("-" * 72)

    aggregated: dict[str, Counter[str]] = {}
    # Documents per docType, counted in the same pass so the sort below does
    # not have to rescan every document for every docType.
    doc_counts: Counter[str] = Counter()
    for d in proj.documents.values():
        rel = Relations.build(d)
        doc_type = d.doc_type or "?"
        doc_counts[doc_type] += 1
        agg = aggregated.setdefault(doc_type, Counter())
        for k, v in rel.summary().items():
            agg[k] += v

    if not aggregated:
        print(" (no documents)")
        return

    # Most documents first; sorted() is stable, so ties keep first-seen order.
    order = sorted(aggregated, key=lambda t: -doc_counts[t])
    cols = [
        "parts", "components", "pins", "pads", "wires", "nets", "layers", "rules",
        "lines_grouped", "attrs_attached", "pad_nets",
        "unresolved_parents", "unresolved_wires", "unresolved_layers",
    ]
    print(f" {'docType':<12s} " + " ".join(f"{c:>16s}" for c in cols))
    for t in order:
        row = aggregated[t]
        # Counters a docType never reported are printed as 0.
        print(
            f" {t:<12s} "
            + " ".join(f"{row.get(c, 0):>16d}" for c in cols)
        )
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: replay a project and print the requested reports.

    The project summary is always printed; each ``--dump-doc`` value adds a
    per-document dump, and ``--relations`` adds the per-docType relations
    table.

    Args:
        argv: Arguments to parse; ``None`` makes argparse fall back to the
            process command line.

    Returns:
        0 once every requested report has been printed.
    """
    parser = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
    parser.add_argument("project_dir", type=Path, help="data/raw/oshwhub/<project_uuid>/")
    parser.add_argument(
        "--dump-doc",
        action="append",
        default=[],
        help="dump replayed state of one document (uuid or unique prefix); repeatable",
    )
    parser.add_argument(
        "--relations",
        action="store_true",
        help="build cross-object indices and print per-docType summary",
    )
    options = parser.parse_args(argv)

    project = replay_project(options.project_dir)
    _print_summary(project)

    # Dumps are emitted in the order the flags were given on the command line.
    for requested in options.dump_doc:
        _dump_doc(project, requested)

    if options.relations:
        _print_relations(project)

    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())