tools/epro2: add ProjectRelations for cross-document resolution

面对大量 cross-doc 引用,仅靠 per-doc Relations 是不够的:PCB 的 PAD_NET 复合
id [PAD_NET, comp, pin, pad] 里的 pad 实际是 FOOTPRINT 文档里的 pad
实例;SCH_PAGE 的 COMPONENT.partId 指向某个 SYMBOL 文档的 PART.id。

ProjectRelations 在 per-doc Relations 之上做项目级聚合,把这些跨文档
引用拼起来。

Probe 阶段(ESP-VoCat)发现的映射规则(已写入 docstring):

1. SCH_PAGE COMPONENT.partId  ===  PART.id in some SYMBOL doc
   - 命名有两种风格:'pid<hex>'(anonymous/系统 part)和 '<name>.<n>'(具名
     SKU),但两者都直接等于 PART.id,**不**是不同的 namespace
   - 同一 PART.id 可能出现在多个 SYMBOL 文档里(库快照),
     parts_by_id 保留全部,consumer 通常取第一个

2. PCB COMPONENT.id  →  FOOTPRINT 文档 UUID  via 单独 ATTR op:
       ATTR(parentId=<comp>, key="Footprint", value=<fp_doc_uuid>)
   COMPONENT.attrs 子 dict 只有内务字段(Unique ID / Channel ID / ...),
   **不**含 footprint 引用。这跟 schematic 的 partId 在 COMPONENT 上的
   做法不一样,是 EPRO2 流的一处不对称

3. PCB PAD_NET[comp,pin,pad] 里的 pad 是 FOOTPRINT 文档内部的 pad id;
   解析链: comp → ATTR Footprint → FOOTPRINT relations.pads[pad]

API:
  ProjectRelations.build(project) — 单遍构建
  resolve_symbol_docs(sch_uuid, comp_id) → [SYMBOL doc uuids]
  resolve_footprint_doc(pcb_uuid, comp_id) → FOOTPRINT doc uuid | None
  pad_in_footprint(fp_uuid, pad_id) → PAD payload | None
  resolve_pcb_pad_net(pcb_uuid, comp, pin, pad) → {footprint, pad} | None
  attrs_for_pcb_component(pcb_uuid, comp_id) → {key: value} 折叠

CLI 加 --project-relations,跑 ESP-VoCat:
  documents                                 278
  distinct_parts                             87
  duplicated_parts                            9
  pcb_components_with_footprint             206
  pcb_components_unresolved_footprint         0
  sch_components_with_partid                572
  sch_components_unresolved_part              0

PCB 样本验证:comp=e0 → fp=1069352d81c6 Designator='U8',
PAD_NET pin=1 pad=e7 net=GND 跨文档解到坐标 (-37.4,-45.24)。

测试:6 个新单测覆盖 partId→symbol、comp→footprint、PAD_NET 跨文档、
attrs 折叠、unresolved 计数。parser + relations + project_relations
共 21/21 通过。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-28 22:22:39 +08:00
parent 7f9e2fad73
commit 3052e42991
4 changed files with 377 additions and 0 deletions

View File

@@ -4,6 +4,7 @@ See docs/sources/easyeda_pro_source.md §3 for the format spec.
"""
from .parser import Op, iter_ops, parse_line
from .project_relations import ProjectRelations
from .relations import Relations, parse_composite_id
from .replay import Document, Project, replay_document, replay_project
@@ -17,4 +18,5 @@ __all__ = [
"replay_project",
"Relations",
"parse_composite_id",
"ProjectRelations",
]

View File

@@ -16,6 +16,7 @@ import sys
from collections import Counter
from pathlib import Path
from .project_relations import ProjectRelations
from .relations import Relations
from .replay import Project, replay_project
@@ -112,6 +113,59 @@ def _print_relations(proj: Project) -> None:
)
def _iter_sch_symbol_samples(pr):
    """Lazily yield ``(sch_uuid, comp_id, part_id, symbol_uuids)`` for every
    SCH_PAGE component whose partId resolves to at least one SYMBOL doc."""
    for sch_uuid in pr.docs_by_type.get("SCH_PAGE", []):
        for cid in pr.per_doc[sch_uuid].components:
            symbols = pr.resolve_symbol_docs(sch_uuid, cid)
            if symbols:
                yield sch_uuid, cid, pr.component_to_partid.get((sch_uuid, cid)), symbols


def _iter_pcb_footprint_samples(pr):
    """Lazily yield ``(pcb_uuid, comp_id, footprint_uuid)`` for every PCB
    component that maps to a FOOTPRINT doc."""
    for pcb_uuid in pr.docs_by_type.get("PCB", []):
        for cid in pr.per_doc[pcb_uuid].components:
            fp = pr.resolve_footprint_doc(pcb_uuid, cid)
            if fp:
                yield pcb_uuid, cid, fp


def _first_resolved_pad_net(pr, pcb_uuid, cid):
    """Return ``(pad_net_record, pad_payload)`` for the first PAD_NET record of
    component *cid* that resolves cross-doc, or None when none does."""
    for records in pr.per_doc[pcb_uuid].pad_nets_by_pad.values():
        for rec in records:
            if rec["comp"] != cid:
                continue
            resolved = pr.resolve_pcb_pad_net(pcb_uuid, cid, rec["pin"], rec["pad"])
            if resolved:
                return rec, resolved["pad"]
    return None


def _print_project_relations(proj: Project, sample_limit: int = 3) -> None:
    """Print cross-doc resolution stats plus a few sample resolutions for sanity.

    Args:
        proj: the replayed project to index.
        sample_limit: maximum number of sample lines printed per section
            (sch component → symbol, pcb component → footprint). Negative
            values are treated as 0.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import islice

    limit = max(sample_limit, 0)
    pr = ProjectRelations.build(proj)

    print()
    print("=" * 72)
    print("Project Relations (cross-doc)")
    print("-" * 72)
    for key, value in pr.summary().items():
        print(f" {key:<40s} {value}")

    # Sample SCH_PAGE component → SYMBOL resolutions.
    print()
    print("Sample sch component → symbol resolutions:")
    for sch_uuid, cid, pid, symbols in islice(_iter_sch_symbol_samples(pr), limit):
        # "(+N)" is the number of additional SYMBOL docs hosting the same PART.id.
        print(f" sch={sch_uuid[:12]} comp={cid} partId={pid!r} → symbol={symbols[0][:12]} (+{len(symbols)-1})")

    # Sample PCB component → FOOTPRINT resolutions, each followed by one
    # PAD_NET of that component resolved across documents (when one exists).
    print()
    print("Sample pcb component → footprint + first PAD_NET cross-doc:")
    for pcb_uuid, cid, fp in islice(_iter_pcb_footprint_samples(pr), limit):
        attrs = pr.attrs_for_pcb_component(pcb_uuid, cid)
        print(f" pcb={pcb_uuid[:12]} comp={cid} → fp={fp[:12]} Designator={attrs.get('Designator')!r} Value={attrs.get('Value')!r}")
        hit = _first_resolved_pad_net(pr, pcb_uuid, cid)
        if hit is not None:
            rec, pad = hit
            print(f" PAD_NET pin={rec['pin']} pad={rec['pad']} net={rec['net_name']} → pad@({pad.get('centerX')},{pad.get('centerY')})")
def main(argv: list[str] | None = None) -> int:
ap = argparse.ArgumentParser(description="Replay an EPRO2 project and summarize.")
ap.add_argument("project_dir", type=Path, help="data/raw/oshwhub/<project_uuid>/")
@@ -126,6 +180,11 @@ def main(argv: list[str] | None = None) -> int:
action="store_true",
help="build cross-object indices and print per-docType summary",
)
ap.add_argument(
"--project-relations",
action="store_true",
help="build cross-document indices (partId → SYMBOL, comp → FOOTPRINT, PAD_NET cross-doc)",
)
args = ap.parse_args(argv)
proj = replay_project(args.project_dir)
@@ -134,6 +193,8 @@ def main(argv: list[str] | None = None) -> int:
_dump_doc(proj, doc_id)
if args.relations:
_print_relations(proj)
if args.project_relations:
_print_project_relations(proj)
return 0

View File

@@ -0,0 +1,185 @@
"""Project-level relations: aggregate per-document Relations to resolve
cross-document references that are unresolvable in isolation.
Empirical mapping rules (probed on ESP-VoCat, 2026-04-28):
SCH_PAGE COMPONENT.partId = PART.id in some SYMBOL doc.
- "pid<hex>" → anonymous/system parts (frame, page border, ...)
- "<name>.<n>"→ named parts with SKU (e.g. "CL05A105KA5NQNC.1", "电阻.1")
- Same PART.id may appear in multiple SYMBOL docs (lib snapshots).
PCB COMPONENT.id → FOOTPRINT doc UUID via a separate ATTR op:
ATTR(parentId=<comp_id>, key="Footprint", value=<footprint_doc_uuid>).
The COMPONENT.attrs sub-dict carries unrelated bookkeeping
(Unique ID, Channel ID, ...), NOT the footprint reference.
PCB PAD_NET id ["PAD_NET", <comp>, <pin>, <pad>]:
The <pad> string is the id of a PAD object **inside the FOOTPRINT doc**
that the PCB COMPONENT instantiates. To resolve the pad geometry, walk
component → footprint → footprint.pads[<pad>].
"""
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass, field
from .relations import Relations
from .replay import Document, Project
@dataclass
class ProjectRelations:
    """Project-wide cross-document index, built once from a replayed Project.

    Construct instances with :meth:`build`; the ``resolve_*`` / ``*_for_*``
    accessors then answer cross-document questions from the pre-built indices.

    NOTE: ``docs_by_type`` and ``parts_by_id`` are ``defaultdict(list)``
    instances. Read them with ``.get()`` / ``in``: indexing a missing key
    inserts an empty list, which would inflate the ``summary()`` counts.
    """

    project: Project

    # Per-doc relations cache, filled for every document at build time.
    per_doc: dict[str, Relations] = field(default_factory=dict)

    # docType partitions (doc_uuid lists, in iteration order).
    docs_by_type: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))

    # PART.id → uuids of the docs that contain this part definition.
    # Multiple SYMBOL docs can share the same PART.id (lib snapshots) — we
    # keep them all; consumers usually pick the first.
    parts_by_id: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))

    # (pcb_doc_uuid, component_id) → FOOTPRINT doc uuid.
    # Resolved via ATTR(parent=comp, key="Footprint", value=fp_uuid).
    component_to_footprint: dict[tuple[str, str], str] = field(default_factory=dict)

    # (sch_doc_uuid, component_id) → COMPONENT.partId (raw partId string,
    # which is also a PART.id key into parts_by_id).
    component_to_partid: dict[tuple[str, str], str] = field(default_factory=dict)

    # Diagnostics: components whose cross-doc reference could not be resolved.
    components_with_unresolved_footprint: int = 0
    components_with_unresolved_part: int = 0

    # ------------------------------------------------------------------
    @classmethod
    def build(cls, project: Project) -> "ProjectRelations":
        """Build every index for *project* and return the populated instance."""
        pr = cls(project=project)
        pr._index_documents()
        pr._index_parts()
        pr._index_pcb_footprints()
        pr._index_sch_parts()
        return pr

    def _index_documents(self) -> None:
        """Build per-doc Relations and partition doc uuids by docType."""
        for doc_uuid, doc in self.project.documents.items():
            self.per_doc[doc_uuid] = Relations.build(doc)
            # Documents without a docType are grouped under "?".
            self.docs_by_type[doc.doc_type or "?"].append(doc_uuid)

    def _index_parts(self) -> None:
        """Fill ``parts_by_id`` from SYMBOL docs (and FOOTPRINT docs, defensively)."""
        # FOOTPRINTs don't have PART containers per probe, but scanning them
        # is harmless and keeps the index complete if that ever changes.
        hosts = self.docs_by_type.get("SYMBOL", []) + self.docs_by_type.get("FOOTPRINT", [])
        for host_uuid in hosts:
            for part_id in self.per_doc[host_uuid].parts:
                self.parts_by_id[part_id].append(host_uuid)

    def _index_pcb_footprints(self) -> None:
        """Map PCB components to FOOTPRINT docs via ATTR ops with key="Footprint"."""
        footprint_docs = set(self.docs_by_type.get("FOOTPRINT", []))
        for pcb_uuid in self.docs_by_type.get("PCB", []):
            objects = self.project.documents[pcb_uuid].objects
            for payload in objects.values():
                if payload.get("_type") != "ATTR" or payload.get("key") != "Footprint":
                    continue
                parent = payload.get("parentId")
                if not parent:
                    continue
                parent_obj = objects.get(parent)
                if not parent_obj or parent_obj.get("_type") != "COMPONENT":
                    continue
                fp_uuid = payload.get("value")
                # isinstance guard: a non-string (possibly unhashable) value
                # must not crash the set-membership test below.
                if isinstance(fp_uuid, str) and fp_uuid in footprint_docs:
                    self.component_to_footprint[(pcb_uuid, parent)] = fp_uuid

            # Unresolved = components with no Footprint ATTR mapping, or whose
            # value isn't the uuid of a FOOTPRINT doc in this project.
            self.components_with_unresolved_footprint += sum(
                1
                for cid in self.per_doc[pcb_uuid].components
                if (pcb_uuid, cid) not in self.component_to_footprint
            )

    def _index_sch_parts(self) -> None:
        """Record each SCH_PAGE component's partId and count unresolved ones."""
        for sch_uuid in self.docs_by_type.get("SCH_PAGE", []):
            for cid, comp in self.per_doc[sch_uuid].components.items():
                part_id = comp.get("partId")
                if not part_id:
                    # Components without a partId are neither mapped nor counted.
                    continue
                part_key = str(part_id)
                self.component_to_partid[(sch_uuid, cid)] = part_key
                if part_key not in self.parts_by_id:
                    self.components_with_unresolved_part += 1

    # Accessor helpers -------------------------------------------------
    def resolve_footprint_doc(self, pcb_doc_uuid: str, component_id: str) -> str | None:
        """PCB component → its FOOTPRINT doc uuid (if mapped via ATTR key=Footprint)."""
        return self.component_to_footprint.get((pcb_doc_uuid, component_id))

    def resolve_symbol_docs(self, sch_doc_uuid: str, component_id: str) -> list[str]:
        """SCH_PAGE component → doc(s) hosting its PART. Returns [] when unresolved.

        Multiple SYMBOL docs can host the same PART.id; downstream usually
        picks the first (they're expected to be identical lib snapshots).
        The returned list is a copy, so callers may mutate it freely.
        """
        pid = self.component_to_partid.get((sch_doc_uuid, component_id))
        if not pid:
            return []
        return list(self.parts_by_id.get(pid, ()))

    def pad_in_footprint(self, footprint_doc_uuid: str, pad_id: str) -> dict | None:
        """Look up a PAD payload inside a FOOTPRINT doc by its local id.

        Returns None when the doc is unknown or has no pad with that id.
        """
        rel = self.per_doc.get(footprint_doc_uuid)
        if rel is None:
            return None
        return rel.pads.get(pad_id)

    def resolve_pcb_pad_net(
        self,
        pcb_doc_uuid: str,
        comp_id: str,
        pin: str,
        pad_id: str,
    ) -> dict | None:
        """Resolve a PCB PAD_NET composite → footprint pad payload (cross-doc).

        Walks component → footprint doc → footprint pad. ``pin`` is accepted
        so callers can pass the whole composite id, but it is not needed for
        the lookup.

        Returns ``{"footprint": <fp_uuid>, "pad": <pad payload>}`` or None if
        the chain breaks. A pad that exists but has an empty payload still
        resolves; only a missing pad yields None.
        """
        fp_uuid = self.resolve_footprint_doc(pcb_doc_uuid, comp_id)
        if fp_uuid is None:
            return None
        pad = self.pad_in_footprint(fp_uuid, pad_id)
        if pad is None:
            return None
        return {"footprint": fp_uuid, "pad": pad}

    def attrs_for_pcb_component(
        self,
        pcb_doc_uuid: str,
        comp_id: str,
    ) -> dict[str, object]:
        """Collapse all ATTR ops on a PCB component into ``{key: value}``.

        Delegates to the per-doc ``Relations.attrs_dict``; returns ``{}`` when
        the document is unknown.
        """
        rel = self.per_doc.get(pcb_doc_uuid)
        if rel is None:
            return {}
        return rel.attrs_dict(comp_id)

    def summary(self) -> dict[str, int]:
        """Return headline counts describing the built indices."""
        return {
            "documents": len(self.project.documents),
            "doc_types": len(self.docs_by_type),
            "distinct_parts": len(self.parts_by_id),
            "duplicated_parts": sum(
                1 for uuids in self.parts_by_id.values() if len(uuids) > 1
            ),
            "pcb_components_with_footprint": len(self.component_to_footprint),
            "pcb_components_unresolved_footprint": self.components_with_unresolved_footprint,
            "sch_components_with_partid": len(self.component_to_partid),
            "sch_components_unresolved_part": self.components_with_unresolved_part,
        }

View File

@@ -0,0 +1,129 @@
"""ProjectRelations regression tests with synthetic micro-projects.
Covers the cross-document resolution rules empirically derived from ESP-VoCat:
- SCH COMPONENT.partId → SYMBOL doc(s) with matching PART.id
- PCB COMPONENT → FOOTPRINT doc via ATTR(parent=comp, key=Footprint, value=fp_uuid)
- PCB PAD_NET[comp,pin,pad] → pad payload inside FOOTPRINT (cross-doc)
"""
from tools.epro2.project_relations import ProjectRelations
from tools.epro2.replay import Document, Project
def _doc(uuid, doc_type, objs):
    """Create a Document of *doc_type* holding the given (object id, payload) pairs."""
    document = Document(doc_uuid=uuid, doc_type=doc_type)
    for object_id, payload in objs:
        document.objects[object_id] = payload
    return document
def _project(*docs):
    """Create the synthetic "testproj" Project containing *docs*, keyed by doc uuid."""
    project = Project(project_uuid="testproj")
    for document in docs:
        project.documents[document.doc_uuid] = document
    return project
def test_partid_to_symbol_resolution():
    """A partId resolves to every SYMBOL doc defining that PART.id, duplicates included."""
    first_symbol = _doc(
        "sym_uuid_1",
        "SYMBOL",
        [("MyPart.1", {"_type": "PART", "BBOX": [0, 0, 10, 10], "title": "MyPart.1"})],
    )
    # Same PART.id as first_symbol: a duplicate library snapshot.
    duplicate_symbol = _doc(
        "sym_uuid_2",
        "SYMBOL",
        [("MyPart.1", {"_type": "PART", "BBOX": [0, 0, 10, 10], "title": "MyPart.1"})],
    )
    other_symbol = _doc(
        "sym_uuid_3",
        "SYMBOL",
        [("OtherPart.1", {"_type": "PART", "title": "OtherPart.1"})],
    )
    schematic = _doc(
        "sch_uuid_1",
        "SCH_PAGE",
        [
            ("e1", {"_type": "COMPONENT", "partId": "MyPart.1", "x": 0, "y": 0}),
            ("e2", {"_type": "COMPONENT", "partId": "OtherPart.1", "x": 5, "y": 5}),
        ],
    )

    relations = ProjectRelations.build(
        _project(first_symbol, duplicate_symbol, other_symbol, schematic)
    )

    assert sorted(relations.resolve_symbol_docs("sch_uuid_1", "e1")) == [
        "sym_uuid_1",
        "sym_uuid_2",
    ]
    assert relations.resolve_symbol_docs("sch_uuid_1", "e2") == ["sym_uuid_3"]

    stats = relations.summary()
    assert stats["distinct_parts"] == 2
    assert stats["duplicated_parts"] == 1  # MyPart.1 lives in 2 syms
    assert stats["sch_components_with_partid"] == 2
    assert stats["sch_components_unresolved_part"] == 0
def test_pcb_component_to_footprint():
    """A Footprint ATTR on a PCB component maps it to the FOOTPRINT doc."""
    footprint_objects = [
        ("e7", {"_type": "PAD", "centerX": 1.0, "centerY": 2.0, "layerId": 1}),
    ]
    pcb_objects = [
        ("e0", {"_type": "COMPONENT", "x": 0, "y": 0, "attrs": {}}),
        ("attr_fp", {"_type": "ATTR", "parentId": "e0", "key": "Footprint", "value": "fp_uuid_X"}),
        ("attr_des", {"_type": "ATTR", "parentId": "e0", "key": "Designator", "value": "R1"}),
        ('["PAD_NET","e0","1","e7"]', {"_type": "PAD_NET", "padNet": "GND"}),
    ]
    footprint = _doc("fp_uuid_X", "FOOTPRINT", footprint_objects)
    board = _doc("pcb_uuid_1", "PCB", pcb_objects)

    relations = ProjectRelations.build(_project(footprint, board))

    assert relations.resolve_footprint_doc("pcb_uuid_1", "e0") == "fp_uuid_X"
    expected_attrs = {"Footprint": "fp_uuid_X", "Designator": "R1"}
    assert relations.attrs_for_pcb_component("pcb_uuid_1", "e0") == expected_attrs
def test_pad_net_cross_doc_resolution():
    """PAD_NET pad ids resolve to pad payloads stored in the FOOTPRINT doc."""
    footprint = _doc(
        "fp_uuid_X",
        "FOOTPRINT",
        [
            ("e7", {"_type": "PAD", "centerX": 1.5, "centerY": -2.5, "num": "1"}),
            ("e8", {"_type": "PAD", "centerX": 3.0, "centerY": 0.0, "num": "2"}),
        ],
    )
    board = _doc(
        "pcb_uuid_1",
        "PCB",
        [
            ("e0", {"_type": "COMPONENT", "x": 0, "y": 0}),
            ("attr_fp", {"_type": "ATTR", "parentId": "e0", "key": "Footprint", "value": "fp_uuid_X"}),
            ('["PAD_NET","e0","1","e7"]', {"_type": "PAD_NET", "padNet": "GND"}),
            ('["PAD_NET","e0","2","e8"]', {"_type": "PAD_NET", "padNet": "VCC"}),
        ],
    )
    relations = ProjectRelations.build(_project(footprint, board))

    first = relations.resolve_pcb_pad_net("pcb_uuid_1", "e0", "1", "e7")
    assert first is not None
    assert first["footprint"] == "fp_uuid_X"
    assert first["pad"]["centerX"] == 1.5

    second = relations.resolve_pcb_pad_net("pcb_uuid_1", "e0", "2", "e8")
    assert second["pad"]["centerY"] == 0.0

    # A pad id the known footprint does not contain resolves to None.
    assert relations.resolve_pcb_pad_net("pcb_uuid_1", "e0", "99", "e_ghost") is None
def test_unresolved_counts_get_recorded():
    """Components whose cross-doc reference cannot be resolved are counted."""
    # The PCB component carries no Footprint ATTR at all.
    board = _doc(
        "pcb_uuid_1",
        "PCB",
        [("e0", {"_type": "COMPONENT", "x": 0, "y": 0})],
    )
    # The schematic component names a partId that no document defines.
    schematic = _doc(
        "sch_uuid_1",
        "SCH_PAGE",
        [("e1", {"_type": "COMPONENT", "partId": "GhostPart.1"})],
    )

    stats = ProjectRelations.build(_project(board, schematic)).summary()

    assert stats["pcb_components_unresolved_footprint"] == 1
    assert stats["sch_components_unresolved_part"] == 1
def test_attrs_for_pcb_component_collapses_multiple_attrs():
    """Several ATTR ops on one component fold into one dict; a repeated key keeps the later value."""
    board_objects = [
        ("e0", {"_type": "COMPONENT", "x": 0, "y": 0}),
        ("a1", {"_type": "ATTR", "parentId": "e0", "key": "Designator", "value": "R1"}),
        ("a2", {"_type": "ATTR", "parentId": "e0", "key": "Value", "value": "10kΩ"}),
        # Second Designator ATTR: expected to override "R1".
        ("a3", {"_type": "ATTR", "parentId": "e0", "key": "Designator", "value": "R2"}),
    ]
    relations = ProjectRelations.build(_project(_doc("pcb_uuid_1", "PCB", board_objects)))

    collapsed = relations.attrs_for_pcb_component("pcb_uuid_1", "e0")

    assert collapsed == {"Designator": "R2", "Value": "10kΩ"}
def test_summary_keys_present():
    """summary() exposes every documented key, even for an empty project."""
    expected_keys = (
        "documents",
        "doc_types",
        "distinct_parts",
        "duplicated_parts",
        "pcb_components_with_footprint",
        "pcb_components_unresolved_footprint",
        "sch_components_with_partid",
        "sch_components_unresolved_part",
    )
    stats = ProjectRelations.build(_project()).summary()
    missing = [key for key in expected_keys if key not in stats]
    assert not missing