# FacereDataset/tools/epro2/project_relations.py

"""Project-level relations: aggregate per-document Relations to resolve
cross-document references that are unresolvable in isolation.

Empirical mapping rules (probed on ESP-VoCat, 2026-04-28):

  SCH_PAGE COMPONENT.partId  =  PART.id  in some SYMBOL doc.
    - "pid<hex>"  → anonymous/system parts (frame, page border, ...)
    - "<name>.<n>"→ named parts with SKU (e.g. "CL05A105KA5NQNC.1", "电阻.1")
    - Same PART.id may appear in multiple SYMBOL docs (lib snapshots).

  PCB COMPONENT.id   →  FOOTPRINT doc UUID via a separate ATTR op:
    ATTR(parentId=<comp_id>, key="Footprint", value=<footprint_doc_uuid>).
    The COMPONENT.attrs sub-dict carries unrelated bookkeeping
    (Unique ID, Channel ID, ...), NOT the footprint reference.

  PCB PAD_NET id ["PAD_NET", <comp>, <pin>, <pad>]:
    The <pad> string is the id of a PAD object **inside the FOOTPRINT doc**
    that the PCB COMPONENT instantiates. To resolve the pad geometry, walk
    component → footprint → footprint.pads[<pad>].
"""

from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass, field

from .relations import Relations
from .replay import Document, Project


@dataclass
class ProjectRelations:
    """Project-wide cross-document index over a replayed ``Project``.

    Construct a populated instance with :meth:`build`; it creates one
    ``Relations`` per document and derives the cross-document lookup tables
    below. The ``resolve_*`` / ``*_in_*`` accessors are read-only lookups on
    those tables and never raise for unknown ids (they return ``None``,
    ``[]`` or ``{}`` instead).
    """

    project: Project

    # doc uuid → per-document Relations, filled once by build().
    per_doc: dict[str, Relations] = field(default_factory=dict)

    # docType → doc uuids, in ``project.documents`` iteration order.
    # Documents with a falsy ``doc_type`` are grouped under "?".
    docs_by_type: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))

    # PART.id → uuids of the docs that define this part. build() scans SYMBOL
    # docs first, then FOOTPRINT docs (defensive), so SYMBOL hosts come first.
    # Several docs can share one PART.id (lib snapshots); all are kept and
    # consumers usually pick the first.
    # NOTE: this is a defaultdict — use ``.get()`` / ``in`` for lookups, since
    # indexing a missing key inserts an empty list and inflates summary().
    parts_by_id: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))

    # (pcb_doc_uuid, component_id) → FOOTPRINT doc uuid.
    # Resolved via ATTR(parentId=comp, key="Footprint", value=fp_uuid).
    component_to_footprint: dict[tuple[str, str], str] = field(default_factory=dict)

    # (sch_doc_uuid, component_id) → COMPONENT.partId as a string, which is
    # also the key to look up in parts_by_id.
    component_to_partid: dict[tuple[str, str], str] = field(default_factory=dict)

    # Diagnostics, accumulated by build().
    # PCB components without a Footprint ATTR pointing at a FOOTPRINT doc:
    components_with_unresolved_footprint: int = 0
    # SCH_PAGE components whose (non-empty) partId is not in parts_by_id:
    components_with_unresolved_part: int = 0

    # ----------------------------------------------------------------------

    @classmethod
    def build(cls, project: Project) -> "ProjectRelations":
        """Create and fully populate the index for ``project``.

        Steps run in dependency order: the docType partition feeds every
        later step, and ``parts_by_id`` must be complete before schematic
        components are checked against it.
        """
        pr = cls(project=project)
        pr._index_documents()
        pr._index_parts()
        pr._link_pcb_footprints()
        pr._link_sch_parts()
        return pr

    def _index_documents(self) -> None:
        """Build per-document Relations and partition doc uuids by docType."""
        for doc_uuid, doc in self.project.documents.items():
            self.per_doc[doc_uuid] = Relations.build(doc)
            self.docs_by_type[doc.doc_type or "?"].append(doc_uuid)

    def _index_parts(self) -> None:
        """Record, for every PART.id, the docs that define it."""
        # FOOTPRINT docs are scanned after SYMBOL docs purely defensively:
        # per the probe they carry no PART containers.
        for doc_type in ("SYMBOL", "FOOTPRINT"):
            for doc_uuid in self.docs_by_type.get(doc_type, ()):
                for part_id in self.per_doc[doc_uuid].parts:
                    self.parts_by_id[part_id].append(doc_uuid)

    def _link_pcb_footprints(self) -> None:
        """Map PCB components to FOOTPRINT docs and count the unmapped ones."""
        footprint_docs = set(self.docs_by_type.get("FOOTPRINT", ()))
        for pcb_uuid in self.docs_by_type.get("PCB", ()):
            objects = self.project.documents[pcb_uuid].objects
            for payload in objects.values():
                if payload.get("_type") != "ATTR" or payload.get("key") != "Footprint":
                    continue
                parent = payload.get("parentId")
                if not parent:
                    continue
                # Only ATTRs hanging off a COMPONENT carry a footprint link.
                parent_obj = objects.get(parent)
                if not parent_obj or parent_obj.get("_type") != "COMPONENT":
                    continue
                fp_uuid = payload.get("value")
                # Ignore values that do not name a FOOTPRINT doc of this project.
                if fp_uuid in footprint_docs:
                    self.component_to_footprint[(pcb_uuid, parent)] = fp_uuid

            # Unresolved = no Footprint ATTR at all, or one whose value is not
            # a FOOTPRINT doc uuid.
            self.components_with_unresolved_footprint += sum(
                1
                for cid in self.per_doc[pcb_uuid].components
                if (pcb_uuid, cid) not in self.component_to_footprint
            )

    def _link_sch_parts(self) -> None:
        """Map SCH_PAGE components to their partId and count unknown parts."""
        for sch_uuid in self.docs_by_type.get("SCH_PAGE", ()):
            for cid, comp in self.per_doc[sch_uuid].components.items():
                raw_part_id = comp.get("partId")
                if not raw_part_id:
                    # Components without a partId are neither mapped nor
                    # counted as unresolved.
                    continue
                part_id = str(raw_part_id)
                self.component_to_partid[(sch_uuid, cid)] = part_id
                if part_id not in self.parts_by_id:
                    self.components_with_unresolved_part += 1

    # Accessor helpers -----------------------------------------------------

    def resolve_footprint_doc(self, pcb_doc_uuid: str, component_id: str) -> str | None:
        """PCB component → its FOOTPRINT doc uuid, or None if not mapped."""
        return self.component_to_footprint.get((pcb_doc_uuid, component_id))

    def resolve_symbol_docs(self, sch_doc_uuid: str, component_id: str) -> list[str]:
        """SCH_PAGE component → uuids of the doc(s) hosting its PART.

        Returns a fresh list (safe to mutate); ``[]`` when the component has
        no recorded partId or the partId matches no known PART.id. Several
        docs can host the same PART.id; downstream usually picks the first
        (they are expected to be identical lib snapshots).
        """
        pid = self.component_to_partid.get((sch_doc_uuid, component_id))
        if not pid:
            return []
        # .get() rather than indexing: parts_by_id is a defaultdict and must
        # not grow as a side effect of a lookup.
        return list(self.parts_by_id.get(pid, ()))

    def pad_in_footprint(self, footprint_doc_uuid: str, pad_id: str) -> dict | None:
        """Look up a PAD payload inside a FOOTPRINT doc by its local id.

        Returns None when either the document or the pad is unknown.
        """
        rel = self.per_doc.get(footprint_doc_uuid)
        if rel is None:
            return None
        return rel.pads.get(pad_id)

    def resolve_pcb_pad_net(
        self,
        pcb_doc_uuid: str,
        comp_id: str,
        pin: str,
        pad_id: str,
    ) -> dict | None:
        """Resolve a PCB PAD_NET composite → footprint pad payload (cross-doc).

        Walks component → footprint doc → pad. ``pin`` is accepted so the
        arguments mirror the PAD_NET id tuple, but it is not used in the
        lookup.

        Returns ``{"footprint": <fp_uuid>, "pad": <pad payload>}`` or None if
        the chain breaks (component not mapped, or pad id absent from the
        footprint). A pad that exists with an empty payload still resolves.
        """
        fp_uuid = self.resolve_footprint_doc(pcb_doc_uuid, comp_id)
        if fp_uuid is None:
            return None
        pad = self.pad_in_footprint(fp_uuid, pad_id)
        # Identity check: an empty payload is still a found pad.
        if pad is None:
            return None
        return {"footprint": fp_uuid, "pad": pad}

    def attrs_for_pcb_component(
        self,
        pcb_doc_uuid: str,
        comp_id: str,
    ) -> dict[str, object]:
        """Collapse all ATTR ops on a PCB component into ``{key: value}``.

        Delegates to the document's ``Relations.attrs_dict``; returns ``{}``
        when the document is unknown. The result includes the Footprint UUID,
        Designator, Value, Symbol, etc.
        """
        rel = self.per_doc.get(pcb_doc_uuid)
        if rel is None:
            return {}
        return rel.attrs_dict(comp_id)

    def summary(self) -> dict[str, int]:
        """Return headline counts of the index for logging / sanity checks."""
        duplicated_parts = sum(
            1 for uuids in self.parts_by_id.values() if len(uuids) > 1
        )
        return {
            "documents": len(self.project.documents),
            "doc_types": len(self.docs_by_type),
            "distinct_parts": len(self.parts_by_id),
            "duplicated_parts": duplicated_parts,
            "pcb_components_with_footprint": len(self.component_to_footprint),
            "pcb_components_unresolved_footprint": self.components_with_unresolved_footprint,
            "sch_components_with_partid": len(self.component_to_partid),
            "sch_components_unresolved_part": self.components_with_unresolved_part,
        }