tools/epro2: add std/ writer — EPRO2 → EasyEDA Std-format JSON for downstream

The downstream colleague consumes oshwhub Std (lceda) dict-format JSON,
not KiCad. The EPRO2 decryption part (per-doc plaintext .epro2 streams
in data/raw/<uuid>/source/) is what we already provide; the missing
piece is converting EPRO2 op-streams into the same `dataStr.shape`
tilde-delimited format their parser already speaks.

New tools/epro2/std/ module, peer of tools/epro2/kicad/, kept
deliberately separate so the KiCad path stays untouched:

  - pcb_writer.write_pcb_std() — high-fidelity, validated against a Std
    PCB sample at data/raw/oshwhub/3e2f893d.../25931ddab8.json. Maps
    LINE→TRACK, VIA→VIA, POUR→COPPERAREA (with SVG `M..L..Z` path),
    POLY→CIRCLE/SOLIDREGION, COMPONENT+FOOTPRINT→LIB nested with
    #@$-separated PADs (placement rotation + translate applied so pad
    coords land at PCB-absolute positions). Layer-id mapping (EPRO2 5↔7
    flipped vs Std solder/paste, 11→10 outline, 12→11 multi, SIGNAL
    inner 15+ → Std 21+) noted inline.

  - sch_writer.write_sch_std() — best-effort. Our corpus has zero Std
    schematic samples (docType=1) so verb field orders follow the
    EasyEDA Std public spec, not direct observation. Emits W (wire),
    N (net flag, including the 5-Voltage Global Net Name power-port
    pattern), T (text), LIB (placement with #@$-nested PIN/T). If
    downstream's parser bails, the fix is almost certainly a positional
    field tweak, not a re-architecture.

  - __main__.py — flat output `<doc_uuid>.json` per doc directly under
    --out (mirrors Std's own data layout); --all-pcb / --all-sch / --all.

Smoke test on ESP-VoCat: 6 PCB + 9 SCH = 15 JSON files, libs_unresolved=0
across the board. Compact JSON (separators=(",",":")) matches Std's
single-line format. Numbers use _num() — integers without trailing .0,
floats trimmed.

71 → 82 unit tests pass.

Open questions for downstream: (1) confirm SCH verb field orders, (2)
do they want any of the upstream metadata fields we drop (master,
owner, created_at, etc. — those live on the crawler side, not in the
schematic itself)?

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 01:16:39 +08:00
parent ed713fa557
commit fe6971f3f9
6 changed files with 1155 additions and 0 deletions

View File

@@ -0,0 +1,202 @@
"""Std writer regression: synthetic EPRO2 docs → Std-format JSON dicts."""
import json
from collections import Counter
from tools.epro2.project_relations import ProjectRelations
from tools.epro2.replay import Document, Project
from tools.epro2.std.pcb_writer import write_pcb_std
from tools.epro2.std.sch_writer import write_sch_std
def _doc(typ, uuid="d"):
    """Build a bare ``Document`` of type *typ* for use as a test fixture.

    Args:
        typ: Document type string (e.g. ``"PCB"``); passed as ``doc_type``
            and mirrored into ``head["docType"]``.
        uuid: Document uuid; defaults to ``"d"``.

    Returns:
        The newly constructed ``Document``.
    """
    document = Document(doc_uuid=uuid, doc_type=typ)
    document.head = {"docType": typ}
    return document
def _empty_pr(*docs):
    """Register *docs* in a throwaway ``Project`` and build its relations.

    Args:
        *docs: Documents to store in the project, keyed by their
            ``doc_uuid``.

    Returns:
        Whatever ``ProjectRelations.build`` returns for that project.
    """
    project = Project(project_uuid="p")
    for document in docs:
        project.documents[document.doc_uuid] = document
    return ProjectRelations.build(project)
def _verbs(payload):
return Counter(s.split("~")[0] for s in payload["result"]["dataStr"]["shape"])
# -- PCB ---------------------------------------------------------------
def test_pcb_envelope_matches_std_shape():
    """The PCB payload must use the Std `{success, code, result}` envelope.

    `result.docType` has to be 3 and `result.dataStr.shape` has to be a
    list: that is the contract Std parsers key off, and a parser that
    does not find it bails before the shape array is even read.
    """
    pcb = _doc("PCB", "p1")
    pcb.objects["META"] = {"_type": "META", "title": "Test"}

    envelope = write_pcb_std(pcb, project_relations=_empty_pr(pcb))

    assert envelope["success"] is True
    assert envelope["code"] == 0

    result = envelope["result"]
    assert result["docType"] == 3
    assert result["uuid"] == "p1"

    data = result["dataStr"]
    assert isinstance(data["shape"], list)
    # The layer block must at least mention TopLayer.
    assert any("TopLayer" in layer for layer in data["layers"])
def test_pcb_line_emits_track_with_layer_and_net():
    """A LINE on a copper layer is written as exactly one Std TRACK.

    Expected field order is `TRACK~width~layer~net~points~uuid~locked`,
    the same as Std produces. Getting the order wrong would put tracks on
    the wrong layer in downstream renders even when nothing crashes.
    """
    pcb = _doc("PCB", "p1")
    line = {
        "_type": "LINE",
        "layerId": 1,
        "netName": "GND",
        "width": 6,
        "startX": 100,
        "startY": 200,
        "endX": 500,
        "endY": 200,
    }
    pcb.objects["ln1"] = line

    payload = write_pcb_std(pcb, project_relations=_empty_pr(pcb))

    shapes = payload["result"]["dataStr"]["shape"]
    tracks = [shape for shape in shapes if shape.startswith("TRACK~")]
    assert len(tracks) == 1

    fields = tracks[0].split("~")
    assert fields[0] == "TRACK"
    assert fields[1] == "6"      # width
    assert fields[2] == "1"      # Std layer 1 = TopLayer
    assert fields[3] == "GND"    # net name
    assert "100 200 500 200" in fields[4]  # start/end points
def test_pcb_via_emits_correct_field_order():
    """A VIA object is written as a Std VIA string with the expected fields.

    Field order checked here is `VIA~x~y~outerD~net~innerD~uuid~locked`;
    only the first five value fields are asserted.
    """
    d = _doc("PCB", "p1")
    d.objects["v1"] = {
        "_type": "VIA", "centerX": 100, "centerY": 200,
        "viaDiameter": 24, "holeDiameter": 12, "netName": "VCC",
    }
    payload = write_pcb_std(d, project_relations=_empty_pr(d))
    shapes = payload["result"]["dataStr"]["shape"]
    # Use a default so a missing VIA fails as a readable assertion rather
    # than surfacing as a bare StopIteration from next().
    via = next((s for s in shapes if s.startswith("VIA~")), None)
    assert via is not None, "expected a VIA shape in dataStr.shape"
    f = via.split("~")
    # VIA~x~y~outerD~net~innerD~uuid~locked
    assert f[1] == "100"   # x
    assert f[2] == "200"   # y
    assert f[3] == "24"    # outer (via) diameter
    assert f[4] == "VCC"   # net name
    assert f[5] == "12"    # inner (hole) diameter
def test_pcb_pour_rectangle_becomes_copperarea_with_svg_path():
    """POUR on a copper layer must emit a COPPERAREA with an SVG `M..L..Z`
    path — Std uses SVG path syntax for filled regions, and downstream
    fills are computed from this path. A `R x y w h` rectangle expands
    to an explicit four-corner Z-closed polygon."""
    d = _doc("PCB", "p1")
    d.objects["p1"] = {
        "_type": "POUR", "layerId": 1, "netName": "GND",
        "path": [["R", 0, 0, 1000, 1000]],
    }
    payload = write_pcb_std(d, project_relations=_empty_pr(d))
    shapes = payload["result"]["dataStr"]["shape"]
    # Use a default so a missing COPPERAREA fails as a readable assertion
    # rather than surfacing as a bare StopIteration from next().
    ca = next((s for s in shapes if s.startswith("COPPERAREA~")), None)
    assert ca is not None, "expected a COPPERAREA shape in dataStr.shape"
    assert "M 0 0" in ca   # path starts at the rectangle origin
    assert " Z" in ca      # path is explicitly closed
    assert "GND" in ca     # net name is carried through
def test_pcb_lib_nests_pads_via_separator():
    """A footprint placement yields one LIB entry with `#@$`-joined PADs.

    Std writes one symbol-with-pads per shape entry. If pads were emitted
    as separate top-level shapes, downstream's symbol grouping would
    break (pads end up unowned).
    """
    footprint = _doc("FOOTPRINT", "fp1")
    footprint.objects["META"] = {"_type": "META", "title": "0402"}
    footprint.objects["pad1"] = {
        "_type": "PAD",
        "num": "1",
        "centerX": -20,
        "centerY": 0,
        "padAngle": 0,
        "layerId": 1,
        "hole": None,
        "defaultPad": {"padType": "RECT", "width": 30, "height": 20},
    }

    board = _doc("PCB", "pcb1")
    board.objects["C1"] = {"_type": "COMPONENT", "x": 100, "y": 100, "angle": 0}
    board.objects["a1"] = {
        "_type": "ATTR",
        "parentId": "C1",
        "key": "Footprint",
        "value": "fp1",
    }

    payload = write_pcb_std(board, project_relations=_empty_pr(footprint, board))

    lib_entries = [
        shape
        for shape in payload["result"]["dataStr"]["shape"]
        if shape.startswith("LIB~")
    ]
    assert len(lib_entries) == 1

    # The LIB header comes first; nested children follow, split on '#@$'.
    header, *children = lib_entries[0].split("#@$")
    assert header.startswith("LIB~")
    assert any(child.startswith("PAD~") for child in children)

    # The LIB-rooted block is the unit shape entry, so no PAD may appear
    # as a top-level shape of its own.
    assert not any(
        shape.startswith("PAD~")
        for shape in payload["result"]["dataStr"]["shape"]
    )
# -- SCH ---------------------------------------------------------------
def test_sch_envelope_carries_doctype_1():
    """Schematic payloads must report `result.docType == 1`.

    Downstream filters on docType to choose between its PCB and SCH
    parsers, so a wrong value silently routes the file to the wrong one.
    """
    page = _doc("SCH_PAGE", "s1")
    page.objects["META"] = {"_type": "META", "title": "Test"}

    envelope = write_sch_std(page, project_relations=_empty_pr(page))

    result = envelope["result"]
    assert result["docType"] == 1
def test_sch_named_wire_emits_wire_plus_netflag():
    """A LINE in a NET-named wire group produces one W and one N shape.

    The W is the wire segment; the N is a net flag named after the net.
    Same-named flags on distinct segments are how Std unifies named
    nets — without the N the wire would be anonymous.
    """
    page = _doc("SCH_PAGE", "s1")
    fixtures = {
        "w1": {"_type": "WIRE"},
        "a1": {"_type": "ATTR", "parentId": "w1", "key": "NET", "value": "GND"},
        "ln1": {
            "_type": "LINE",
            "lineGroup": "w1",
            "startX": 0,
            "startY": 0,
            "endX": 100,
            "endY": 0,
        },
    }
    # Insert one by one, in the same order, to keep object ordering stable.
    for object_id, obj in fixtures.items():
        page.objects[object_id] = obj

    payload = write_sch_std(page, project_relations=_empty_pr(page))

    verb_counts = _verbs(payload)
    assert verb_counts["W"] == 1
    assert verb_counts["N"] == 1
def test_sch_power_port_component_emits_extra_netflag():
    """A power-port COMPONENT must yield an N flag named after its rail.

    The placeholder component carries the rail name in a
    `Global Net Name` ATTR; the writer is expected to emit an N flag
    with that name so the symbol's pin connects to the global rail
    (the counterpart of the KiCad path's global_label handling).
    """
    part_id = "pid8a0e77bacb214e"

    symbol = _doc("SYMBOL", "sym1")
    symbol.objects[part_id] = {"_type": "PART", "title": ""}
    symbol.objects["pin1"] = {
        "_type": "PIN",
        "partId": part_id,
        "x": 0,
        "y": 0,
        "length": 5,
        "rotation": 0,
    }

    page = _doc("SCH_PAGE", "s1")
    page.objects["e1"] = {
        "_type": "COMPONENT",
        "partId": part_id,
        "x": 100,
        "y": 50,
        "rotation": 0,
    }
    page.objects["a1"] = {
        "_type": "ATTR",
        "parentId": "e1",
        "key": "Global Net Name",
        "value": "VBUS",
    }

    payload = write_sch_std(page, project_relations=_empty_pr(symbol, page))

    net_flags = [
        shape
        for shape in payload["result"]["dataStr"]["shape"]
        if shape.startswith("N~")
    ]
    has_vbus_flag = any("VBUS" in flag for flag in net_flags)
    assert has_vbus_flag, \
        "expected an N flag named VBUS for the power-port placement"
def test_writers_round_trip_through_json_dump():
    """Both writers must produce payloads that `json.dumps` accepts.

    Only JSON-native values may appear in the output; a stray datetime,
    Decimal or bytes value would make `json.dumps` raise here, catching
    type leaks early.
    """
    cases = (
        ("PCB", "p1", write_pcb_std),
        ("SCH_PAGE", "s1", write_sch_std),
    )
    for doc_type, doc_uuid, writer in cases:
        doc = _doc(doc_type, doc_uuid)
        doc.objects["META"] = {"_type": "META", "title": "Test"}
        payload = writer(doc, project_relations=_empty_pr(doc))
        # Serialising is the whole check: an exception fails the test.
        json.dumps(payload)