Pro 2.x stores some doc payloads (notably Taishan's PCB) externally at
modules.lceda.cn keyed by dataStrId, AES-256-GCM encrypted with the
iv/key fields stored alongside. Same crypto pattern as Pro 3.x EPRO2:
last 16 bytes are the GCM auth tag, rest is gzip(plaintext-op-stream).
The CDN doesn't require auth.
- pro2_writer.fetch_encrypted_plaintext(): fetch + decrypt + gunzip,
cache result at source/<uuid>.decrypted.txt so re-runs skip the
network round-trip. Heavy imports (httpx, pycryptodome) are
deferred to call-time so the pure-replay path doesn't pay for them.
- pro2_writer.split_plaintext_by_doctype(): walk the multi-doc
plaintext (Pro 2.x bundles N FOOTPRINTs + 1 PCB into one blob), yield
(label, sub_text) per inner doc. Label = HEAD.uuid if present, else
fallback `<kind>_<idx>`.
- __main__._convert_pro2_encrypted(): for each sub-doc, write a
synthetic inline-Pro-2.x JSON next to the original and re-route
through write_pro2_doc — re-uses BBox / layers / objects-extraction
instead of duplicating the logic. Output filename
`<parent_uuid>__<sub_label>.json` makes the parent association
visible.
Smoke (Taishan): 28 inline SCHs → 55 total. Decrypts:
- one PCB blob (3.4 MB plaintext, 20267-object PCB + 25 FOOTPRINT
sub-docs of 130-580 objects each)
- one SCH-typed encrypted doc (1 sub-SCH of 891 objects)
86 unit tests still pass; new fetch/decrypt path is covered manually
via the smoke test rather than mocking httpx + AES.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
292 lines
11 KiB
Python
292 lines
11 KiB
Python
"""CLI: dump EPRO2 docs to Std-shaped JSON files for downstream consumers.
|
|
|
|
The output is "Option 2" per the downstream colleague's spec: Std envelope
|
|
with a raw EPRO2 ``objects: {id: payload}`` dict in place of the usual
|
|
``shape[]`` tilde-string array. Their ~100-LoC adapter walks ``objects``
|
|
and dispatches by ``_type`` to build real Std shapes — see
|
|
``docs/sources/epro2_to_std_mapping.md`` for the OPTYPE → Std verb table.
|
|
|
|
Usage:
|
|
uv run python -m tools.epro2.std <project_dir> --all-pcb --out <dir>
|
|
uv run python -m tools.epro2.std <project_dir> --all-sch --out <dir>
|
|
uv run python -m tools.epro2.std <project_dir> --all --out <dir>
|
|
|
|
Output: flat ``<doc_uuid>.json`` per doc — mirrors Std's own data layout
|
|
so a downstream pipeline that already iterates ``source/*.json`` works
|
|
unchanged.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from ..replay import Project, replay_project
|
|
from .pcb_writer import write_pcb_std
|
|
from .pro2_writer import (
|
|
fetch_encrypted_plaintext,
|
|
split_plaintext_by_doctype,
|
|
write_pro2_doc,
|
|
)
|
|
from .sch_writer import write_sch_std
|
|
|
|
|
|
def _detect_pro2(project_dir: Path) -> tuple[bool, str]:
|
|
"""Return ``(is_pro2, editor_version)`` from manifest.json.
|
|
|
|
Pro 2.x and Pro 3.x EPRO2 share the manifest filename + per-doc-uuid
|
|
layout, but Pro 2.x sets ``editor_version`` to a 2.x string like
|
|
``"2.1.40"`` and stores documents as ``<uuid>.json`` (vs Pro 3.x's
|
|
``<uuid>.epro2``). The cheap test is just to read the editor_version
|
|
string — falls through to the existing EPRO2 path on any mismatch.
|
|
"""
|
|
mani_path = project_dir / "source" / "manifest.json"
|
|
if not mani_path.exists():
|
|
return (False, "")
|
|
try:
|
|
m = json.loads(mani_path.read_text(encoding="utf-8"))
|
|
except json.JSONDecodeError:
|
|
return (False, "")
|
|
ev = str(m.get("editor_version") or "")
|
|
return (ev.startswith("2."), ev)
|
|
|
|
|
|
def _dump(payload: dict, out_path: Path, project_uuid: str) -> None:
|
|
payload["result"]["puuid"] = project_uuid or ""
|
|
out_path.write_text(
|
|
json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
|
|
encoding="utf-8",
|
|
)
|
|
|
|
|
|
def _convert_pcbs(proj: Project, out_dir: Path) -> int:
|
|
uuids = [u for u, d in proj.documents.items() if d.doc_type == "PCB"]
|
|
if not uuids:
|
|
return 0
|
|
print(f"PCB: dumping {len(uuids)} doc(s) → {out_dir}")
|
|
for u in uuids:
|
|
try:
|
|
payload = write_pcb_std(proj.documents[u])
|
|
except Exception as e: # noqa: BLE001
|
|
print(f" FAIL {u[:12]}: {e}", file=sys.stderr)
|
|
continue
|
|
_dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
|
|
s = getattr(write_pcb_std, "last_stats", None)
|
|
if s:
|
|
print(
|
|
f" {u[:12]}.json: objects={s.objects} layers={s.layers_emitted} "
|
|
f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
|
|
)
|
|
return len(uuids)
|
|
|
|
|
|
def _convert_schs(proj: Project, out_dir: Path) -> int:
|
|
uuids = [u for u, d in proj.documents.items() if d.doc_type == "SCH_PAGE"]
|
|
if not uuids:
|
|
return 0
|
|
print(f"SCH: dumping {len(uuids)} doc(s) → {out_dir}")
|
|
for u in uuids:
|
|
try:
|
|
payload = write_sch_std(proj.documents[u])
|
|
except Exception as e: # noqa: BLE001
|
|
print(f" FAIL {u[:12]}: {e}", file=sys.stderr)
|
|
continue
|
|
_dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
|
|
s = getattr(write_sch_std, "last_stats", None)
|
|
if s:
|
|
print(
|
|
f" {u[:12]}.json: objects={s.objects} "
|
|
f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
|
|
)
|
|
return len(uuids)
|
|
|
|
|
|
def _convert_pro2_encrypted(
    json_path: Path, out_dir: Path,
    project_uuid: str, editor_version: str, parent_uuid: str,
) -> int:
    """Try fetch + AES-256-GCM decrypt + gunzip the encrypted-external
    blob, then split by DOCTYPE boundary into per-sub-doc JSONs.

    Pro 2.x bundles N FOOTPRINTs + 1 PCB (or N SYMBOLs + 1 SCH) into one
    blob; we emit each as a separate file named
    ``<parent_uuid>__<sub_label>.json`` so the parent association is
    visible in the filename without colliding with other sources.

    Args:
        json_path: The encrypted-external ``<uuid>.json`` source document.
        out_dir: Directory receiving the per-sub-doc output files.
        project_uuid: Forwarded to ``write_pro2_doc``.
        editor_version: Forwarded to ``write_pro2_doc`` as the version hint.
        parent_uuid: uuid of the bundling document; prefixes every output
            name.

    Returns:
        Number of sub-doc files written. 0 when
        ``fetch_encrypted_plaintext`` returns ``None``. A sub-doc whose
        conversion raises is reported on stderr and skipped, so one bad
        sub-doc no longer discards the rest of the bundle.
    """
    plain = fetch_encrypted_plaintext(json_path)
    if plain is None:
        return 0

    n = 0
    for sub_label, sub_text in split_plaintext_by_doctype(plain):
        # NOTE(review): sub_label is used verbatim in file names; presumably
        # it is a uuid or `<kind>_<idx>` — confirm it can never contain a
        # path separator.
        stem = f"{parent_uuid}__{sub_label}"
        # Re-route the inline path: build a synthetic Pro 2.x doc shape
        # in a temp file so write_pro2_doc + its caching behave normally.
        synth = {
            "uuid": stem,
            "title": sub_label,
            "docType": _doctype_from_first_line(sub_text),
            "dataStr": sub_text,
        }
        # Write the synthetic JSON next to the original, with a name
        # that won't collide with manifest entries.
        synth_path = json_path.parent / f".synth__{stem}.json"
        try:
            synth_path.write_text(json.dumps(synth, ensure_ascii=False), encoding="utf-8")
            payload = write_pro2_doc(
                synth_path, project_uuid=project_uuid,
                editor_version_hint=editor_version,
            )
        except Exception as e:  # noqa: BLE001
            # Same per-document best-effort policy as the other converters.
            print(f" FAIL {parent_uuid[:12]}__{sub_label}: {e}", file=sys.stderr)
            continue
        finally:
            # Runs on success, failure and `continue`: never leave the
            # synthetic file behind in the source directory.
            synth_path.unlink(missing_ok=True)
        if payload is None:
            continue
        (out_dir / f"{stem}.json").write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
        s = getattr(write_pro2_doc, "last_stats", None)
        if s:
            print(
                f" {parent_uuid[:12]}__{sub_label}: docType={synth['docType']} "
                f"objects={s.objects}"
            )
        n += 1
    return n
|
|
|
|
|
|
def _doctype_from_first_line(text: str) -> int:
|
|
"""Read the leading ``["DOCTYPE","KIND","x.y"]`` line and return the
|
|
Std docType code (1=SCH, 2=SYMBOL, 3=PCB, 4=FOOTPRINT, 5=DEVICE)."""
|
|
for line in text.splitlines():
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
try:
|
|
arr = json.loads(line)
|
|
except json.JSONDecodeError:
|
|
continue
|
|
if not (isinstance(arr, list) and arr and arr[0] == "DOCTYPE"):
|
|
continue
|
|
kind = arr[1] if len(arr) > 1 else ""
|
|
return {
|
|
"SCH": 1, "SYMBOL": 2, "PCB": 3, "FOOTPRINT": 4, "DEVICE": 5,
|
|
}.get(kind, 0)
|
|
return 0
|
|
|
|
|
|
def _convert_pro2(project_dir: Path, out_dir: Path,
|
|
editor_version: str, want_pcb: bool, want_sch: bool) -> int:
|
|
"""Pro 2.x path — read each <uuid>.json directly (no EPRO2 replay)
|
|
and run pro2_writer. The manifest tells us per-doc docType so we
|
|
can route to PCB/SCH filters without parsing dataStr first."""
|
|
mani_path = project_dir / "source" / "manifest.json"
|
|
m = json.loads(mani_path.read_text(encoding="utf-8"))
|
|
project_uuid = m.get("project_uuid") or project_dir.name
|
|
|
|
skipped_encrypted = 0
|
|
n = 0
|
|
print(f"Pro 2.x project (editor {editor_version}) → {out_dir}")
|
|
for entry in m["documents"]:
|
|
dt = entry.get("docType")
|
|
if dt == 3 and not want_pcb:
|
|
continue
|
|
if dt == 1 and not want_sch:
|
|
continue
|
|
if dt not in (1, 3):
|
|
continue
|
|
path = project_dir / entry["path"]
|
|
try:
|
|
payload = write_pro2_doc(
|
|
path, project_uuid=project_uuid, editor_version_hint=editor_version,
|
|
)
|
|
except Exception as e: # noqa: BLE001
|
|
print(f" FAIL {entry['doc_uuid'][:12]}: {e}", file=sys.stderr)
|
|
continue
|
|
if payload is None:
|
|
stats = getattr(write_pro2_doc, "last_stats", None)
|
|
if stats and stats.skipped_encrypted:
|
|
# Try fetching + decrypting from modules.lceda.cn. The blob
|
|
# bundles N FOOTPRINTs/SYMBOLs + 1 parent PCB/SCH; we emit
|
|
# one JSON per sub-doc.
|
|
m_n = _convert_pro2_encrypted(
|
|
path, out_dir, project_uuid, editor_version,
|
|
parent_uuid=entry["doc_uuid"],
|
|
)
|
|
if m_n > 0:
|
|
print(
|
|
f" decrypted {entry['doc_uuid'][:12]}: "
|
|
f"{m_n} sub-doc(s) emitted"
|
|
)
|
|
n += m_n
|
|
else:
|
|
print(
|
|
f" SKIP {entry['doc_uuid'][:12]}: encrypted-external "
|
|
f"and fetch/decrypt failed."
|
|
)
|
|
skipped_encrypted += 1
|
|
continue
|
|
out_path = out_dir / f"{entry['doc_uuid']}.json"
|
|
out_path.write_text(
|
|
json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
|
|
encoding="utf-8",
|
|
)
|
|
s = getattr(write_pro2_doc, "last_stats", None)
|
|
if s:
|
|
print(
|
|
f" {entry['doc_uuid'][:12]}.json: docType={dt} "
|
|
f"objects={s.objects} BBox=({s.bbox_x:g},{s.bbox_y:g},"
|
|
f"{s.bbox_w:g},{s.bbox_h:g})"
|
|
)
|
|
n += 1
|
|
if skipped_encrypted:
|
|
print(
|
|
f" ({skipped_encrypted} encrypted-external doc(s) skipped — "
|
|
f"the source/<uuid>.json files still hold the dataStrId/iv/key "
|
|
f"so a future fetch+decrypt pass can recover them.)"
|
|
)
|
|
return n
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse arguments, pick the converter, report status.

    Exactly one of ``--all-pcb`` / ``--all-sch`` / ``--all`` is required.
    The output directory is created if needed. Projects detected as
    Pro 2.x are converted straight from their JSON documents; everything
    else goes through a full EPRO2 replay followed by per-doc dumps.

    Returns:
        0 when at least one document was dumped, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="EPRO2 / Pro 2.x → EasyEDA Std-shaped JSON dump")
    parser.add_argument("project_dir", type=Path)
    mode = parser.add_mutually_exclusive_group(required=True)
    mode_flags = (
        ("--all-pcb", "dump every PCB doc"),
        ("--all-sch", "dump every SCH_PAGE doc"),
        ("--all", "dump both PCB and SCH_PAGE docs"),
    )
    for flag, help_text in mode_flags:
        mode.add_argument(flag, action="store_true", help=help_text)
    parser.add_argument("--out", type=Path, default=Path("data/processed/std_json"))
    opts = parser.parse_args(argv)

    out_dir = opts.out
    out_dir.mkdir(parents=True, exist_ok=True)

    # The three mode flags collapse into two independent switches.
    want_pcb = opts.all_pcb or opts.all
    want_sch = opts.all_sch or opts.all

    is_pro2, editor_version = _detect_pro2(opts.project_dir)
    if is_pro2:
        dumped = _convert_pro2(
            opts.project_dir, out_dir, editor_version,
            want_pcb=want_pcb,
            want_sch=want_sch,
        )
        if dumped == 0:
            print("nothing to dump (no Pro 2.x SCH/PCB docs survived)", file=sys.stderr)
            return 1
        return 0

    # Pro 3.x EPRO2 path — full replay then per-doc dump.
    proj = replay_project(opts.project_dir)
    dumped = 0
    if want_pcb:
        dumped += _convert_pcbs(proj, out_dir)
    if want_sch:
        dumped += _convert_schs(proj, out_dir)
    if dumped == 0:
        print("nothing to dump (no PCB / SCH_PAGE docs found)", file=sys.stderr)
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|