"""CLI: dump EPRO2 docs to Std-shaped JSON files for downstream consumers. The output is "Option 2" per the downstream colleague's spec: Std envelope with a raw EPRO2 ``objects: {id: payload}`` dict in place of the usual ``shape[]`` tilde-string array. Their ~100-LoC adapter walks ``objects`` and dispatches by ``_type`` to build real Std shapes — see ``docs/sources/epro2_to_std_mapping.md`` for the OPTYPE → Std verb table. Usage: uv run python -m tools.epro2.std --all-pcb --out uv run python -m tools.epro2.std --all-sch --out uv run python -m tools.epro2.std --all --out Output: flat ``.json`` per doc — mirrors Std's own data layout so a downstream pipeline that already iterates ``source/*.json`` works unchanged. """ from __future__ import annotations import argparse import json import sys from pathlib import Path from ..replay import Project, replay_project from .pcb_writer import write_pcb_std from .pro2_writer import ( fetch_encrypted_plaintext, split_plaintext_by_doctype, write_pro2_doc, ) from .sch_writer import write_sch_std def _detect_pro2(project_dir: Path) -> tuple[bool, str]: """Return ``(is_pro2, editor_version)`` from manifest.json. Pro 2.x and Pro 3.x EPRO2 share the manifest filename + per-doc-uuid layout, but Pro 2.x sets ``editor_version`` to a 2.x string like ``"2.1.40"`` and stores documents as ``.json`` (vs Pro 3.x's ``.epro2``). The cheap test is just to read the editor_version string — falls through to the existing EPRO2 path on any mismatch. """ mani_path = project_dir / "source" / "manifest.json" if not mani_path.exists(): return (False, "") try: m = json.loads(mani_path.read_text(encoding="utf-8")) except json.JSONDecodeError: return (False, "") ev = str(m.get("editor_version") or "") return (ev.startswith("2."), ev) def _dump(payload: dict, out_path: Path, project_uuid: str) -> None: payload["result"]["puuid"] = project_uuid or "" out_path.write_text( json.dumps(payload, ensure_ascii=False, separators=(",", ":")), encoding="utf-8", ) def _convert_pcbs(proj: Project, out_dir: Path) -> int: uuids = [u for u, d in proj.documents.items() if d.doc_type == "PCB"] if not uuids: return 0 print(f"PCB: dumping {len(uuids)} doc(s) → {out_dir}") for u in uuids: try: payload = write_pcb_std(proj.documents[u]) except Exception as e: # noqa: BLE001 print(f" FAIL {u[:12]}: {e}", file=sys.stderr) continue _dump(payload, out_dir / f"{u}.json", proj.project_uuid or "") s = getattr(write_pcb_std, "last_stats", None) if s: print( f" {u[:12]}.json: objects={s.objects} layers={s.layers_emitted} " f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})" ) return len(uuids) def _convert_schs(proj: Project, out_dir: Path) -> int: uuids = [u for u, d in proj.documents.items() if d.doc_type == "SCH_PAGE"] if not uuids: return 0 print(f"SCH: dumping {len(uuids)} doc(s) → {out_dir}") for u in uuids: try: payload = write_sch_std(proj.documents[u]) except Exception as e: # noqa: BLE001 print(f" FAIL {u[:12]}: {e}", file=sys.stderr) continue _dump(payload, out_dir / f"{u}.json", proj.project_uuid or "") s = getattr(write_sch_std, "last_stats", None) if s: print( f" {u[:12]}.json: objects={s.objects} " f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})" ) return len(uuids) def _convert_pro2_encrypted( json_path: Path, out_dir: Path, project_uuid: str, editor_version: str, parent_uuid: str, ) -> int: """Try fetch + AES-256-GCM decrypt + gunzip the encrypted-external blob, then split by DOCTYPE boundary into per-sub-doc JSONs. Pro 2.x bundles N FOOTPRINTs + 1 PCB (or N SYMBOLs + 1 SCH) into one blob; we emit each as a separate file named ``__.json`` so the parent association is visible in the filename without colliding with other sources. """ plain = fetch_encrypted_plaintext(json_path) if plain is None: return 0 n = 0 for sub_label, sub_text in split_plaintext_by_doctype(plain): # Re-route the inline path: build a synthetic Pro 2.x doc shape # in a temp file so write_pro2_doc + its caching behave normally. synth = { "uuid": f"{parent_uuid}__{sub_label}", "title": sub_label, "docType": _doctype_from_first_line(sub_text), "dataStr": sub_text, } # Write the synthetic JSON next to the original, with a name # that won't collide with manifest entries. synth_path = json_path.parent / f".synth__{parent_uuid}__{sub_label}.json" synth_path.write_text(json.dumps(synth, ensure_ascii=False), encoding="utf-8") try: payload = write_pro2_doc( synth_path, project_uuid=project_uuid, editor_version_hint=editor_version, ) finally: synth_path.unlink(missing_ok=True) if payload is None: continue out_name = f"{parent_uuid}__{sub_label}.json" (out_dir / out_name).write_text( json.dumps(payload, ensure_ascii=False, separators=(",", ":")), encoding="utf-8", ) s = getattr(write_pro2_doc, "last_stats", None) if s: print( f" {parent_uuid[:12]}__{sub_label}: docType={synth['docType']} " f"objects={s.objects}" ) n += 1 return n def _doctype_from_first_line(text: str) -> int: """Read the leading ``["DOCTYPE","KIND","x.y"]`` line and return the Std docType code (1=SCH, 2=SYMBOL, 3=PCB, 4=FOOTPRINT, 5=DEVICE).""" for line in text.splitlines(): line = line.strip() if not line: continue try: arr = json.loads(line) except json.JSONDecodeError: continue if not (isinstance(arr, list) and arr and arr[0] == "DOCTYPE"): continue kind = arr[1] if len(arr) > 1 else "" return { "SCH": 1, "SYMBOL": 2, "PCB": 3, "FOOTPRINT": 4, "DEVICE": 5, }.get(kind, 0) return 0 def _convert_pro2(project_dir: Path, out_dir: Path, editor_version: str, want_pcb: bool, want_sch: bool) -> int: """Pro 2.x path — read each .json directly (no EPRO2 replay) and run pro2_writer. The manifest tells us per-doc docType so we can route to PCB/SCH filters without parsing dataStr first.""" mani_path = project_dir / "source" / "manifest.json" m = json.loads(mani_path.read_text(encoding="utf-8")) project_uuid = m.get("project_uuid") or project_dir.name skipped_encrypted = 0 n = 0 print(f"Pro 2.x project (editor {editor_version}) → {out_dir}") for entry in m["documents"]: dt = entry.get("docType") if dt == 3 and not want_pcb: continue if dt == 1 and not want_sch: continue if dt not in (1, 3): continue path = project_dir / entry["path"] try: payload = write_pro2_doc( path, project_uuid=project_uuid, editor_version_hint=editor_version, ) except Exception as e: # noqa: BLE001 print(f" FAIL {entry['doc_uuid'][:12]}: {e}", file=sys.stderr) continue if payload is None: stats = getattr(write_pro2_doc, "last_stats", None) if stats and stats.skipped_encrypted: # Try fetching + decrypting from modules.lceda.cn. The blob # bundles N FOOTPRINTs/SYMBOLs + 1 parent PCB/SCH; we emit # one JSON per sub-doc. m_n = _convert_pro2_encrypted( path, out_dir, project_uuid, editor_version, parent_uuid=entry["doc_uuid"], ) if m_n > 0: print( f" decrypted {entry['doc_uuid'][:12]}: " f"{m_n} sub-doc(s) emitted" ) n += m_n else: print( f" SKIP {entry['doc_uuid'][:12]}: encrypted-external " f"and fetch/decrypt failed." ) skipped_encrypted += 1 continue out_path = out_dir / f"{entry['doc_uuid']}.json" out_path.write_text( json.dumps(payload, ensure_ascii=False, separators=(",", ":")), encoding="utf-8", ) s = getattr(write_pro2_doc, "last_stats", None) if s: print( f" {entry['doc_uuid'][:12]}.json: docType={dt} " f"objects={s.objects} BBox=({s.bbox_x:g},{s.bbox_y:g}," f"{s.bbox_w:g},{s.bbox_h:g})" ) n += 1 if skipped_encrypted: print( f" ({skipped_encrypted} encrypted-external doc(s) skipped — " f"the source/.json files still hold the dataStrId/iv/key " f"so a future fetch+decrypt pass can recover them.)" ) return n def main(argv: list[str] | None = None) -> int: ap = argparse.ArgumentParser(description="EPRO2 / Pro 2.x → EasyEDA Std-shaped JSON dump") ap.add_argument("project_dir", type=Path) g = ap.add_mutually_exclusive_group(required=True) g.add_argument("--all-pcb", action="store_true", help="dump every PCB doc") g.add_argument("--all-sch", action="store_true", help="dump every SCH_PAGE doc") g.add_argument("--all", action="store_true", help="dump both PCB and SCH_PAGE docs") ap.add_argument("--out", type=Path, default=Path("data/processed/std_json")) args = ap.parse_args(argv) args.out.mkdir(parents=True, exist_ok=True) is_pro2, editor_version = _detect_pro2(args.project_dir) if is_pro2: n = _convert_pro2( args.project_dir, args.out, editor_version, want_pcb=args.all_pcb or args.all, want_sch=args.all_sch or args.all, ) if n == 0: print("nothing to dump (no Pro 2.x SCH/PCB docs survived)", file=sys.stderr) return 1 return 0 # Pro 3.x EPRO2 path — full replay then per-doc dump. proj = replay_project(args.project_dir) n = 0 if args.all_pcb or args.all: n += _convert_pcbs(proj, args.out) if args.all_sch or args.all: n += _convert_schs(proj, args.out) if n == 0: print("nothing to dump (no PCB / SCH_PAGE docs found)", file=sys.stderr) return 1 return 0 if __name__ == "__main__": raise SystemExit(main())