Files
FacereDataset/tools/epro2/std/__main__.py
Knowit 3720cd176a tools/epro2/std: add Pro 2.x JSON path — Liangshan + Taishan SCH now exportable
The downstream colleague's "encrypted_external" / "string old format"
projects were Pro 2.x, not Pro 3.x EPRO2. Pro 2.x ships each doc as a
JSON file whose `dataStr` is a plaintext op-stream — one JSON array per
line, e.g. `["COMPONENT","e1","",0,0,0,0,{},0]`. Different wire format
from EPRO2's binary tilde/pipe streams; same Std envelope works for
output.

  - tools/epro2/std/pro2_writer.py: parses dataStr line-by-line, keys
    objects by id (position 1 for most ops, OPTYPE for singletons),
    extracts BBox by walking known coord positions per OPTYPE, derives
    layers from LAYER ops directly (Pro 2.x almost matches Std layer
    string format already). PCB blobs that are encrypted-external
    (`dataStrId` URL + `iv` + `key`, no inline dataStr — Taishan PCB)
    return None so the CLI skips with a message instead of stubbing.

  - tools/epro2/std/__main__.py: auto-detect via manifest's
    editor_version. "2.x" → Pro 2.x writer; otherwise the existing
    EPRO2 replay path. CLI surface and output layout unchanged.

  - docs/sources/epro2_to_std_mapping.md: adds a Pro 2.x section.
    The adapter dispatches on `head.epro_format`: absent / "epro2" gets
    dict-shaped `objects` values, "pro2" gets array-shaped values
    (`[OPTYPE, arg1, ...]`). Also lists the Pro 2.x-specific OPTYPEs
    (FONTSTYLE / LINESTYLE / CONNECT / OBJ / REGION / DIMENSION /
    STRING / TEARDROP) the EPRO2 vocabulary doesn't have.

Smoke (re-running --all on all 5 Pro projects): 191 → 222 JSON files.
Liangshan adds 3 (2 SCH + inline 5357-object PCB). Taishan adds 28
(SCH only — PCB skipped, encrypted-external; source/<uuid>.json still
keeps the dataStrId/iv/key for a later fetch+decrypt pass).

84 → 86 unit tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 02:00:37 +08:00

201 lines
7.5 KiB
Python

"""CLI: dump EPRO2 docs to Std-shaped JSON files for downstream consumers.
The output is "Option 2" per the downstream colleague's spec: Std envelope
with a raw EPRO2 ``objects: {id: payload}`` dict in place of the usual
``shape[]`` tilde-string array. Their ~100-LoC adapter walks ``objects``
and dispatches by ``_type`` to build real Std shapes — see
``docs/sources/epro2_to_std_mapping.md`` for the OPTYPE → Std verb table.
Usage:
uv run python -m tools.epro2.std <project_dir> --all-pcb --out <dir>
uv run python -m tools.epro2.std <project_dir> --all-sch --out <dir>
uv run python -m tools.epro2.std <project_dir> --all --out <dir>
Output: flat ``<doc_uuid>.json`` per doc — mirrors Std's own data layout
so a downstream pipeline that already iterates ``source/*.json`` works
unchanged.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from ..replay import Project, replay_project
from .pcb_writer import write_pcb_std
from .pro2_writer import write_pro2_doc
from .sch_writer import write_sch_std
def _detect_pro2(project_dir: Path) -> tuple[bool, str]:
"""Return ``(is_pro2, editor_version)`` from manifest.json.
Pro 2.x and Pro 3.x EPRO2 share the manifest filename + per-doc-uuid
layout, but Pro 2.x sets ``editor_version`` to a 2.x string like
``"2.1.40"`` and stores documents as ``<uuid>.json`` (vs Pro 3.x's
``<uuid>.epro2``). The cheap test is just to read the editor_version
string — falls through to the existing EPRO2 path on any mismatch.
"""
mani_path = project_dir / "source" / "manifest.json"
if not mani_path.exists():
return (False, "")
try:
m = json.loads(mani_path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return (False, "")
ev = str(m.get("editor_version") or "")
return (ev.startswith("2."), ev)
def _dump(payload: dict, out_path: Path, project_uuid: str) -> None:
payload["result"]["puuid"] = project_uuid or ""
out_path.write_text(
json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
encoding="utf-8",
)
def _convert_pcbs(proj: Project, out_dir: Path) -> int:
uuids = [u for u, d in proj.documents.items() if d.doc_type == "PCB"]
if not uuids:
return 0
print(f"PCB: dumping {len(uuids)} doc(s) → {out_dir}")
for u in uuids:
try:
payload = write_pcb_std(proj.documents[u])
except Exception as e: # noqa: BLE001
print(f" FAIL {u[:12]}: {e}", file=sys.stderr)
continue
_dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
s = getattr(write_pcb_std, "last_stats", None)
if s:
print(
f" {u[:12]}.json: objects={s.objects} layers={s.layers_emitted} "
f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
)
return len(uuids)
def _convert_schs(proj: Project, out_dir: Path) -> int:
uuids = [u for u, d in proj.documents.items() if d.doc_type == "SCH_PAGE"]
if not uuids:
return 0
print(f"SCH: dumping {len(uuids)} doc(s) → {out_dir}")
for u in uuids:
try:
payload = write_sch_std(proj.documents[u])
except Exception as e: # noqa: BLE001
print(f" FAIL {u[:12]}: {e}", file=sys.stderr)
continue
_dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
s = getattr(write_sch_std, "last_stats", None)
if s:
print(
f" {u[:12]}.json: objects={s.objects} "
f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
)
return len(uuids)
def _convert_pro2(project_dir: Path, out_dir: Path,
editor_version: str, want_pcb: bool, want_sch: bool) -> int:
"""Pro 2.x path — read each <uuid>.json directly (no EPRO2 replay)
and run pro2_writer. The manifest tells us per-doc docType so we
can route to PCB/SCH filters without parsing dataStr first."""
mani_path = project_dir / "source" / "manifest.json"
m = json.loads(mani_path.read_text(encoding="utf-8"))
project_uuid = m.get("project_uuid") or project_dir.name
skipped_encrypted = 0
n = 0
print(f"Pro 2.x project (editor {editor_version}) → {out_dir}")
for entry in m["documents"]:
dt = entry.get("docType")
if dt == 3 and not want_pcb:
continue
if dt == 1 and not want_sch:
continue
if dt not in (1, 3):
continue
path = project_dir / entry["path"]
try:
payload = write_pro2_doc(
path, project_uuid=project_uuid, editor_version_hint=editor_version,
)
except Exception as e: # noqa: BLE001
print(f" FAIL {entry['doc_uuid'][:12]}: {e}", file=sys.stderr)
continue
if payload is None:
stats = getattr(write_pro2_doc, "last_stats", None)
if stats and stats.skipped_encrypted:
print(
f" SKIP {entry['doc_uuid'][:12]}: PCB blob is "
f"AES-encrypted external (dataStrId+iv+key); needs "
f"a separate fetch+decrypt step we don't run here."
)
skipped_encrypted += 1
continue
out_path = out_dir / f"{entry['doc_uuid']}.json"
out_path.write_text(
json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
encoding="utf-8",
)
s = getattr(write_pro2_doc, "last_stats", None)
if s:
print(
f" {entry['doc_uuid'][:12]}.json: docType={dt} "
f"objects={s.objects} BBox=({s.bbox_x:g},{s.bbox_y:g},"
f"{s.bbox_w:g},{s.bbox_h:g})"
)
n += 1
if skipped_encrypted:
print(
f" ({skipped_encrypted} encrypted-external doc(s) skipped — "
f"the source/<uuid>.json files still hold the dataStrId/iv/key "
f"so a future fetch+decrypt pass can recover them.)"
)
return n
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, convert the project, and return an exit code.

    Exactly one of ``--all-pcb`` / ``--all-sch`` / ``--all`` is required. The
    output directory is created if needed. A project whose manifest reports
    a 2.x editor goes through the Pro 2.x converter; every other project is
    replayed as EPRO2 and dumped per document type.

    Returns 0 when at least one document was reported as dumped, otherwise
    prints a "nothing to dump" note to stderr and returns 1.
    """
    parser = argparse.ArgumentParser(description="EPRO2 / Pro 2.x → EasyEDA Std-shaped JSON dump")
    parser.add_argument("project_dir", type=Path)
    scope = parser.add_mutually_exclusive_group(required=True)
    scope.add_argument("--all-pcb", action="store_true", help="dump every PCB doc")
    scope.add_argument("--all-sch", action="store_true", help="dump every SCH_PAGE doc")
    scope.add_argument("--all", action="store_true", help="dump both PCB and SCH_PAGE docs")
    parser.add_argument("--out", type=Path, default=Path("data/processed/std_json"))
    opts = parser.parse_args(argv)
    opts.out.mkdir(parents=True, exist_ok=True)

    do_pcb = opts.all_pcb or opts.all
    do_sch = opts.all_sch or opts.all

    pro2, version = _detect_pro2(opts.project_dir)
    if pro2:
        dumped = _convert_pro2(
            opts.project_dir, opts.out, version,
            want_pcb=do_pcb,
            want_sch=do_sch,
        )
        empty_note = "nothing to dump (no Pro 2.x SCH/PCB docs survived)"
    else:
        # Pro 3.x EPRO2 path — full replay then per-doc dump.
        project = replay_project(opts.project_dir)
        dumped = 0
        if do_pcb:
            dumped += _convert_pcbs(project, opts.out)
        if do_sch:
            dumped += _convert_schs(project, opts.out)
        empty_note = "nothing to dump (no PCB / SCH_PAGE docs found)"

    if dumped == 0:
        print(empty_note, file=sys.stderr)
        return 1
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())