FacereDataset/tools/epro2/std/__main__.py

"""CLI: dump EPRO2 docs to Std-shaped JSON files for downstream consumers.

The output is "Option 2" per the downstream colleague's spec: Std envelope
with a raw EPRO2 ``objects: {id: payload}`` dict in place of the usual
``shape[]`` tilde-string array. Their ~100-LoC adapter walks ``objects``
and dispatches by ``_type`` to build real Std shapes — see
``docs/sources/epro2_to_std_mapping.md`` for the OPTYPE → Std verb table.

Usage:
    uv run python -m tools.epro2.std <project_dir> --all-pcb --out <dir>
    uv run python -m tools.epro2.std <project_dir> --all-sch --out <dir>
    uv run python -m tools.epro2.std <project_dir> --all     --out <dir>

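Output: flat ``<doc_uuid>.json`` per doc — mirrors Std's own data layout
so a downstream pipeline that already iterates ``source/*.json`` works
unchanged. The one exception is a Pro 2.x encrypted-external doc: each
sub-doc recovered from its blob is written as
``<parent_uuid>__<sub_label>.json``.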
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

from ..replay import Project, replay_project
from .pcb_writer import write_pcb_std
from .pro2_writer import (
    fetch_encrypted_plaintext,
    split_plaintext_by_doctype,
    write_pro2_doc,
)
from .sch_writer import write_sch_std


def _detect_pro2(project_dir: Path) -> tuple[bool, str]:
    """Return ``(is_pro2, editor_version)`` from manifest.json.

    Pro 2.x and Pro 3.x EPRO2 share the manifest filename + per-doc-uuid
    layout, but Pro 2.x sets ``editor_version`` to a 2.x string like
    ``"2.1.40"`` and stores documents as ``<uuid>.json`` (vs Pro 3.x's
    ``<uuid>.epro2``). The cheap test is just to read the editor_version
    string — falls through to the existing EPRO2 path on any mismatch.

    "Any mismatch" includes a manifest that is missing, unreadable, not
    valid UTF-8, not valid JSON, or whose top-level value is not a JSON
    object: all of those return ``(False, "")`` instead of raising.

    Args:
        project_dir: Project root; the manifest is looked up at
            ``<project_dir>/source/manifest.json``.

    Returns:
        ``(True, version)`` when ``editor_version`` starts with ``"2."``,
        otherwise ``(False, version)`` with ``version`` possibly empty.
    """
    mani_path = project_dir / "source" / "manifest.json"
    try:
        manifest = json.loads(mani_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing / unreadable. ValueError is the common base
        # of json.JSONDecodeError and UnicodeDecodeError.
        return (False, "")
    if not isinstance(manifest, dict):
        # e.g. a top-level JSON array — there is no editor_version to read.
        return (False, "")
    ev = str(manifest.get("editor_version") or "")
    return (ev.startswith("2."), ev)


def _dump(payload: dict, out_path: Path, project_uuid: str) -> None:
    """Stamp the project uuid on *payload* and write it as compact JSON.

    ``payload["result"]["puuid"]`` is overwritten in place (an empty
    string when *project_uuid* is falsy), then the whole payload is
    serialised without extra whitespace, keeping non-ASCII characters
    as-is, and written UTF-8 encoded to *out_path*.
    """
    result_section = payload["result"]
    result_section["puuid"] = project_uuid if project_uuid else ""
    serialized = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    out_path.write_text(serialized, encoding="utf-8")


def _convert_pcbs(proj: Project, out_dir: Path) -> int:
    """Dump every PCB document of *proj* as ``<uuid>.json`` in *out_dir*.

    Each document whose ``doc_type`` is ``"PCB"`` is converted with
    ``write_pcb_std`` and written through ``_dump``. A document whose
    conversion raises is reported on stderr and skipped so the rest of
    the batch still runs.

    Returns:
        The number of documents actually written. Failed conversions are
        not counted, so a caller that treats 0 as "nothing dumped" also
        notices the case where every document failed.
    """
    pcb_docs = {u: d for u, d in proj.documents.items() if d.doc_type == "PCB"}
    if not pcb_docs:
        return 0
    print(f"PCB: dumping {len(pcb_docs)} doc(s) → {out_dir}")
    written = 0
    for u, doc in pcb_docs.items():
        try:
            payload = write_pcb_std(doc)
        except Exception as e:  # noqa: BLE001
            # Best-effort batch: report and move on to the next document.
            print(f"  FAIL {u[:12]}: {e}", file=sys.stderr)
            continue
        _dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
        written += 1
        # The writer publishes per-call statistics as a function attribute;
        # it may be absent, in which case no summary line is printed.
        s = getattr(write_pcb_std, "last_stats", None)
        if s:
            print(
                f"  {u[:12]}.json: objects={s.objects} layers={s.layers_emitted} "
                f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
            )
    return written


def _convert_schs(proj: Project, out_dir: Path) -> int:
    """Dump every SCH_PAGE document of *proj* as ``<uuid>.json`` in *out_dir*.

    Each document whose ``doc_type`` is ``"SCH_PAGE"`` is converted with
    ``write_sch_std`` and written through ``_dump``. A document whose
    conversion raises is reported on stderr and skipped so the rest of
    the batch still runs.

    Returns:
        The number of documents actually written. Failed conversions are
        not counted, so a caller that treats 0 as "nothing dumped" also
        notices the case where every document failed.
    """
    sch_docs = {u: d for u, d in proj.documents.items() if d.doc_type == "SCH_PAGE"}
    if not sch_docs:
        return 0
    print(f"SCH: dumping {len(sch_docs)} doc(s) → {out_dir}")
    written = 0
    for u, doc in sch_docs.items():
        try:
            payload = write_sch_std(doc)
        except Exception as e:  # noqa: BLE001
            # Best-effort batch: report and move on to the next document.
            print(f"  FAIL {u[:12]}: {e}", file=sys.stderr)
            continue
        _dump(payload, out_dir / f"{u}.json", proj.project_uuid or "")
        written += 1
        # The writer publishes per-call statistics as a function attribute;
        # it may be absent, in which case no summary line is printed.
        s = getattr(write_sch_std, "last_stats", None)
        if s:
            print(
                f"  {u[:12]}.json: objects={s.objects} "
                f"BBox=({s.bbox_x:g},{s.bbox_y:g},{s.bbox_w:g},{s.bbox_h:g})"
            )
    return written


def _convert_pro2_encrypted(
    json_path: Path, out_dir: Path,
    project_uuid: str, editor_version: str, parent_uuid: str,
) -> int:
    """Try fetch + AES-256-GCM decrypt + gunzip the encrypted-external
    blob, then split by DOCTYPE boundary into per-sub-doc JSONs.

    Pro 2.x bundles N FOOTPRINTs + 1 PCB (or N SYMBOLs + 1 SCH) into one
    blob; we emit each as a separate file named
    ``<parent_uuid>__<sub_label>.json`` so the parent association is
    visible in the filename without colliding with other sources.

    Each sub-doc is handled independently: if building or converting one
    raises, it is reported on stderr and skipped, and the remaining
    sub-docs are still processed. The temporary synthetic input file is
    removed in every case.

    Args:
        json_path: The ``<uuid>.json`` of the encrypted-external parent doc.
        out_dir: Directory receiving the per-sub-doc output files.
        project_uuid: Passed through to ``write_pro2_doc``.
        editor_version: Passed to ``write_pro2_doc`` as ``editor_version_hint``.
        parent_uuid: Doc uuid of the parent; prefixes every output name.

    Returns:
        Number of sub-doc files written; 0 when no plaintext could be
        obtained or no sub-doc produced a payload.
    """
    plain = fetch_encrypted_plaintext(json_path)
    if plain is None:
        return 0

    emitted = 0
    for sub_label, sub_text in split_plaintext_by_doctype(plain):
        stem = f"{parent_uuid}__{sub_label}"
        doc_type = _doctype_from_first_line(sub_text)
        # Re-route the inline path: build a synthetic Pro 2.x doc shape
        # in a temp file so write_pro2_doc + its caching behave normally.
        synth = {
            "uuid": stem,
            "title": sub_label,
            "docType": doc_type,
            "dataStr": sub_text,
        }
        # The synthetic JSON lives next to the original, with a name
        # that won't collide with manifest entries.
        synth_path = json_path.parent / f".synth__{stem}.json"
        try:
            # Writing happens inside the try so a partially written file is
            # cleaned up by the finally clause as well.
            synth_path.write_text(json.dumps(synth, ensure_ascii=False), encoding="utf-8")
            payload = write_pro2_doc(
                synth_path, project_uuid=project_uuid,
                editor_version_hint=editor_version,
            )
        except Exception as e:  # noqa: BLE001
            # Same best-effort policy as the per-doc converters: one bad
            # sub-doc must not abort the rest of the blob.
            print(f"  FAIL {parent_uuid[:12]}__{sub_label}: {e}", file=sys.stderr)
            continue
        finally:
            synth_path.unlink(missing_ok=True)
        if payload is None:
            continue
        (out_dir / f"{stem}.json").write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
        s = getattr(write_pro2_doc, "last_stats", None)
        if s:
            print(
                f"  {parent_uuid[:12]}__{sub_label}: docType={doc_type} "
                f"objects={s.objects}"
            )
        emitted += 1
    return emitted


def _doctype_from_first_line(text: str) -> int:
    """Read the leading ``["DOCTYPE","KIND","x.y"]`` line and return the
    Std docType code (1=SCH, 2=SYMBOL, 3=PCB, 4=FOOTPRINT, 5=DEVICE).

    Blank lines, lines that are not valid JSON, and JSON values that are
    not a list starting with ``"DOCTYPE"`` are skipped; the first matching
    line decides the result.

    Returns:
        The docType code, or 0 when no DOCTYPE line exists, the line has
        no KIND element, or KIND is not one of the known strings
        (including a KIND that is not a string at all, e.g. a nested list).
    """
    codes = {"SCH": 1, "SYMBOL": 2, "PCB": 3, "FOOTPRINT": 4, "DEVICE": 5}
    for raw in text.splitlines():
        line = raw.strip()
        if not line:
            continue
        try:
            arr = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not (isinstance(arr, list) and arr and arr[0] == "DOCTYPE"):
            continue
        kind = arr[1] if len(arr) > 1 else ""
        # Guard the lookup: a list/dict KIND is unhashable and would make
        # dict.get raise TypeError instead of yielding "unknown".
        if not isinstance(kind, str):
            return 0
        return codes.get(kind, 0)
    return 0


def _convert_pro2(project_dir: Path, out_dir: Path,
                   editor_version: str, want_pcb: bool, want_sch: bool) -> int:
    """Pro 2.x path — read each <uuid>.json directly (no EPRO2 replay)
    and run pro2_writer. The manifest tells us per-doc docType so we
    can route to PCB/SCH filters without parsing dataStr first.

    Only manifest entries with ``docType`` 3 (kept when *want_pcb*) or
    1 (kept when *want_sch*) are converted; every other entry is ignored.
    A manifest without a ``documents`` list is treated as empty.

    Per document the flow is:

    * ``write_pro2_doc`` raises → report on stderr, continue.
    * it returns ``None`` and its ``last_stats`` flags the doc as
      encrypted-external → try ``_convert_pro2_encrypted``; a failure
      there (exception or zero sub-docs) is reported and the doc skipped.
    * it returns ``None`` otherwise → nothing is emitted for the doc.
    * it returns a payload → write ``<doc_uuid>.json`` into *out_dir*.

    Returns:
        Number of JSON files emitted (every sub-doc recovered from an
        encrypted blob counts as one).
    """
    mani_path = project_dir / "source" / "manifest.json"
    m = json.loads(mani_path.read_text(encoding="utf-8"))
    project_uuid = m.get("project_uuid") or project_dir.name
    # docType codes selected by the flags. A tuple (not a set) keeps the
    # membership test equality-based, so an odd, unhashable docType value
    # is simply "not wanted" rather than an error.
    wanted = tuple(code for code, on in ((3, want_pcb), (1, want_sch)) if on)

    skipped_encrypted = 0
    n = 0
    print(f"Pro 2.x project (editor {editor_version}) → {out_dir}")
    # ``or []`` covers both a missing key and an explicit null.
    for entry in m.get("documents") or []:
        dt = entry.get("docType")
        if dt not in wanted:
            continue
        path = project_dir / entry["path"]
        doc_id = entry["doc_uuid"]
        try:
            payload = write_pro2_doc(
                path, project_uuid=project_uuid, editor_version_hint=editor_version,
            )
        except Exception as e:  # noqa: BLE001
            print(f"  FAIL {doc_id[:12]}: {e}", file=sys.stderr)
            continue
        if payload is None:
            stats = getattr(write_pro2_doc, "last_stats", None)
            if stats and stats.skipped_encrypted:
                # Try fetching + decrypting from modules.lceda.cn. The blob
                # bundles N FOOTPRINTs/SYMBOLs + 1 parent PCB/SCH; we emit
                # one JSON per sub-doc.
                try:
                    m_n = _convert_pro2_encrypted(
                        path, out_dir, project_uuid, editor_version,
                        parent_uuid=doc_id,
                    )
                except Exception as e:  # noqa: BLE001
                    # Same best-effort policy as the direct conversion
                    # above: one bad doc must not abort the whole project.
                    print(f"  FAIL {doc_id[:12]}: {e}", file=sys.stderr)
                    continue
                if m_n > 0:
                    print(
                        f"  decrypted {doc_id[:12]}: "
                        f"{m_n} sub-doc(s) emitted"
                    )
                    n += m_n
                else:
                    print(
                        f"  SKIP {doc_id[:12]}: encrypted-external "
                        f"and fetch/decrypt failed."
                    )
                    skipped_encrypted += 1
            # No payload and not flagged as encrypted-external: nothing to
            # emit for this document.
            continue
        out_path = out_dir / f"{doc_id}.json"
        out_path.write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )
        s = getattr(write_pro2_doc, "last_stats", None)
        if s:
            print(
                f"  {doc_id[:12]}.json: docType={dt} "
                f"objects={s.objects} BBox=({s.bbox_x:g},{s.bbox_y:g},"
                f"{s.bbox_w:g},{s.bbox_h:g})"
            )
        n += 1
    if skipped_encrypted:
        print(
            f"  ({skipped_encrypted} encrypted-external doc(s) skipped — "
            f"the source/<uuid>.json files still hold the dataStrId/iv/key "
            f"so a future fetch+decrypt pass can recover them.)"
        )
    return n


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments and run the matching dump.

    The project is first probed with ``_detect_pro2``; Pro 2.x projects
    go through ``_convert_pro2``, everything else is replayed with
    ``replay_project`` and dumped by ``_convert_pcbs`` / ``_convert_schs``
    according to the selected ``--all-pcb`` / ``--all-sch`` / ``--all`` flag.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 when the converters report at least one document, 1 when they
        report none.

    Raises:
        SystemExit: Raised by argparse (status 2) for invalid arguments,
            including a ``project_dir`` that does not exist.
    """
    ap = argparse.ArgumentParser(description="EPRO2 / Pro 2.x → EasyEDA Std-shaped JSON dump")
    ap.add_argument("project_dir", type=Path)
    g = ap.add_mutually_exclusive_group(required=True)
    g.add_argument("--all-pcb", action="store_true", help="dump every PCB doc")
    g.add_argument("--all-sch", action="store_true", help="dump every SCH_PAGE doc")
    g.add_argument("--all", action="store_true", help="dump both PCB and SCH_PAGE docs")
    ap.add_argument("--out", type=Path, default=Path("data/processed/std_json"))
    args = ap.parse_args(argv)

    # Validate the input before touching the filesystem, so a mistyped
    # path does not leave an empty output directory behind.
    if not args.project_dir.exists():
        ap.error(f"project_dir does not exist: {args.project_dir}")

    want_pcb = args.all_pcb or args.all
    want_sch = args.all_sch or args.all

    args.out.mkdir(parents=True, exist_ok=True)

    is_pro2, editor_version = _detect_pro2(args.project_dir)
    if is_pro2:
        n = _convert_pro2(
            args.project_dir, args.out, editor_version,
            want_pcb=want_pcb,
            want_sch=want_sch,
        )
        if n == 0:
            print("nothing to dump (no Pro 2.x SCH/PCB docs survived)", file=sys.stderr)
            return 1
        return 0

    # Pro 3.x EPRO2 path — full replay then per-doc dump.
    proj = replay_project(args.project_dir)
    n = 0
    if want_pcb:
        n += _convert_pcbs(proj, args.out)
    if want_sch:
        n += _convert_schs(proj, args.out)
    if n == 0:
        print("nothing to dump (no PCB / SCH_PAGE docs found)", file=sys.stderr)
        return 1
    return 0


# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())