tools/epro2/std: fetch + decrypt Pro 2.x encrypted-external blobs

Pro 2.x stores some doc payloads (notably Taishan's PCB) externally at
modules.lceda.cn keyed by dataStrId, AES-256-GCM encrypted with the
iv/key fields stored alongside. Same crypto pattern as Pro 3.x EPRO2:
the last 16 bytes are the GCM auth tag; the rest is ciphertext that
decrypts to gzip(plaintext-op-stream).
The CDN doesn't require auth.

  - pro2_writer.fetch_encrypted_plaintext(): fetch + decrypt + gunzip,
    cache result at source/<uuid>.decrypted.txt so re-runs skip the
    network round-trip. Heavy imports (httpx, pycryptodome) are
    deferred to call-time so the pure-replay path doesn't pay for them.
  - pro2_writer.split_plaintext_by_doctype(): walk the multi-doc
    plaintext (Pro 2.x bundles N FOOTPRINTs + 1 PCB into one blob), yield
    (label, sub_text) per inner doc. Label = HEAD.uuid if present, else
    fallback `<kind>_<idx>`.
  - __main__._convert_pro2_encrypted(): for each sub-doc, write a
    synthetic inline-Pro-2.x JSON next to the original and re-route
    through write_pro2_doc — re-uses BBox / layers / objects-extraction
    instead of duplicating the logic. Output filename
    `<parent_uuid>__<sub_label>.json` makes the parent association
    visible.

Smoke test (Taishan): output grows from 28 inline SCHs to 55 docs total. Decrypts:
  - one PCB blob (3.4 MB plaintext, 20267-object PCB + 25 FOOTPRINT
    sub-docs of 130-580 objects each)
  - one SCH-typed encrypted doc (1 sub-SCH of 891 objects)

86 unit tests still pass; new fetch/decrypt path is covered manually
via the smoke test rather than mocking httpx + AES.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 02:07:40 +08:00
parent 3720cd176a
commit d11ca1d3be
2 changed files with 215 additions and 6 deletions

View File

@@ -25,7 +25,11 @@ from pathlib import Path
from ..replay import Project, replay_project
from .pcb_writer import write_pcb_std
from .pro2_writer import write_pro2_doc
from .pro2_writer import (
fetch_encrypted_plaintext,
split_plaintext_by_doctype,
write_pro2_doc,
)
from .sch_writer import write_sch_std
@@ -99,6 +103,80 @@ def _convert_schs(proj: Project, out_dir: Path) -> int:
return len(uuids)
def _convert_pro2_encrypted(
    json_path: Path, out_dir: Path,
    project_uuid: str, editor_version: str, parent_uuid: str,
) -> int:
    """Fetch + AES-256-GCM decrypt + gunzip the encrypted-external blob,
    then split it by DOCTYPE boundary into per-sub-doc JSONs.

    Pro 2.x bundles N FOOTPRINTs + 1 PCB (or N SYMBOLs + 1 SCH) into one
    blob; each sub-doc is emitted as a separate file named
    ``<parent_uuid>__<sub_label>.json`` so the parent association is
    visible in the filename without colliding with other sources.

    Args:
        json_path: the original Pro 2.x ``<uuid>.json`` describing the
            encrypted-external doc; passed to ``fetch_encrypted_plaintext``.
        out_dir: directory that receives one converted JSON per sub-doc.
        project_uuid: forwarded to ``write_pro2_doc``.
        editor_version: forwarded to ``write_pro2_doc`` as
            ``editor_version_hint``.
        parent_uuid: uuid of the enclosing doc; used as filename prefix.

    Returns:
        Number of sub-docs written to ``out_dir``; 0 when
        ``fetch_encrypted_plaintext`` returns ``None``.
    """
    plain = fetch_encrypted_plaintext(json_path)
    if plain is None:
        return 0

    n = 0
    for sub_label, sub_text in split_plaintext_by_doctype(plain):
        # NOTE(review): sub_label is used verbatim as a path component
        # below; presumably it is always a uuid or `<kind>_<idx>` --
        # confirm it can never contain path separators.
        doc_type = _doctype_from_first_line(sub_text)

        # Re-route the inline path: build a synthetic Pro 2.x doc shape
        # in a temp file so write_pro2_doc + its caching behave normally.
        synth = {
            "uuid": f"{parent_uuid}__{sub_label}",
            "title": sub_label,
            "docType": doc_type,
            "dataStr": sub_text,
        }
        # The synthetic JSON lives next to the original, with a name
        # that won't collide with manifest entries.
        synth_path = (
            json_path.parent / f".synth__{parent_uuid}__{sub_label}.json"
        )
        try:
            # Writing inside the try block guarantees that a failed or
            # partial write is cleaned up by the finally clause too.
            synth_path.write_text(
                json.dumps(synth, ensure_ascii=False), encoding="utf-8",
            )
            payload = write_pro2_doc(
                synth_path, project_uuid=project_uuid,
                editor_version_hint=editor_version,
            )
        finally:
            synth_path.unlink(missing_ok=True)

        if payload is None:
            # This sub-doc was not converted; move on to the next one.
            continue

        out_name = f"{parent_uuid}__{sub_label}.json"
        (out_dir / out_name).write_text(
            json.dumps(payload, ensure_ascii=False, separators=(",", ":")),
            encoding="utf-8",
        )

        # write_pro2_doc may expose stats of its latest call as a
        # function attribute; report them when present.
        s = getattr(write_pro2_doc, "last_stats", None)
        if s:
            print(
                f"  {parent_uuid[:12]}__{sub_label}: docType={synth['docType']} "
                f"objects={s.objects}"
            )
        n += 1
    return n
def _doctype_from_first_line(text: str) -> int:
"""Read the leading ``["DOCTYPE","KIND","x.y"]`` line and return the
Std docType code (1=SCH, 2=SYMBOL, 3=PCB, 4=FOOTPRINT, 5=DEVICE)."""
for line in text.splitlines():
line = line.strip()
if not line:
continue
try:
arr = json.loads(line)
except json.JSONDecodeError:
continue
if not (isinstance(arr, list) and arr and arr[0] == "DOCTYPE"):
continue
kind = arr[1] if len(arr) > 1 else ""
return {
"SCH": 1, "SYMBOL": 2, "PCB": 3, "FOOTPRINT": 4, "DEVICE": 5,
}.get(kind, 0)
return 0
def _convert_pro2(project_dir: Path, out_dir: Path,
editor_version: str, want_pcb: bool, want_sch: bool) -> int:
"""Pro 2.x path — read each <uuid>.json directly (no EPRO2 replay)
@@ -130,12 +208,25 @@ def _convert_pro2(project_dir: Path, out_dir: Path,
if payload is None:
stats = getattr(write_pro2_doc, "last_stats", None)
if stats and stats.skipped_encrypted:
print(
f" SKIP {entry['doc_uuid'][:12]}: PCB blob is "
f"AES-encrypted external (dataStrId+iv+key); needs "
f"a separate fetch+decrypt step we don't run here."
# Try fetching + decrypting from modules.lceda.cn. The blob
# bundles N FOOTPRINTs/SYMBOLs + 1 parent PCB/SCH; we emit
# one JSON per sub-doc.
m_n = _convert_pro2_encrypted(
path, out_dir, project_uuid, editor_version,
parent_uuid=entry["doc_uuid"],
)
skipped_encrypted += 1
if m_n > 0:
print(
f" decrypted {entry['doc_uuid'][:12]}: "
f"{m_n} sub-doc(s) emitted"
)
n += m_n
else:
print(
f" SKIP {entry['doc_uuid'][:12]}: encrypted-external "
f"and fetch/decrypt failed."
)
skipped_encrypted += 1
continue
out_path = out_dir / f"{entry['doc_uuid']}.json"
out_path.write_text(