From c3cac9759344daa041dc128722777e4c4aba3aa7 Mon Sep 17 00:00:00 2001 From: Knowit Date: Wed, 29 Apr 2026 02:23:39 +0800 Subject: [PATCH] crawler: filter Pro 2.x deprecated boards from sch/pcb fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pro 2.x project metadata's boards[] can reference sch/pcb UUIDs that the project owner has since deprecated/deleted (e.g. "主控板V1(废弃)"). Such UUIDs are gone from ticket.schematics / ticket.pcbs but still in boards[]. Asking schematic/lists or documents/lists for them returns 401 and aborts the whole project. Filter both lists against the authoritative ticket dict before posting. Verified on 7f7565ef11 (Super Dial 电机旋钮屏): 4 boards but only 3 sch entries in the schematics dict, which isolates the deprecated 8bc59f entry as the source of the 401; that entry is now skipped. Co-Authored-By: Claude Opus 4.7 (1M context) --- crawlers/oshwhub/crawler.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/crawlers/oshwhub/crawler.py b/crawlers/oshwhub/crawler.py index f416d84..9e87a64 100644 --- a/crawlers/oshwhub/crawler.py +++ b/crawlers/oshwhub/crawler.py @@ -758,7 +758,14 @@ def _fetch_pro_legacy( doc_metas: list[dict] = [] # 2. schematic containers -> sheet UUIDs via /api/schematic/lists - sch_container_uuids = [b["sch"] for b in boards if b.get("sch")] + # Filter `boards[].sch` against `ticket.schematics` keys: a board may + # reference a deprecated/deleted sch (e.g. "主控板V1(废弃)") whose + # UUID is gone from the schematics dict. Asking schematic/lists for it + # returns 401 and aborts the whole batch. Skip it. + valid_sch_uuids = set((manifest_ticket.get("schematics") or {}).keys()) + sch_container_uuids = [ + b["sch"] for b in boards if b.get("sch") and b["sch"] in valid_sch_uuids + ] sheet_uuids: list[str] = [] if sch_container_uuids: containers = _pro_post_json( @@ -798,7 +805,11 @@ def _fetch_pro_legacy( time.sleep(sleep) # 4. 
PCB documents via documents/lists docType=3 - pcb_uuids = [b["pcb"] for b in boards if b.get("pcb")] + # Same deprecated-uuid risk as #2 — filter against ticket.pcbs keys. + valid_pcb_uuids = set((manifest_ticket.get("pcbs") or {}).keys()) + pcb_uuids = [ + b["pcb"] for b in boards if b.get("pcb") and b["pcb"] in valid_pcb_uuids + ] if pcb_uuids: pcbs = _pro_post_json( pro_client,