diff --git a/crawlers/oshwhub/crawler.py b/crawlers/oshwhub/crawler.py index dd2eebe..6cbf121 100644 --- a/crawlers/oshwhub/crawler.py +++ b/crawlers/oshwhub/crawler.py @@ -45,7 +45,13 @@ BROWSER_UA = ( ) SLEEP_BETWEEN = 2.0 # seconds between detail-page / file fetches SLEEP_SOURCE = 5.0 # source fetch is sensitive — QPS ≤ 0.2 per CLAUDE.md登录态 spirit -SLEEP_PRO = 5.0 # Pro is logged-in; same QPS ≤ 0.2 per docs/sources/easyeda_pro_source.md §4.1 +SLEEP_PRO = 5.0 # Pro API host (pro.lceda.cn): rate-sensitive, keep at QPS ≤ 0.2 +# CDN host (modules.lceda.cn) only serves AES-encrypted history blobs. +# HAR analysis (proexportNew2.har 2026-04-29) shows the editor fires these +# blobs back-to-back without throttling — the CDN can clearly take it. +# Walltime for chain replay is dominated by this loop on multi-hundred-history +# projects (X86 board: chain ≈ 700 → ~1h at 5s/req → ~few min at 0.2s/req). +SLEEP_PRO_CDN = 0.2 # --------------------------------------------------------------------------- @@ -549,7 +555,8 @@ def _fetch_pro_modern( } if cur_doc and cur_doc in docs: docs[cur_doc]["lines"].append(ln) - time.sleep(sleep) + # CDN host, not the rate-sensitive API host — see SLEEP_PRO_CDN comment. + time.sleep(SLEEP_PRO_CDN) # 6. write per-doc .epro2 + manifest doc_metas: list[dict] = [] diff --git a/data/raw/oshwhub/ba64bd6f1c9c467ba3b674a54943557d/source/manifest.json b/data/raw/oshwhub/ba64bd6f1c9c467ba3b674a54943557d/source/manifest.json index 4d6ea80..723e63f 100644 --- a/data/raw/oshwhub/ba64bd6f1c9c467ba3b674a54943557d/source/manifest.json +++ b/data/raw/oshwhub/ba64bd6f1c9c467ba3b674a54943557d/source/manifest.json @@ -2,7 +2,7 @@ "project_uuid": "ba64bd6f1c9c467ba3b674a54943557d", "branch_uuid": "ef5f58bd0f1245b0a808c07e541a1b5c", "head_uuid": "764dd8b722a44914a915493277e204c9", - "fetched_at": "2026-04-28T13:22:26.550372+00:00", + "fetched_at": "2026-04-28T16:06:55.434479+00:00", "editor_version": "3.2.91", "chain_length": 12, "blob_bytes_total": 1195716, diff --git a/data/raw/oshwhub/dc91a91e669349898d709a5ba02f5b5f/source/manifest.json b/data/raw/oshwhub/dc91a91e669349898d709a5ba02f5b5f/source/manifest.json index 7ecc486..3c4bb25 100644 --- a/data/raw/oshwhub/dc91a91e669349898d709a5ba02f5b5f/source/manifest.json +++ b/data/raw/oshwhub/dc91a91e669349898d709a5ba02f5b5f/source/manifest.json @@ -2,7 +2,7 @@ "project_uuid": "dc91a91e669349898d709a5ba02f5b5f", "branch_uuid": "248807156ba341efb886f33121486d41", "head_uuid": "320b5e8efc184b6f92fae0adc05ff547", - "fetched_at": "2026-04-28T13:34:08.163913+00:00", + "fetched_at": "2026-04-28T16:08:04.447128+00:00", "editor_version": "3.2.69", "chain_length": 28, "blob_bytes_total": 7712204,