"""Scan data/raw/*/*/metadata.json and build projects.md (index, sorted by stars desc).

Usage:
    uv run python scripts/build_index.py
    uv run python scripts/build_index.py --out projects.md
"""

from __future__ import annotations

import argparse
import json
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

REPO = Path(__file__).resolve().parent.parent


def fmt_mb(b: int) -> str:
    """Return a byte count expressed in units of 1024*1024 bytes, one decimal place."""
    return f"{b / 1024 / 1024:.1f}"


# Short labels shown in the version column, keyed by metadata "source_format".
SOURCE_FORMAT_LABEL = {
    "easyeda-std": "Std",
    "easyeda-pro": "Pro 3.x",
    "easyeda-pro-legacy": "Pro 2.x",
    "kicad": "KiCad",
    "altium": "Altium",
    "eagle": "Eagle",
    "other": "Other",
}


def _metric(metrics: dict, key: str) -> int:
    """Return ``metrics[key]``, treating a missing or null value as 0."""
    return metrics.get(key) or 0


def _md_cell(text: object) -> str:
    """Make *text* safe to embed inside a single Markdown table cell.

    Line breaks are collapsed to spaces and pipes are backslash-escaped so
    free-form text (titles, author names) cannot split the row or a column.
    """
    flat = " ".join(str(text).splitlines())
    return flat.replace("|", "\\|")


def collect() -> list[dict]:
    """Load every project's metadata.json and return one row dict per project.

    Only ``data/raw/<dir>/<dir>/metadata.json`` is considered (the pattern
    given in the module docstring), so a file that happens to be called
    ``metadata.json`` deeper in a project directory is not mistaken for
    project metadata.

    Returns:
        Rows sorted by stars descending, ties broken by likes descending.
        Remaining ties keep sorted-path order, so the output is reproducible.

    Raises:
        KeyError: a metadata file lacks one of the required keys
            (``project_id``, ``title``, ``source``, ``source_url``, ``author``).
        json.JSONDecodeError: a metadata file is not valid JSON.
    """
    rows: list[dict] = []
    raw_root = REPO / "data" / "raw"
    # Sorted: glob order is filesystem-dependent, and the sort below is stable.
    for meta in sorted(raw_root.glob("*/*/metadata.json")):
        m = json.loads(meta.read_text(encoding="utf-8"))
        files = m.get("files") or []  # tolerate an explicit null
        author = m["author"]
        rows.append(
            {
                "uuid": m["project_id"],
                "title": m["title"],
                "source": m["source"],
                "source_url": m["source_url"],
                "author_display": author.get("display_name") or author["username"],
                "author_username": author["username"],
                "license": m.get("license") or "unknown",
                "metrics": m.get("metrics") or {},
                "files_count": len(files),
                "files_bytes": sum(f.get("size") or 0 for f in files),
                # POSIX separators keep the Markdown links valid on Windows.
                "local_dir": meta.parent.relative_to(REPO).as_posix(),
                "source_format": m.get("source_format"),
                "editor_version": m.get("editor_version"),
            }
        )
    # Sort by stars desc, tie-break by likes desc.
    rows.sort(
        key=lambda r: (
            -_metric(r["metrics"], "stars"),
            -_metric(r["metrics"], "likes"),
        )
    )
    return rows


def _table_row(index: int, r: dict) -> str:
    """Render one project row of the index table."""
    m = r["metrics"]
    title_link = f"[{_md_cell(r['title'])}]({r['source_url']})"
    # oshwhub author pages are linked as https://oshwhub.com/<username>;
    # for any other source fall back to the project URL.
    if r["source"] == "oshwhub":
        author_url = f"https://oshwhub.com/{r['author_username']}"
    else:
        author_url = r["source_url"]
    author_link = f"[{_md_cell(r['author_display'])}]({author_url})"
    dir_link = f"[`{r['uuid'][:8]}…`](./{r['local_dir']}/)"
    version_label = SOURCE_FORMAT_LABEL.get(r["source_format"] or "", "—")
    if r["editor_version"]:
        # <br> is the only way to break a line inside a Markdown table cell.
        version_label = f"{version_label}<br>{r['editor_version']}"
    return (
        f"| {index} | {title_link}<br>{dir_link} | {author_link} | "
        f"{_md_cell(r['license'])} | {version_label} | "
        f"{_metric(m, 'stars'):,} | {_metric(m, 'likes'):,} | "
        f"{_metric(m, 'forks'):,} | {_metric(m, 'views'):,} | "
        f"{r['files_count']} | {fmt_mb(r['files_bytes'])} |"
    )


def render(rows: list[dict]) -> str:
    """Render the Markdown index document for *rows*.

    Args:
        rows: Row dicts as produced by :func:`collect`, already in display order.

    Returns:
        The full Markdown text: header with a UTC timestamp, the project
        table, summary totals, license/source distributions and static
        layout/regeneration notes.
    """
    out: list[str] = []
    w = out.append

    total_files = sum(r["files_count"] for r in rows)
    total_bytes = sum(r["files_bytes"] for r in rows)
    total_stars = sum(_metric(r["metrics"], "stars") for r in rows)
    total_likes = sum(_metric(r["metrics"], "likes") for r in rows)
    total_views = sum(_metric(r["metrics"], "views") for r in rows)

    w("# Crawled Projects Index")
    w("")
    w(f"_自动生成,最近更新 {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}_")
    w("")
    w(
        f"**当前**:{len(rows)} 个项目 · {total_files} 个附件 · {fmt_mb(total_bytes)} MB"
    )
    w("")
    w("> 按 **Stars 倒序**。Title → 源站;UUID → 本仓库对应目录。")
    w("")
    w(
        "| # | Title | Author | License | 版本 | "
        "⭐ Stars | ❤️ Likes | 🍴 Forks | 👁 Views | Files | Size (MB) |"
    )
    w(
        "|---|-------|--------|---------|------|"
        "--------:|---------:|---------:|---------:|------:|----------:|"
    )
    out.extend(_table_row(i, r) for i, r in enumerate(rows, 1))
    w("")

    w("## 汇总")
    w("")
    avg_stars = total_stars // max(len(rows), 1)  # max() guards the empty index
    w(f"- Stars 合计 **{total_stars:,}**(平均 {avg_stars:,}/项目)")
    w(f"- Likes 合计 **{total_likes:,}**")
    w(f"- Views 合计 **{total_views:,}**")
    w("")

    # most_common() orders by count desc and keeps first-seen order on ties.
    w("### License 分布")
    w("")
    for lic, c in Counter(r["license"] for r in rows).most_common():
        w(f"- `{lic}` — {c} 项目")
    w("")

    w("### 数据源分布")
    w("")
    for src, c in Counter(r["source"] for r in rows).most_common():
        w(f"- `{src}` — {c} 项目")
    w("")

    w("## 目录结构(每个项目)")
    w("")
    w("```")
    w("data/raw/<source>/<uuid>/")
    w("├── metadata.json     # 统一 schema,见 schemas/project.schema.json")
    w("├── description.md    # 标题 + 简介 + 许可证")
    w("├── cover.{jpg,png}   # 封面")
    w("├── _urls.json        # 所有原始 URL")
    w("└── files/*           # 原始附件(Git LFS)")
    w("```")
    w("")
    w("## 重新生成")
    w("")
    w("```bash")
    w("uv run python scripts/build_index.py")
    w("```")
    w("")
    return "\n".join(out)


def main(argv: list[str] | None = None) -> int:
    """Build the index and write it to ``--out`` (default: ``<repo>/projects.md``).

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code (always 0; failures propagate as exceptions).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", type=Path, default=REPO / "projects.md")
    args = ap.parse_args(argv)

    rows = collect()
    md = render(rows)
    # Allow --out to point into a directory that does not exist yet.
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(md, encoding="utf-8")
    print(f"wrote {args.out} ({len(rows)} projects)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())