Comments 那列对工程"品质"信号弱(评论量主要看话题热度);换成"版本"列,直接告诉读者每个项目源是哪种 EDA 格式 + 编辑器版本号。当前 15 个项目里 10 Std / 3 Pro 3.x / 2 Pro 2.x。

source_format 字段映射:
  easyeda-std        → Std
  easyeda-pro        → Pro 3.x
  easyeda-pro-legacy → Pro 2.x
  其它               → 透传

editor_version(如 6.5.43 / 3.2.91 / 2.1.40)作为子标签放第二行。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
172 lines
5.7 KiB
Python
172 lines
5.7 KiB
Python
"""Scan data/raw/*/*/metadata.json and build projects.md (index, sorted by stars desc).

Usage:
    uv run python scripts/build_index.py
    uv run python scripts/build_index.py --out projects.md
"""
|
||
|
||
from __future__ import annotations

import argparse
import json
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
|
||
|
||
# Absolute path of the repository root: the parent of the directory that
# contains this script (the usage examples run it as scripts/build_index.py).
_SCRIPT_DIR = Path(__file__).resolve().parent
REPO: Path = _SCRIPT_DIR.parent
|
||
|
||
|
||
def fmt_mb(b: int) -> str:
    """Return the byte count *b* expressed in MiB, rounded to one decimal.

    The result is the bare number as a string (e.g. ``"1.5"``); callers add
    the unit themselves.
    """
    mebibytes = b / 1024 / 1024
    return format(mebibytes, ".1f")
|
||
|
||
|
||
# Short display labels for the "版本" table column, keyed by the
# ``source_format`` value stored in each project's metadata.json.
SOURCE_FORMAT_LABEL: dict[str, str] = {
    # EasyEDA editor generations
    "easyeda-std": "Std",
    "easyeda-pro": "Pro 3.x",
    "easyeda-pro-legacy": "Pro 2.x",
    # Other EDA tools
    "kicad": "KiCad",
    "altium": "Altium",
    "eagle": "Eagle",
    # Catch-all
    "other": "Other",
}
|
||
|
||
|
||
def collect() -> list[dict]:
|
||
rows: list[dict] = []
|
||
for meta in (REPO / "data" / "raw").rglob("metadata.json"):
|
||
m = json.loads(meta.read_text(encoding="utf-8"))
|
||
files = m.get("files", [])
|
||
bytes_total = sum(f.get("size") or 0 for f in files)
|
||
rows.append(
|
||
{
|
||
"uuid": m["project_id"],
|
||
"title": m["title"],
|
||
"source": m["source"],
|
||
"source_url": m["source_url"],
|
||
"author_display": m["author"].get("display_name") or m["author"]["username"],
|
||
"author_username": m["author"]["username"],
|
||
"license": m.get("license") or "unknown",
|
||
"metrics": m.get("metrics") or {},
|
||
"files_count": len(files),
|
||
"files_bytes": bytes_total,
|
||
"local_dir": str(meta.parent.relative_to(REPO)),
|
||
"source_format": m.get("source_format"),
|
||
"editor_version": m.get("editor_version"),
|
||
}
|
||
)
|
||
# sort by stars desc, tie-break by likes
|
||
rows.sort(
|
||
key=lambda r: (
|
||
-(r["metrics"].get("stars") or 0),
|
||
-(r["metrics"].get("likes") or 0),
|
||
)
|
||
)
|
||
return rows
|
||
|
||
|
||
def _md_cell(text: object) -> str:
    """Return *text* made safe to embed in a single Markdown table cell.

    A line break would terminate the table row and an unescaped ``|`` would
    open a new column, so line breaks become spaces and pipes are
    backslash-escaped.
    """
    return " ".join(str(text).splitlines()).replace("|", "\\|")


def _metric(metrics: dict, key: str) -> int:
    """Return ``metrics[key]``, treating a missing or null value as 0."""
    return metrics.get(key) or 0


def _distribution(rows: list[dict], field: str) -> list[str]:
    """Return bullet lines counting rows per ``row[field]``, most frequent first."""
    counts = Counter(r[field] for r in rows)
    return [f"- `{value}` — {n} 项目" for value, n in counts.most_common()]


def _table_row(index: int, row: dict) -> str:
    """Render one project row of the index table (no trailing newline)."""
    metrics = row["metrics"]
    title_link = f"[{_md_cell(row['title'])}]({row['source_url']})"
    # Author profile URLs are only derived for oshwhub
    # (`https://oshwhub.com/<username>`); any other source falls back to the
    # project page itself.
    if row["source"] == "oshwhub":
        author_url = f"https://oshwhub.com/{row['author_username']}"
    else:
        author_url = row["source_url"]
    author_link = f"[{_md_cell(row['author_display'])}]({author_url})"
    dir_link = f"[`{row['uuid'][:8]}…`](./{row['local_dir']}/)"

    # NOTE(review): a source_format that is not in SOURCE_FORMAT_LABEL is shown
    # as "—"; confirm whether unknown values should be passed through instead.
    version_label = SOURCE_FORMAT_LABEL.get(row["source_format"] or "", "—")
    if row["editor_version"]:
        # The editor version goes on a second line inside the same cell.
        version_label = (
            f"{version_label}<br><sub>{_md_cell(row['editor_version'])}</sub>"
        )

    return (
        f"| {index} | {title_link}<br>{dir_link} | {author_link} | "
        f"{_md_cell(row['license'])} | {version_label} | "
        f"{_metric(metrics, 'stars'):,} | {_metric(metrics, 'likes'):,} | "
        f"{_metric(metrics, 'forks'):,} | {_metric(metrics, 'views'):,} | "
        f"{row['files_count']} | {fmt_mb(row['files_bytes'])} |"
    )


def render(rows: list[dict]) -> str:
    """Render the project index as a Markdown document.

    The document contains a header with totals, one table row per project (in
    the order given), summary counters, licence and source distributions, and
    static sections describing the directory layout and how to regenerate the
    file. The header embeds the current UTC time, so two calls made in
    different minutes produce different text.

    Args:
        rows: Project rows with the keys produced by ``collect()``. A metric
            that is missing or null is rendered as 0.

    Returns:
        The Markdown text, with lines joined by ``"\\n"`` and a trailing
        newline.
    """
    total_files = sum(r["files_count"] for r in rows)
    total_bytes = sum(r["files_bytes"] for r in rows)
    total_stars = sum(_metric(r["metrics"], "stars") for r in rows)
    total_likes = sum(_metric(r["metrics"], "likes") for r in rows)
    total_views = sum(_metric(r["metrics"], "views") for r in rows)
    # max(..., 1) avoids dividing by zero when there are no projects.
    avg_stars = total_stars // max(len(rows), 1)
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    table_header = (
        "| # | Title | Author | License | 版本 | "
        "⭐ Stars | ❤️ Likes | 🍴 Forks | 👁 Views | Files | Size (MB) |"
    )
    table_rule = (
        "|---|-------|--------|---------|------|"
        "--------:|---------:|---------:|---------:|------:|----------:|"
    )

    out: list[str] = [
        "# Crawled Projects Index",
        "",
        f"_自动生成,最近更新 {generated_at}_",
        "",
        f"**当前**:{len(rows)} 个项目 · {total_files} 个附件 · {fmt_mb(total_bytes)} MB",
        "",
        "> 按 **Stars 倒序**。Title → 源站;UUID → 本仓库对应目录。",
        "",
        table_header,
        table_rule,
    ]
    out.extend(_table_row(i, r) for i, r in enumerate(rows, 1))

    out += [
        "",
        "## 汇总",
        "",
        f"- Stars 合计 **{total_stars:,}**(平均 {avg_stars:,}/项目)",
        f"- Likes 合计 **{total_likes:,}**",
        f"- Views 合计 **{total_views:,}**",
        "",
        "### License 分布",
        "",
    ]
    out.extend(_distribution(rows, "license"))

    out += [
        "",
        "### 数据源分布",
        "",
    ]
    out.extend(_distribution(rows, "source"))

    out += [
        "",
        "## 目录结构(每个项目)",
        "",
        "```",
        "data/raw/<source>/<uuid>/",
        "├── metadata.json       # 统一 schema,见 schemas/project.schema.json",
        "├── description.md      # 标题 + 简介 + 许可证",
        "├── cover.{jpg,png}     # 封面",
        "├── _urls.json          # 所有原始 URL",
        "└── files/*             # 原始附件(Git LFS)",
        "```",
        "",
        "## 重新生成",
        "",
        "```bash",
        "uv run python scripts/build_index.py",
        "```",
        "",
    ]
    return "\n".join(out)
|
||
|
||
|
||
def main(argv: list[str] | None = None) -> int:
    """Regenerate the Markdown index and write it to the ``--out`` path.

    Args:
        argv: Command-line arguments to parse; ``None`` makes argparse read
            them from ``sys.argv``.

    Returns:
        Always 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=REPO / "projects.md")
    options = parser.parse_args(argv)

    projects = collect()
    markdown = render(projects)
    target = options.out
    target.write_text(markdown, encoding="utf-8")
    print(f"wrote {target} ({len(projects)} projects)")
    return 0
|
||
|
||
|
||
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|