Files
FacereDataset/scripts/build_index.py
Knowit c721e08c93 projects.md: replace Comments column with 版本 (Std / Pro 3.x / Pro 2.x)
Comments 那列对工程"品质"信号弱(评论量主要看话题热度);换成"版本"
列直接告诉读者每个项目源是哪种 EDA 格式 + 编辑器版本号。当前 15
个项目里 10 Std / 3 Pro 3.x / 2 Pro 2.x。

source_format 字段映射:
  easyeda-std        → Std
  easyeda-pro        → Pro 3.x
  easyeda-pro-legacy → Pro 2.x
  其它               → 透传

editor_version(如 6.5.43 / 3.2.91 / 2.1.40)作为子标签放第二行。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 22:01:41 +08:00

172 lines
5.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Scan data/raw/*/*/metadata.json and build projects.md (index, sorted by stars desc).
Usage:
uv run python scripts/build_index.py
uv run python scripts/build_index.py --out projects.md
"""
from __future__ import annotations
import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
# This script lives in <repo>/scripts/, so the repository root is the parent
# of the directory that contains this file.
_SCRIPTS_DIR = Path(__file__).resolve().parent
REPO = _SCRIPTS_DIR.parent
def fmt_mb(b: int) -> str:
    """Return the byte count *b* expressed in MiB with one decimal place."""
    mebibytes = b / (1 << 20)
    return format(mebibytes, ".1f")
# Display label for each known ``source_format`` value found in metadata.json.
SOURCE_FORMAT_LABEL: dict[str, str] = {
    # EasyEDA flavours
    "easyeda-std": "Std",
    "easyeda-pro": "Pro 3.x",
    "easyeda-pro-legacy": "Pro 2.x",
    # Other EDA tools
    "kicad": "KiCad",
    "altium": "Altium",
    "eagle": "Eagle",
    "other": "Other",
}
def collect() -> list[dict]:
    """Load every project's ``metadata.json`` under ``data/raw`` into index rows.

    Only files at ``data/raw/<source>/<uuid>/metadata.json`` (relative to
    ``REPO``) are read. Each file is flattened into a dict with the keys
    ``uuid``, ``title``, ``source``, ``source_url``, ``author_display``,
    ``author_username``, ``license``, ``metrics``, ``files_count``,
    ``files_bytes``, ``local_dir``, ``source_format`` and ``editor_version``.

    Returns:
        The rows sorted by stars descending, then likes descending. Rows that
        tie on both keep the sorted order of their metadata paths, so the
        result is reproducible across runs and filesystems.

    Raises:
        KeyError: A metadata file lacks ``project_id``, ``title``, ``source``,
            ``source_url``, ``author`` or ``author["username"]``.
        json.JSONDecodeError: A metadata file is not valid JSON.
    """
    raw_root = REPO / "data" / "raw"
    rows: list[dict] = []
    # Match only the documented <source>/<uuid>/metadata.json depth: a
    # recursive search would also pick up any metadata.json that happens to
    # sit inside a project's attachments. Sorting the paths makes the
    # traversal order independent of the filesystem.
    for meta in sorted(raw_root.glob("*/*/metadata.json")):
        m = json.loads(meta.read_text(encoding="utf-8"))
        # "files" may be absent or explicitly null; both mean "no attachments".
        files = m.get("files") or []
        bytes_total = sum(f.get("size") or 0 for f in files)
        author = m["author"]
        rows.append(
            {
                "uuid": m["project_id"],
                "title": m["title"],
                "source": m["source"],
                "source_url": m["source_url"],
                "author_display": author.get("display_name") or author["username"],
                "author_username": author["username"],
                "license": m.get("license") or "unknown",
                "metrics": m.get("metrics") or {},
                "files_count": len(files),
                "files_bytes": bytes_total,
                # POSIX separators keep the generated Markdown links valid
                # even when the index is built on Windows.
                "local_dir": meta.parent.relative_to(REPO).as_posix(),
                "source_format": m.get("source_format"),
                "editor_version": m.get("editor_version"),
            }
        )
    # Sort by stars descending, tie-break by likes; list.sort() is stable, so
    # remaining ties keep the sorted-path order established above.
    rows.sort(
        key=lambda r: (
            -(r["metrics"].get("stars") or 0),
            -(r["metrics"].get("likes") or 0),
        )
    )
    return rows
def _md_text(text: object) -> str:
    """Escape *text* for use as link text inside a Markdown table cell."""
    escaped = str(text)
    # A bare "|" would end the table cell and an unbalanced "[" / "]" would
    # end the link text early. Backslash goes first so the escapes added
    # for the other characters are not escaped again.
    for ch in ("\\", "|", "[", "]"):
        escaped = escaped.replace(ch, "\\" + ch)
    # A line break would terminate the table row.
    return escaped.replace("\r", " ").replace("\n", " ")


def _count_by(rows: list[dict], key: str) -> list[tuple[str, int]]:
    """Return ``(value, count)`` pairs for ``row[key]``, most frequent first."""
    counts: dict[str, int] = {}
    for row in rows:
        counts[row[key]] = counts.get(row[key], 0) + 1
    # sorted() is stable, so values with equal counts keep first-seen order.
    return sorted(counts.items(), key=lambda item: -item[1])


def render(rows: list[dict]) -> str:
    """Render the Markdown project index for *rows*.

    Args:
        rows: Row dicts with the keys produced by ``collect``. They are
            emitted in the order given; no sorting happens here.

    Returns:
        The whole Markdown document as a single newline-joined string: a
        header with totals, one table row per project, summary statistics,
        license and source distributions, and static help sections.
    """
    out: list[str] = []
    w = out.append

    def metric(row: dict, name: str) -> int:
        # A metric may be missing or stored as null; both count as 0. The
        # table cells use this too, so a null metric no longer crashes the
        # ":," number formatting.
        return row["metrics"].get(name) or 0

    total_files = sum(r["files_count"] for r in rows)
    total_bytes = sum(r["files_bytes"] for r in rows)
    total_stars = sum(metric(r, "stars") for r in rows)
    total_likes = sum(metric(r, "likes") for r in rows)
    total_views = sum(metric(r, "views") for r in rows)

    w("# Crawled Projects Index")
    w("")
    w(f"_自动生成最近更新 {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}_")
    w("")
    w(
        f"**当前**{len(rows)} 个项目 · {total_files} 个附件 · {fmt_mb(total_bytes)} MB"
    )
    w("")
    w("> 按 **Stars 倒序**。Title → 源站UUID → 本仓库对应目录。")
    w("")
    w(
        "| # | Title | Author | License | 版本 | "
        "⭐ Stars | ❤️ Likes | 🍴 Forks | 👁 Views | Files | Size (MB) |"
    )
    w(
        "|---|-------|--------|---------|------|"
        "--------:|---------:|---------:|---------:|------:|----------:|"
    )
    for i, r in enumerate(rows, 1):
        title_link = f"[{_md_text(r['title'])}]({r['source_url']})"
        # Author link inference: oshwhub profiles live at
        # https://oshwhub.com/<username>; other sources fall back to the
        # project URL.
        if r["source"] == "oshwhub":
            author_url = f"https://oshwhub.com/{r['author_username']}"
        else:
            author_url = r["source_url"]
        author_link = f"[{_md_text(r['author_display'])}]({author_url})"
        uuid_short = r["uuid"][:8]
        dir_link = f"[`{uuid_short}…`](./{r['local_dir']}/)"

        # Known formats get their short label; unknown ones are passed
        # through unchanged rather than rendered as an empty cell.
        source_format = r["source_format"] or ""
        version_label = SOURCE_FORMAT_LABEL.get(source_format, source_format)
        if r["editor_version"]:
            sub_label = f"<sub>{r['editor_version']}</sub>"
            # Skip the line break when there is no format label above it.
            version_label = f"{version_label}<br>{sub_label}" if version_label else sub_label

        w(
            f"| {i} | {title_link}<br>{dir_link} | {author_link} | {r['license']} | {version_label} | "
            f"{metric(r, 'stars'):,} | {metric(r, 'likes'):,} | {metric(r, 'forks'):,} | "
            f"{metric(r, 'views'):,} | "
            f"{r['files_count']} | {fmt_mb(r['files_bytes'])} |"
        )
    w("")
    w("## 汇总")
    w("")
    # max(..., 1) avoids dividing by zero when there are no projects.
    avg_stars = total_stars // max(len(rows), 1)
    w(f"- Stars 合计 **{total_stars:,}**(平均 {avg_stars:,}/项目)")
    w(f"- Likes 合计 **{total_likes:,}**")
    w(f"- Views 合计 **{total_views:,}**")
    w("")
    w("### License 分布")
    w("")
    for lic, c in _count_by(rows, "license"):
        w(f"- `{lic}` — {c} 项目")
    w("")
    w("### 数据源分布")
    w("")
    for src, c in _count_by(rows, "source"):
        w(f"- `{src}` — {c} 项目")
    w("")
    w("## 目录结构(每个项目)")
    w("")
    w("```")
    w("data/raw/<source>/<uuid>/")
    w("├── metadata.json # 统一 schema见 schemas/project.schema.json")
    w("├── description.md # 标题 + 简介 + 许可证")
    w("├── cover.{jpg,png} # 封面")
    w("├── _urls.json # 所有原始 URL")
    w("└── files/* # 原始附件Git LFS")
    w("```")
    w("")
    w("## 重新生成")
    w("")
    w("```bash")
    w("uv run python scripts/build_index.py")
    w("```")
    w("")
    return "\n".join(out)
def main(argv: list[str] | None = None) -> int:
    """Build the project index and write it to the ``--out`` path.

    Args:
        argv: Command-line arguments to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Always ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=REPO / "projects.md")
    target = parser.parse_args(argv).out

    projects = collect()
    document = render(projects)
    target.write_text(document, encoding="utf-8")

    print(f"wrote {target} ({len(projects)} projects)")
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)