Files
FacereDataset/scripts/build_feikong_index.py
Knowit d5cc6507cb docs: 飞控 std topical index (79 projects)
Topical index for std-origin flight-controller projects. Combines
data/state/oshwhub_listing_full.jsonl listing fields with each
project's metadata.json (license, source completeness,
editor_version). Useful as a flat per-topic reference vs the global
projects.md sorted purely by stars.

77 added this batch (commit 29530e0) + 2 prior. 75 have editor source,
4 are attachments-only on upstream.

scripts/build_feikong_index.py is reproducible: source of truth lives
in data/state/ + data/raw/, no hand-editing.
2026-04-30 19:23:52 +08:00

145 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Build docs/feikong_std77.md — topical index of std 飞控 projects.
Selection: origin=std AND ('飞控' in name OR '飞控' in introduction)
Source: data/state/oshwhub_listing_full.jsonl (full-corpus listing snapshot)
Augmented from each project's data/raw/oshwhub/<uuid>/metadata.json:
- license
- source completeness (editor docs vs attachments-only)
- editor_version
"""
from __future__ import annotations

import json
from collections import Counter
from pathlib import Path
# Directory two levels above this script; every other path hangs off it.
REPO = Path(__file__).resolve().parent.parent
# JSON Lines listing snapshot that project records are selected from.
LISTING = REPO.joinpath("data", "state", "oshwhub_listing_full.jsonl")
# Parent of the per-project directories (<uuid>/metadata.json).
RAW_ROOT = REPO.joinpath("data", "raw", "oshwhub")
# Markdown file this script generates.
OUT = REPO.joinpath("docs", "feikong_std77.md")
def collect() -> list[dict]:
    """Collect the std-origin 飞控 projects from the listing snapshot.

    Reads ``LISTING`` as JSON Lines, keeps the records whose ``origin`` is
    ``"std"`` and whose ``name`` or ``introduction`` contains ``飞控``, and
    pairs each one with ``RAW_ROOT/<uuid>/metadata.json`` when that file
    exists.

    Returns:
        One ``{"listing": <record>, "meta": <metadata or {}>}`` dict per
        selected project, in listing order.

    Raises:
        KeyError: If a selected record has no ``uuid``.
        json.JSONDecodeError: If a listing line or a metadata file is not
            valid JSON.
    """
    rows: list[dict] = []
    # Iterate the file instead of calling str.splitlines(): splitlines() also
    # breaks on U+2028 / U+2029 / U+0085, which are legal unescaped inside
    # JSON strings and would cut a record in half. File iteration only splits
    # on real newlines, and it avoids loading the whole snapshot at once.
    with LISTING.open(encoding="utf-8") as fh:
        for line in fh:
            if not line.strip():
                continue  # tolerate blank lines between records
            record = json.loads(line)
            if record.get("origin") != "std":
                continue
            name = record.get("name") or ""
            intro = record.get("introduction") or ""
            if "飞控" not in name and "飞控" not in intro:
                continue
            meta_path = RAW_ROOT / record["uuid"] / "metadata.json"
            # A missing metadata.json is represented by an empty dict.
            meta: dict = {}
            if meta_path.is_file():
                meta = json.loads(meta_path.read_text(encoding="utf-8"))
            rows.append({"listing": record, "meta": meta})
    return rows
def _md_cell(text: object) -> str:
    """Return *text* flattened and escaped for one Markdown table cell.

    A line break would end the table row and a bare ``|`` would start a new
    column, so line breaks become spaces and ``|`` is backslash-escaped.
    """
    return " ".join(str(text).splitlines()).replace("|", "\\|")


def _distribution_lines(labels: list[str]) -> list[str]:
    """Return one bullet per distinct label: most frequent first, ties by label."""
    ranked = sorted(Counter(labels).items(), key=lambda pair: (-pair[1], pair[0]))
    return [f"- `{label}` — {n}" for label, n in ranked]


def render(rows: list[dict]) -> str:
    """Render the Markdown index document for *rows*.

    Args:
        rows: Items shaped like ``{"listing": {...}, "meta": {...}}``, the
            shape ``collect()`` returns. An empty ``meta`` marks a project
            with no metadata. The list itself is left unmodified.

    Returns:
        The document text: summary header, a table ordered by likes
        (descending, ties keep input order), the license and editor-version
        distributions, and the regeneration command.

    Raises:
        KeyError: If a listing record lacks ``uuid`` or ``path``.
    """

    def likes(row: dict) -> int:
        # A missing or null count sorts as 0 instead of raising on -None.
        return (row["listing"].get("count") or {}).get("like") or 0

    # sorted() leaves the caller's list alone; reverse=True is still stable.
    ranked = sorted(rows, key=likes, reverse=True)

    n_total = len(ranked)
    n_have_meta = sum(1 for r in ranked if r["meta"])
    n_with_source = sum(1 for r in ranked if r["meta"].get("source_documents"))
    n_attachments_only = n_have_meta - n_with_source

    out: list[str] = [
        "# 飞控 / Flight-Controller Projects (std)",
        "",
        "_主题定向索引oshwhub `origin=std` 且 `name` 或 `introduction` 含「飞控」字样_",
        "",
        f"**当前**{n_total} 个项目 · {n_with_source} 个有可编辑器源 · "
        f"{n_attachments_only} 个仅附件upstream 没编辑器 session",
        "",
        "> 数据来源:`data/state/oshwhub_listing_full.jsonl` + 每项 `data/raw/oshwhub/<uuid>/metadata.json`",
        "> 排序likes 倒序",
        "",
        "| # | Title | Author | License | Source | Editor | "
        "❤️ Likes | ⭐ Stars | 🍴 Forks | 👁 Views | Grade |",
        "|---|-------|--------|---------|--------|--------|"
        "--------:|--------:|--------:|--------:|------:|",
    ]

    for i, row in enumerate(ranked, 1):
        it = row["listing"]
        meta = row["meta"]
        counts = it.get("count") or {}
        owner = it.get("owner") or {}
        uuid = it["uuid"]

        author_user = owner.get("username") or "?"
        author_disp = _md_cell(owner.get("nickname") or author_user)
        title = _md_cell(it.get("name") or "?")
        license_str = _md_cell(meta.get("license") or "")
        editor = _md_cell(meta.get("editor_version") or "")

        title_link = f"[{title}](https://oshwhub.com/{it['path']})"
        dir_link = f"[`{uuid[:8]}…`](../data/raw/oshwhub/{uuid}/)"
        author_link = f"[{author_disp}](https://oshwhub.com/{author_user})"

        # Source completeness: no metadata / editor documents / attachments only.
        docs = meta.get("source_documents")
        if not meta:
            source_label = "❌ 未抓"
        elif docs:
            source_label = f"{len(docs)} docs"
        else:
            source_label = "📎 attachments-only"

        # `or 0` also covers an explicit null, which `:,` cannot format.
        like, star, fork, views = (
            counts.get(key) or 0 for key in ("like", "star", "fork", "views")
        )
        out.append(
            f"| {i} | {title_link}<br>{dir_link} | {author_link} | {license_str} | "
            f"{source_label} | {editor} | "
            f"{like:,} | {star:,} | {fork:,} | "
            f"{views:,} | {it.get('grade') or 0} |"
        )

    out += ["", "## License 分布", ""]
    out += _distribution_lines(
        [r["meta"].get("license") or "(未抓)" for r in ranked]
    )

    out += ["", "## Editor 版本分布(有源工程的项目)", ""]
    out += _distribution_lines(
        [
            r["meta"].get("editor_version") or "(unknown)"
            for r in ranked
            if r["meta"].get("source_documents")
        ]
    )

    out += [
        "",
        "## 重新生成",
        "",
        "```bash",
        "uv run python scripts/build_feikong_index.py",
        "```",
        "",
    ]
    return "\n".join(out)
def main() -> int:
    """Regenerate the index file at ``OUT`` from the current data.

    Returns:
        0, used as the process exit status by the entry guard below.
    """
    projects = collect()
    document = render(projects)
    # Create the output directory if it does not exist yet.
    out_dir = OUT.parent
    out_dir.mkdir(parents=True, exist_ok=True)
    OUT.write_text(document, encoding="utf-8")
    print(f"wrote {OUT} ({len(projects)} projects)")
    return 0


# Run only when executed as a script; main()'s result becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())