""" 重新生成所有题目的 AI trio,子题带父题上下文。 用法: python backfill_ai_trio_with_context.py [--paper-id ] [--course ] """ import asyncio import io import json import re import sys import time import argparse from contextlib import redirect_stdout from app.services.supabase_client import get_supabase from app.services.llm_clients import get_deepseek_client def extract_code_lines(text: str) -> str: lines = (text or "").splitlines() result = [] in_code = False open_brackets = 0 CODE_START = re.compile(r"^\s*(import |from \w|[A-Za-z_]\w*\s*=|print\()") for line in lines: stripped = line.strip() if in_code and open_brackets > 0: result.append(stripped) open_brackets += stripped.count("(") + stripped.count("[") + stripped.count("{") open_brackets -= stripped.count(")") + stripped.count("]") + stripped.count("}") continue if CODE_START.match(line): in_code = True result.append(stripped) open_brackets += stripped.count("(") + stripped.count("[") + stripped.count("{") open_brackets -= stripped.count(")") + stripped.count("]") + stripped.count("}") continue in_code = False return "\n".join(result) def try_exec_python(code: str, shared_ns: dict) -> str | None: buf = io.StringIO() try: with redirect_stdout(buf): exec(code, shared_ns) # noqa: S102 output = buf.getvalue().strip() return output if output else None except Exception: return None BATCH_ANALYSIS_PROMPT = """You are an expert academic answer analyst. Generate three study sections for each question below. ALL output must be in English. For every question, return: - knowledge_reminder: concise prerequisite bullets in HTML - ai_hint: a helpful hint in HTML without revealing the final answer - solution: a complete step-by-step solution in HTML Return JSON in this exact format: {{ "analyses": [ {{ "question_number": "1a", "knowledge_reminder": "...", "ai_hint": "...", "solution": "..." }} ] }} Rules: - Return one item for every provided question_number - All text must be in English - HTML only, KaTeX compatible (block $$ ... $$ inline $ ... $) - For MC questions, explain why the correct option is right and why others are wrong - For long questions, show a complete derivation or reasoning chain - Use
    or numbered steps in solution when appropriate - Mark common mistakes with
    ...
    - CRITICAL: When a question_text contains "[Context from parent question X]" followed by "[Sub-question Y]", the parent section is background context only. You MUST solve ONLY the specific sub-question labeled [Sub-question Y]. Do NOT solve other sub-questions listed in the parent context. Give one precise answer for that single sub-question only. Questions: {questions_payload} """ def chunked(lst, size): return [lst[i:i+size] for i in range(0, len(lst), size)] async def deepseek_batch(batch: list[dict]) -> list[dict]: client = get_deepseek_client() for attempt in range(5): try: resp = client.chat.completions.create( model="deepseek-chat", messages=[{ "role": "system", "content": BATCH_ANALYSIS_PROMPT.format( questions_payload=json.dumps(batch, ensure_ascii=False) ) }], temperature=0.3, max_tokens=8192, response_format={"type": "json_object"}, ) raw = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', resp.choices[0].message.content) raw = re.sub(r'(?