feat: expandable previews, KaTeX rendering, variant speedup, batch import
- Analytics/Similar: expandable question preview with KaTeX rendering
- KaTeXRenderer: auto markdown-to-HTML (code blocks, tables, bold), auto Unicode→LaTeX
- ErrorBook: full question text rendering instead of truncated preview
- Variant: remove hint/solution from generation (faster), async, fix null crash
- Grading: add max_tokens limit
- JSON parser: robust multi-layer repair + JSONDecodeError retry
- Extraction prompt: enforce LaTeX notation for math
- Upload: redirect to home instead of blank paper page
- ProcessingBanner: add ETA time estimate + percentage
- Batch import script + handoff guide for team

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -230,6 +230,7 @@ async def get_course_analytics(course_code: str):
|
||||
"source": source_label,
|
||||
"question_number": question["question_number"],
|
||||
"preview": question["question_text"][:220],
|
||||
"full_text": question["question_text"],
|
||||
"difficulty": question.get("difficulty"),
|
||||
"question_type": question_type,
|
||||
"year": paper.get("year"),
|
||||
|
||||
@@ -133,7 +133,7 @@ async def create_variant(question_id: str, user_id: str = Depends(get_current_us
|
||||
raise HTTPException(status_code=404, detail="Question not found")
|
||||
|
||||
question = result.data[0]
|
||||
variant_data = await asyncio.to_thread(generate_variant, question)
|
||||
variant_data = await generate_variant(question)
|
||||
variant_data["knowledge_reminder"] = question.get("knowledge_reminder", "")
|
||||
|
||||
saved = sb.table("question_variants").insert({
|
||||
|
||||
@@ -68,9 +68,7 @@ Return JSON:
|
||||
"question_text": "HTML formatted variant question",
|
||||
"question_type": "{question_type}",
|
||||
"options": [MC only, format {{"label":"A","text":"..."}}, ...] or null,
|
||||
"correct_answer": "Correct answer (plain text)",
|
||||
"ai_hint": "HTML formatted hint that guides thinking WITHOUT giving the answer",
|
||||
"solution": "HTML formatted complete step-by-step solution"
|
||||
"correct_answer": "Correct answer (plain text)"
|
||||
}}"""
|
||||
|
||||
|
||||
@@ -90,7 +88,7 @@ def ocr_photo(photo_bytes: bytes) -> str:
|
||||
]},
|
||||
],
|
||||
temperature=0,
|
||||
max_tokens=2000,
|
||||
max_tokens=1500,
|
||||
)
|
||||
return resp.choices[0].message.content or ""
|
||||
|
||||
@@ -114,13 +112,15 @@ def grade_answer(question: dict, student_answer: str) -> dict:
|
||||
)},
|
||||
],
|
||||
temperature=0.2,
|
||||
max_tokens=2048,
|
||||
response_format={"type": "json_object"},
|
||||
)
|
||||
return json.loads(resp.choices[0].message.content)
|
||||
|
||||
|
||||
def generate_variant(question: dict) -> dict:
|
||||
"""Gemini generates a variant question"""
|
||||
async def generate_variant(question: dict) -> dict:
|
||||
"""DeepSeek generates a variant question (async)"""
|
||||
import asyncio
|
||||
answer = (
|
||||
question.get("correct_option")
|
||||
or question.get("correct_answer")
|
||||
@@ -129,18 +129,20 @@ def generate_variant(question: dict) -> dict:
|
||||
)
|
||||
|
||||
ds = get_deepseek_client()
|
||||
resp = ds.chat.completions.create(
|
||||
prompt = VARIANT_PROMPT.format(
|
||||
question_type=question["question_type"],
|
||||
question_text=question["question_text"],
|
||||
topics=", ".join(question.get("topics", [])),
|
||||
difficulty=question.get("difficulty", "medium"),
|
||||
answer=answer,
|
||||
)
|
||||
|
||||
resp = await asyncio.to_thread(
|
||||
ds.chat.completions.create,
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": VARIANT_PROMPT.format(
|
||||
question_type=question["question_type"],
|
||||
question_text=question["question_text"],
|
||||
topics=", ".join(question.get("topics", [])),
|
||||
difficulty=question.get("difficulty", "medium"),
|
||||
answer=answer,
|
||||
)},
|
||||
],
|
||||
messages=[{"role": "system", "content": prompt}],
|
||||
temperature=0.5,
|
||||
max_tokens=2048,
|
||||
response_format={"type": "json_object"},
|
||||
)
|
||||
return json.loads(resp.choices[0].message.content)
|
||||
|
||||
@@ -35,6 +35,8 @@ CRITICAL RULES for question_text:
|
||||
- For sub-questions (e.g. (a)(i)), copy the ENTIRE parent question setup (variable definitions, code blocks, problem description) into the question_text, then append the specific sub-question.
|
||||
- For Python/code questions: include ALL variable definitions and import statements verbatim, exactly as they appear in the exam, preserving multi-line arrays and data structures completely.
|
||||
- Never truncate code. If a variable is defined across multiple lines (e.g. a numpy array), include every line.
|
||||
- CRITICAL: ALL mathematical expressions, formulas, variables, and symbols MUST use LaTeX notation. Wrap inline math with $...$ and display math with $$...$$. NEVER use Unicode symbols like σ, μ, π, ², ≥, ≤, √, ∑, etc. Use $\sigma$, $\mu$, $\pi$, $^2$, $\geq$, $\leq$, $\sqrt{}$, $\sum$, etc. Every fraction should be $\frac{a}{b}$, every subscript $x_i$, every superscript $x^n$.
|
||||
- Code blocks must use markdown fenced code blocks (```python ... ```).
|
||||
|
||||
Output JSON format (strictly follow):
|
||||
{
|
||||
@@ -203,6 +205,8 @@ RETRYABLE_ERROR_MARKERS = (
|
||||
|
||||
|
||||
def is_retryable_error(exc: Exception) -> bool:
    """Report whether *exc* is a failure that is worth retrying.

    Args:
        exc: The exception raised by the failed attempt.

    Returns:
        True when *exc* is a ``json.JSONDecodeError`` or when its lower-cased
        message contains any entry of ``RETRYABLE_ERROR_MARKERS``; False
        otherwise.
    """
    # The model returned malformed JSON; asking again may yield valid output.
    if isinstance(exc, json.JSONDecodeError):
        return True

    # Markers are compared against the lower-cased exception message.
    lowered = str(exc).lower()
    for marker in RETRYABLE_ERROR_MARKERS:
        if marker in lowered:
            return True
    return False
|
||||
|
||||
@@ -221,17 +225,51 @@ def pdf_to_images(pdf_bytes: bytes, dpi: int = 96) -> list[str]:
|
||||
|
||||
|
||||
# Control characters that are never legal anywhere in JSON text: everything
# below 0x20 except tab, newline and carriage return.
_ILLEGAL_CONTROL_CHARS = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')

# A lone backslash (one preceded by an even number of backslashes) that starts
# an invalid JSON escape. Group 1 keeps the preceding backslash pairs, group 2
# is the offending character. A "u" only counts as a valid escape when it is
# followed by four hex digits, so LaTeX such as \underline or \uparrow is
# repaired instead of failing with "Invalid \uXXXX escape".
_INVALID_ESCAPE = re.compile(
    r'(?<!\\)((?:\\\\)*)\\([^"\\/bfnrtu\n]|u(?![0-9a-fA-F]{4}))'
)


def parse_json_response(text: str) -> dict:
    """Parse a JSON object out of a model response, repairing common defects.

    Repairs applied, in order:

    1. Strip a surrounding markdown code fence (```json ... ```).
    2. Drop any text before the first ``{`` and after the last ``}``.
    3. Remove control characters that are illegal in JSON.
    4. Double the backslash of invalid escapes (typically LaTeX such as
       ``\\sqrt`` or ``\\sigma``) so they survive as literal backslashes.
    5. Parse strictly; if that fails, parse with ``strict=False`` so raw
       tabs/newlines inside string values (e.g. multi-line code) are accepted
       and preserved rather than flattened.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON value (expected to be an object).

    Raises:
        json.JSONDecodeError: If the text still cannot be parsed after repair.
    """
    text = text.strip()

    # 1. Remove a ```json ... ``` wrapper; the closing fence may be missing.
    if text.startswith("```"):
        lines = text.splitlines()
        has_closing_fence = lines[-1].strip() == "```"
        text = "\n".join(lines[1:-1] if has_closing_fence else lines[1:])

    # 2. Skip leading chatter before the first "{" and trailing junk after
    #    the last "}".
    start = text.find("{")
    if start > 0:
        text = text[start:]
    end = text.rfind("}")
    if end > 0:
        text = text[:end + 1]

    # 3. Drop control characters that JSON never allows.
    text = _ILLEGAL_CONTROL_CHARS.sub('', text)

    # 4. Escape backslashes that begin an invalid JSON escape sequence.
    # NOTE: LaTeX commands whose first letter is itself a valid JSON escape
    # (\frac, \beta, \theta, \nu, \rho, ...) cannot be told apart here and are
    # still decoded as control characters by the JSON parser.
    text = _INVALID_ESCAPE.sub(r'\1\\\\\2', text)

    # 5. Strict parse first: well-formed output takes the normal path.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # 6. Lenient parse: the only control characters left are \t, \n and \r,
    #    and strict=False accepts them inside strings while keeping their
    #    values intact. Any remaining error propagates to the caller.
    return json.loads(text, strict=False)
|
||||
|
||||
|
||||
async def gemini_vision_json(
|
||||
|
||||
Reference in New Issue
Block a user