Files
Zhao 9c09944c96 feat: expandable previews, KaTeX rendering, variant speedup, batch import
- Analytics/Similar: expandable question preview with KaTeX rendering
- KaTeXRenderer: auto markdown-to-HTML (code blocks, tables, bold), auto Unicode→LaTeX
- ErrorBook: full question text rendering instead of truncated preview
- Variant: remove hint/solution from generation (faster), async, fix null crash
- Grading: add max_tokens limit
- JSON parser: robust multi-layer repair + JSONDecodeError retry
- Extraction prompt: enforce LaTeX notation for math
- Upload: redirect to home instead of blank paper page
- ProcessingBanner: add ETA time estimate + percentage
- Batch import script + handoff guide for team

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 22:41:57 +09:00

149 lines
5.0 KiB
Python

"""OCR, grading, and variant generation prompts"""
import json
import base64
from app.services.llm_clients import get_vision_client, get_deepseek_client
# System prompt sent by ocr_photo() alongside the student's photo.
# It asks the vision model for a faithful transcription (no corrections),
# LaTeX for formulas, and an "[unclear]" marker for illegible handwriting.
# Used verbatim: it contains no str.format placeholders.
OCR_PROMPT = """You are an expert at recognizing handwritten answers. Analyze this photo of a student's handwritten answer and extract the text and mathematical formulas.
Requirements:
- Faithfully extract what the student wrote, do not modify or correct
- Use LaTeX format for math formulas (e.g. $x^2 + 1$)
- If there are multiple steps, list them in original order
- If some handwriting is unclear, mark with [unclear]
Return only the extracted text, no additional explanation."""
# str.format template used by grade_answer() as the system message.
# Placeholders: {question_number}, {question_type}, {question_text}, {score},
# {reference_answer}, {student_answer}. Note that {score} appears twice
# (the "Score" line and the "score_given" range).
# The doubled braces ({{ and }}) are format escapes that render as the literal
# braces of the JSON skeleton the model is asked to return.
GRADING_PROMPT = """You are an expert academic grader. Grade the following student answer. ALL output must be in English.
Question info:
- Number: {question_number}
- Type: {question_type}
- Question: {question_text}
- Score: {score}
Reference answer / solution:
{reference_answer}
Student answer:
{student_answer}
Grade and return JSON:
{{
"is_correct": true/false,
"score_given": 0-{score},
"feedback": "<HTML> Step-by-step analysis of the student's answer, pointing out correct parts and errors, using KaTeX formulas </HTML>",
"error_at_step": null or the step number where errors begin (integer)
}}
Grading rules:
- MC / fill-blank: only correct if answer matches exactly
- Long questions: give partial credit for correct steps even if the final answer is wrong
- feedback in HTML format, supports KaTeX ($..$ inline, $$...$$ block)
- Mark errors with <div class="common-error">...</div>
- Identify exactly which step the error starts"""
# str.format template used by generate_variant() as the system message.
# Placeholders: {question_type} (used twice: in the question info and echoed
# into the JSON skeleton), {question_text}, {topics}, {difficulty}, {answer}.
# The doubled braces ({{ and }}) are format escapes that render as literal
# braces, both for the outer JSON skeleton and for the MC option example.
VARIANT_PROMPT = """You are an expert exam question creator. Generate a similar but different variant question based on the original below. ALL output must be in English.
Original question info:
- Type: {question_type}
- Question: {question_text}
- Topics: {topics}
- Difficulty: {difficulty}
- Reference answer: {answer}
Requirements:
- Variant must test the same knowledge points at similar difficulty
- Data/scenario/wording must differ — don't just change numbers
- Must provide a complete correct answer
Format requirements (CRITICAL):
- All text in HTML format, absolutely NO markdown syntax
- Code: <pre><code class="language-xxx">...</code></pre>, NOT ```
- Math: $...$ (inline) or $$...$$ (block), KaTeX compatible
- Line breaks: <br>, paragraphs: <p>
Return JSON:
{{
"question_text": "HTML formatted variant question",
"question_type": "{question_type}",
"options": [MC only, format {{"label":"A","text":"..."}}, ...] or null,
"correct_answer": "Correct answer (plain text)"
}}"""
def ocr_photo(photo_bytes: bytes, *, mime_type: str = "image/jpeg") -> str:
    """Transcribe a photo of a handwritten answer with the vision model.

    The image is sent inline as a base64 ``data:`` URL in the user message,
    with ``OCR_PROMPT`` as the system message.

    Args:
        photo_bytes: Raw bytes of the image file.
        mime_type: Media type declared in the data URL. Defaults to
            ``"image/jpeg"``, the value that used to be hard-coded, so
            existing callers are unaffected. Pass e.g. ``"image/png"`` when
            the upload is not a JPEG.

    Returns:
        The message content of the first completion choice, or ``""`` when
        that content is ``None`` or empty.
    """
    client = get_vision_client()
    # Base64 output is pure ASCII, so this decode cannot fail.
    encoded = base64.b64encode(photo_bytes).decode("ascii")
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{encoded}"},
    }
    resp = client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[
            {"role": "system", "content": OCR_PROMPT},
            {"role": "user", "content": [image_part]},
        ],
        temperature=0,
        max_tokens=1500,
    )
    return resp.choices[0].message.content or ""
def grade_answer(question: dict, student_answer: str) -> dict:
    """Grade a student answer with the DeepSeek chat model.

    ``GRADING_PROMPT`` is filled in from ``question`` and ``student_answer``
    and sent as the only (system) message, with JSON-object output requested.

    Args:
        question: Question record. ``question_number``, ``question_type`` and
            ``question_text`` are required. ``raw_answer_text`` (preferred)
            or ``solution`` supplies the reference answer, and ``score``
            supplies the maximum score. Missing or falsy optional values fall
            back to ``"No reference answer"`` and ``"unknown"``.
        student_answer: The student's answer text to grade.

    Returns:
        The model's reply parsed from JSON.

    Raises:
        KeyError: If a required key is missing from ``question``.
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
    """
    reference = (
        question.get("raw_answer_text")
        or question.get("solution")
        or "No reference answer"
    )
    score = question.get("score") or "unknown"
    prompt = GRADING_PROMPT.format(
        question_number=question["question_number"],
        question_type=question["question_type"],
        question_text=question["question_text"],
        score=score,
        reference_answer=reference,
        student_answer=student_answer,
    )
    client = get_deepseek_client()
    resp = client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "system", "content": prompt}],
        temperature=0.2,
        max_tokens=2048,
        response_format={"type": "json_object"},
    )
    # The content may be None. ``or ""`` (same guard as ocr_photo) turns that
    # into a JSONDecodeError instead of an opaque TypeError from json.loads.
    return json.loads(resp.choices[0].message.content or "")
async def generate_variant(question: dict) -> dict:
    """Generate a variant of ``question`` with the DeepSeek chat model.

    ``VARIANT_PROMPT`` is filled in from ``question`` and sent as the only
    (system) message, with JSON-object output requested. The blocking client
    call runs in a worker thread via ``asyncio.to_thread``.

    Args:
        question: Question record. ``question_type`` and ``question_text``
            are required. The reference answer is the first truthy value of
            ``correct_option``, ``correct_answer``, ``raw_answer_text``,
            else ``"N/A"``. ``topics`` may be missing, ``None``, a single
            string, or an iterable of values. ``difficulty`` defaults to
            ``"medium"`` when missing, ``None`` or empty.

    Returns:
        The model's reply parsed from JSON.

    Raises:
        KeyError: If a required key is missing from ``question``.
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
    """
    import asyncio

    answer = (
        question.get("correct_option")
        or question.get("correct_answer")
        or question.get("raw_answer_text")
        or "N/A"
    )

    # ``.get("topics", [])`` still yields None when the key is present with a
    # null value, which would crash ``join``; normalise that case here.
    topics = question.get("topics") or []
    if isinstance(topics, str):
        # A bare string would otherwise be joined character by character.
        topics = [topics]

    difficulty = question.get("difficulty")
    if difficulty is None or difficulty == "":
        # Avoid rendering the literal text "None" into the prompt.
        difficulty = "medium"

    prompt = VARIANT_PROMPT.format(
        question_type=question["question_type"],
        question_text=question["question_text"],
        # str() guards against non-string topic entries (e.g. ids).
        topics=", ".join(str(topic) for topic in topics),
        difficulty=difficulty,
        answer=answer,
    )

    client = get_deepseek_client()
    resp = await asyncio.to_thread(
        client.chat.completions.create,
        model="deepseek-chat",
        messages=[{"role": "system", "content": prompt}],
        temperature=0.5,
        max_tokens=2048,
        response_format={"type": "json_object"},
    )
    # The content may be None. ``or ""`` turns that into a JSONDecodeError
    # instead of an opaque TypeError from json.loads.
    return json.loads(resp.choices[0].message.content or "")