Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions
--- a/backend/app/services/grader.py
+++ b/backend/app/services/grader.py
@@ -0,0 +1,146 @@
+"""OCR, grading, and variant generation prompts"""
+
+import json
+import base64
+from app.services.llm_clients import get_vision_client, get_deepseek_client
+
+# System prompt for ocr_photo: asks the vision model to transcribe the photo
+# verbatim, with math written as LaTeX and illegible parts tagged "[unclear]".
+# It is sent as-is (it is not a str.format template).
+OCR_PROMPT = """You are an expert at recognizing handwritten answers. Analyze this photo of a student's handwritten answer and extract the text and mathematical formulas.
+
+Requirements:
+- Faithfully extract what the student wrote, do not modify or correct
+- Use LaTeX format for math formulas (e.g. $x^2 + 1$)
+- If there are multiple steps, list them in original order
+- If some handwriting is unclear, mark with [unclear]
+
+Return only the extracted text, no additional explanation."""
+
+# str.format template filled in by grade_answer.
+# Placeholders: {question_number}, {question_type}, {question_text}, {score}
+# (used twice: question info and the score_given range), {reference_answer},
+# {student_answer}.
+# The doubled braces around the JSON skeleton are format escapes that render
+# as literal "{" and "}".
+GRADING_PROMPT = """You are an expert academic grader. Grade the following student answer. ALL output must be in English.
+
+Question info:
+- Number: {question_number}
+- Type: {question_type}
+- Question: {question_text}
+- Score: {score}
+
+Reference answer / solution:
+{reference_answer}
+
+Student answer:
+{student_answer}
+
+Grade and return JSON:
+{{
+  "is_correct": true/false,
+  "score_given": 0-{score},
+  "feedback": "<HTML> Step-by-step analysis of the student's answer, pointing out correct parts and errors, using KaTeX formulas </HTML>",
+  "error_at_step": null or the step number where errors begin (integer)
+}}
+
+Grading rules:
+- MC / fill-blank: only correct if answer matches exactly
+- Long questions: give partial credit for correct steps even if the final answer is wrong
+- feedback in HTML format, supports KaTeX ($..$ inline, $$...$$ block)
+- Mark errors with <div class="common-error">...</div>
+- Identify exactly which step the error starts"""
+
+# str.format template filled in by generate_variant.
+# Placeholders: {question_type} (used twice: question info and the JSON
+# skeleton), {question_text}, {topics}, {difficulty}, {answer}.
+# The doubled braces (JSON skeleton and the MC option example) are format
+# escapes that render as literal "{" and "}".
+VARIANT_PROMPT = """You are an expert exam question creator. Generate a similar but different variant question based on the original below. ALL output must be in English.
+
+Original question info:
+- Type: {question_type}
+- Question: {question_text}
+- Topics: {topics}
+- Difficulty: {difficulty}
+- Reference answer: {answer}
+
+Requirements:
+- Variant must test the same knowledge points at similar difficulty
+- Data/scenario/wording must differ — don't just change numbers
+- Must provide a complete correct answer
+
+Format requirements (CRITICAL):
+- All text in HTML format, absolutely NO markdown syntax
+- Code: <pre><code class="language-xxx">...</code></pre>, NOT ```
+- Math: $...$ (inline) or $$...$$ (block), KaTeX compatible
+- Line breaks: <br>, paragraphs: <p>
+
+Return JSON:
+{{
+  "question_text": "HTML formatted variant question",
+  "question_type": "{question_type}",
+  "options": [MC only, format {{"label":"A","text":"..."}}, ...] or null,
+  "correct_answer": "Correct answer (plain text)",
+  "ai_hint": "HTML formatted hint that guides thinking WITHOUT giving the answer",
+  "solution": "HTML formatted complete step-by-step solution"
+}}"""
+
+
+def ocr_photo(photo_bytes: bytes) -> str:
+    """Transcribe a photo of a handwritten answer with the vision model.
+
+    The image is sent inline as a base64 ``data:`` URL, with ``OCR_PROMPT``
+    as the system message, to the ``gemini-2.5-flash`` model.
+
+    Args:
+        photo_bytes: Raw bytes of the image.
+
+    Returns:
+        The text returned by the model, or ``""`` when the reply has no
+        content.
+    """
+    # Label the data URL with the actual image type instead of always
+    # claiming JPEG. Anything unrecognised keeps the previous "image/jpeg"
+    # label, so JPEG input behaves exactly as before.
+    if photo_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
+        mime_type = "image/png"
+    elif photo_bytes[:4] == b"RIFF" and photo_bytes[8:12] == b"WEBP":
+        mime_type = "image/webp"
+    else:
+        mime_type = "image/jpeg"
+
+    client = get_vision_client()
+    b64 = base64.b64encode(photo_bytes).decode("utf-8")
+
+    resp = client.chat.completions.create(
+        model="gemini-2.5-flash",
+        messages=[
+            {"role": "system", "content": OCR_PROMPT},
+            {"role": "user", "content": [
+                {"type": "image_url", "image_url": {
+                    "url": f"data:{mime_type};base64,{b64}",
+                }},
+            ]},
+        ],
+        # temperature=0: transcription should be as repeatable as possible.
+        temperature=0,
+        max_tokens=2000,
+    )
+    # message.content may be None; normalise to an empty string.
+    return resp.choices[0].message.content or ""
+
+
+def grade_answer(question: dict, student_answer: str) -> dict:
+    """Grade a student's answer with the DeepSeek chat model.
+
+    Fills ``GRADING_PROMPT`` with the question and the student's answer,
+    sends it as a single system message in JSON-object mode, and decodes the
+    reply.
+
+    Args:
+        question: Question record. ``question_number``, ``question_type`` and
+            ``question_text`` are required; ``raw_answer_text``, ``solution``
+            and ``score`` are optional.
+        student_answer: The student's answer text.
+
+    Returns:
+        The decoded JSON reply. ``GRADING_PROMPT`` requests the keys
+        ``is_correct``, ``score_given``, ``feedback`` and ``error_at_step``;
+        they are not validated here.
+
+    Raises:
+        KeyError: If a required question field is missing.
+        json.JSONDecodeError: If the reply is empty or is not valid JSON.
+    """
+    # Falsy values (None, "", 0) fall through to the next candidate/default.
+    reference = question.get("raw_answer_text") or question.get("solution") or "No reference answer"
+    score = question.get("score") or "unknown"
+
+    ds = get_deepseek_client()
+    prompt = GRADING_PROMPT.format(
+        question_number=question["question_number"],
+        question_type=question["question_type"],
+        question_text=question["question_text"],
+        score=score,
+        reference_answer=reference,
+        student_answer=student_answer,
+    )
+    resp = ds.chat.completions.create(
+        model="deepseek-chat",
+        messages=[{"role": "system", "content": prompt}],
+        temperature=0.2,
+        response_format={"type": "json_object"},
+    )
+    # message.content can be None (ocr_photo guards against the same case).
+    # Decoding "" raises JSONDecodeError, so an empty reply fails the same way
+    # as a malformed one instead of leaking a TypeError from json.loads(None).
+    return json.loads(resp.choices[0].message.content or "")
+
+
+def generate_variant(question: dict) -> dict:
+    """Generate a variant of a question with the DeepSeek chat model.
+
+    Fills ``VARIANT_PROMPT`` from the question record, sends it as a single
+    system message in JSON-object mode, and decodes the reply.
+
+    Args:
+        question: Question record. ``question_type`` and ``question_text``
+            are required; ``correct_option``, ``correct_answer``,
+            ``raw_answer_text``, ``topics`` and ``difficulty`` are optional.
+
+    Returns:
+        The decoded JSON reply. ``VARIANT_PROMPT`` requests the keys
+        ``question_text``, ``question_type``, ``options``,
+        ``correct_answer``, ``ai_hint`` and ``solution``; they are not
+        validated here.
+
+    Raises:
+        KeyError: If a required question field is missing.
+        json.JSONDecodeError: If the reply is empty or is not valid JSON.
+    """
+    # First truthy candidate wins; "N/A" when no reference answer is stored.
+    answer = (
+        question.get("correct_option")
+        or question.get("correct_answer")
+        or question.get("raw_answer_text")
+        or "N/A"
+    )
+    # Use `or` rather than a .get() default so a key that is present but None
+    # (or empty) still gets the fallback; str() keeps join() from failing on
+    # non-string topic entries.
+    topics = ", ".join(str(topic) for topic in (question.get("topics") or []))
+    difficulty = question.get("difficulty") or "medium"
+
+    ds = get_deepseek_client()
+    prompt = VARIANT_PROMPT.format(
+        question_type=question["question_type"],
+        question_text=question["question_text"],
+        topics=topics,
+        difficulty=difficulty,
+        answer=answer,
+    )
+    resp = ds.chat.completions.create(
+        model="deepseek-chat",
+        messages=[{"role": "system", "content": prompt}],
+        temperature=0.5,
+        response_format={"type": "json_object"},
+    )
+    # message.content can be None (ocr_photo guards against the same case).
+    # Decoding "" raises JSONDecodeError, so an empty reply fails the same way
+    # as a malformed one instead of leaking a TypeError from json.loads(None).
+    return json.loads(resp.choices[0].message.content or "")