From 9c09944c96f6f2c23ab40ba50b694cc3c3b6e035 Mon Sep 17 00:00:00 2001 From: Zhao Date: Fri, 24 Apr 2026 22:41:57 +0900 Subject: [PATCH] feat: expandable previews, KaTeX rendering, variant speedup, batch import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Analytics/Similar: expandable question preview with KaTeX rendering - KaTeXRenderer: auto markdown-to-HTML (code blocks, tables, bold), auto Unicode→LaTeX - ErrorBook: full question text rendering instead of truncated preview - Variant: remove hint/solution from generation (faster), async, fix null crash - Grading: add max_tokens limit - JSON parser: robust multi-layer repair + JSONDecodeError retry - Extraction prompt: enforce LaTeX notation for math - Upload: redirect to home instead of blank paper page - ProcessingBanner: add ETA time estimate + percentage - Batch import script + handoff guide for team Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/BATCH_IMPORT_GUIDE.md | 220 ++++++++++++ backend/app/routers/analytics.py | 1 + backend/app/routers/questions.py | 2 +- backend/app/services/grader.py | 34 +- backend/app/services/paper_processor.py | 50 ++- backend/batch_import.py | 323 ++++++++++++++++++ .../components/layout/ProcessingBanner.tsx | 46 ++- .../src/components/shared/KaTeXRenderer.tsx | 121 ++++++- frontend/src/components/upload/UploadForm.tsx | 4 +- .../workbench/SimilarHistoryPanel.tsx | 149 +++++--- .../components/workbench/VariantDetail.tsx | 8 +- .../src/components/workbench/VariantModal.tsx | 28 +- frontend/src/pages/AnalyticsPage.tsx | 114 +++++-- frontend/src/pages/ErrorBookPage.tsx | 10 +- frontend/src/pages/WorkbenchPage.tsx | 6 +- frontend/src/types/api.ts | 1 + 16 files changed, 990 insertions(+), 127 deletions(-) create mode 100644 backend/BATCH_IMPORT_GUIDE.md create mode 100644 backend/batch_import.py diff --git a/backend/BATCH_IMPORT_GUIDE.md b/backend/BATCH_IMPORT_GUIDE.md new file mode 100644 index 0000000..3992cb7 --- 
/dev/null +++ b/backend/BATCH_IMPORT_GUIDE.md @@ -0,0 +1,220 @@ +# 批量导入试卷指南 + +## 概述 + +`batch_import.py` 用于批量向 PastPaper Master 数据库填充试卷。它会自动完成: +1. 创建 DB 记录 +2. 上传 PDF 到 Supabase Storage +3. Gemini Vision 提取题目结构 +4. DeepSeek 生成 AI 解题三件套(knowledge reminder + hint + solution) + +## 环境准备 + +### 1. 服务器信息 + +| 项目 | 值 | +|------|-----| +| 生产服务器 | `129.226.210.66` | +| SSH | `ssh -i ~/.ssh/id_ed25519 root@129.226.210.66` | +| 后端容器 | `pastpaper-backend-1` | +| 项目路径 | `/opt/pastpaper/` | +| 前端静态文件 | `/opt/1panel/www/pastpaper/` | + +### 2. 在本地运行(推荐) + +```bash +cd /path/to/PastPaper\ Master/backend + +# 确保 .env 在项目根目录(../.env) +# 需要的 key: SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, GOOGLE_GEMINI_API_KEY, DEEPSEEK_API_KEY + +# 激活虚拟环境 +source .venv/bin/activate + +# 或用 venv 的 python +.venv/bin/python batch_import.py ... +``` + +### 3. 在服务器 Docker 容器里运行 + +```bash +# 先把脚本和试卷文件传到服务器 +scp -i ~/.ssh/id_ed25519 batch_import.py root@129.226.210.66:/opt/pastpaper/backend/ +scp -i ~/.ssh/id_ed25519 -r /path/to/papers root@129.226.210.66:/opt/pastpaper/papers_to_import/ + +# 进容器运行 +ssh -i ~/.ssh/id_ed25519 root@129.226.210.66 +docker exec -it pastpaper-backend-1 bash +cd /app +python batch_import.py /path/to/papers --batch +``` + +## 使用方法 + +### 单份导入 + +```bash +python batch_import.py paper.pdf \ + --course COMP2211 \ + --year 2024 \ + --term spring \ + --exam midterm + +# 带答案 +python batch_import.py paper.pdf \ + --answer answer.pdf \ + --course COMP2211 \ + --year 2024 \ + --term spring \ + --exam midterm +``` + +### 批量导入 + +#### 目录结构要求 + +``` +papers_to_import/ +├── COMP2211/ +│ ├── 2024_spring_midterm.pdf +│ ├── 2024_spring_midterm_answer.pdf <- 自动匹配 +│ ├── 2024_fall_final.pdf +│ └── 2023_spring_midterm.pdf +├── COMP2011/ +│ ├── 2024_spring_midterm.pdf +│ └── 2024_fall_final.pdf +├── MATH1014/ +│ └── 2024_spring_midterm.pdf +└── FINA2303/ + └── 2023_fall_midterm.pdf +``` + +- 一级目录名 = 课程代码(自动转大写) +- 文件名格式: `{year}_{term}_{examtype}.pdf` +- 答案文件: 
`{year}_{term}_{examtype}_answer.pdf`(可选,放同一目录,自动匹配) +- term: `spring` / `fall` / `summer` +- examtype: `midterm` / `final` / `quiz` + +#### 命令 + +```bash +# 先试运行看看会导入什么 +python batch_import.py papers_to_import/ --batch --dry-run + +# 正式导入(串行,最安全) +python batch_import.py papers_to_import/ --batch + +# 并发导入(2个同时处理,更快但 API 可能限流) +python batch_import.py papers_to_import/ --batch --concurrency 2 +``` + +### 自动查重 + +脚本会自动跳过已存在的试卷(相同 course_code + year + term + exam_type 且 status 为 ready 或 processing)。 + +## 处理时间估计 + +单份试卷处理时间取决于页数和题目数: + +| 阶段 | 耗时 | +|------|------| +| PDF 渲染 | 2-5s | +| Vision 提取(每 8 页一批) | 30-60s/批 | +| 答案匹配 | 20-40s | +| AI trio 生成(每 3 题一批) | 15-25s/批 | +| **总计(30 题试卷)** | **~3-5 min** | +| **总计(40+ 题试卷)** | **~5-8 min** | + +建议: 并发不要超过 2,否则 Gemini API 可能限流(429 错误,脚本会自动重试但会更慢)。 + +## API 费用 + +| 模型 | 用途 | 费用 | +|------|------|------| +| Gemini 2.5 Flash | Vision 提取 + 答案匹配 | 免费额度内通常够 | +| DeepSeek V3 | AI trio 生成 | ~$0.5-1.5/份试卷 | + +监控费用: +- Gemini: https://aistudio.google.com (API keys 页面看用量) +- DeepSeek: https://platform.deepseek.com (Usage 页面) + +## 常见问题 + +### Q: 处理失败怎么办? + +试卷会标记为 `status=error`。可以删掉重来: +```bash +# 在 backend/ 目录下 +.venv/bin/python -c " +import sys; sys.path.insert(0, '.') +from dotenv import load_dotenv; load_dotenv('../.env') +from app.services.supabase_client import get_supabase +sb = get_supabase() +errors = sb.table('papers').select('id, course_code').eq('status', 'error').execute().data +for p in errors: + sb.table('paper_questions').delete().eq('paper_id', p['id']).execute() + sb.table('papers').delete().eq('id', p['id']).execute() + print('Deleted', p['course_code']) +" +``` + +### Q: JSON 解析错误? + +已内置多层 JSON 修复 + 自动重试(最多 6 次)。如果还是失败,通常是因为试卷内容太复杂(大量 LaTeX + 代码),可以尝试: +1. 删掉 error 记录重新导入 +2. 如果反复失败,可能需要拆分试卷 PDF + +### Q: 如何只重新生成 AI trio(题目已提取)? 
+ +```bash +# 清空 AI trio 三个字段(solution / ai_hint / knowledge_reminder),重启后端后会自动重新生成 +.venv/bin/python -c " +import sys; sys.path.insert(0, '.') +from dotenv import load_dotenv; load_dotenv('../.env') +from app.services.supabase_client import get_supabase +sb = get_supabase() +PAPER_ID = 'xxxxxxxx-xxxx-...' # 替换 +qs = sb.table('paper_questions').select('id').eq('paper_id', PAPER_ID).execute().data +for q in qs: + sb.table('paper_questions').update({'solution': None, 'ai_hint': None, 'knowledge_reminder': None}).eq('id', q['id']).execute() +sb.table('papers').update({'status': 'processing'}).eq('id', PAPER_ID).execute() +print(f'Reset {len(qs)} questions, restart backend to regenerate') +" + +# 然后重启后端 +ssh -i ~/.ssh/id_ed25519 root@129.226.210.66 "sudo docker restart pastpaper-backend-1" +``` + +### Q: 如何部署后端代码改动? + +```bash +# 上传改动的文件 +scp -i ~/.ssh/id_ed25519 app/services/paper_processor.py root@129.226.210.66:/opt/pastpaper/backend/app/services/ + +# 重建容器 +ssh -i ~/.ssh/id_ed25519 root@129.226.210.66 "cd /opt/pastpaper && sudo docker compose up -d --build backend" +``` + +### Q: 如何部署前端改动? 
+ +```bash +cd frontend +npm run build +cp public/favicon.jpg dist/ +ssh -i ~/.ssh/id_ed25519 root@129.226.210.66 "rm -rf /opt/1panel/www/pastpaper/assets" +scp -i ~/.ssh/id_ed25519 dist/index.html dist/favicon.jpg root@129.226.210.66:/opt/1panel/www/pastpaper/ +scp -i ~/.ssh/id_ed25519 -r dist/assets root@129.226.210.66:/opt/1panel/www/pastpaper/ +``` + +## 试卷来源 + +`pastpaper-scraper/papers/` 目录下有从 HKUST 爬取的历年试卷 PDF,按课程分目录。可以从中挑选热门课程导入: + +优先导入的课程(用户量大): +- COMP2011, COMP2211, COMP2711H +- MATH1013, MATH1014, MATH2023 +- PHYS1112 +- ELEC2100 +- FINA2303 + +将文件按上述目录结构组织后运行 `--batch` 即可。 diff --git a/backend/app/routers/analytics.py b/backend/app/routers/analytics.py index 4771c04..1fa5736 100644 --- a/backend/app/routers/analytics.py +++ b/backend/app/routers/analytics.py @@ -230,6 +230,7 @@ async def get_course_analytics(course_code: str): "source": source_label, "question_number": question["question_number"], "preview": question["question_text"][:220], + "full_text": question["question_text"], "difficulty": question.get("difficulty"), "question_type": question_type, "year": paper.get("year"), diff --git a/backend/app/routers/questions.py b/backend/app/routers/questions.py index b61d288..820bb4c 100644 --- a/backend/app/routers/questions.py +++ b/backend/app/routers/questions.py @@ -133,7 +133,7 @@ async def create_variant(question_id: str, user_id: str = Depends(get_current_us raise HTTPException(status_code=404, detail="Question not found") question = result.data[0] - variant_data = await asyncio.to_thread(generate_variant, question) + variant_data = await generate_variant(question) variant_data["knowledge_reminder"] = question.get("knowledge_reminder", "") saved = sb.table("question_variants").insert({ diff --git a/backend/app/services/grader.py b/backend/app/services/grader.py index 68e58f4..d2f9740 100644 --- a/backend/app/services/grader.py +++ b/backend/app/services/grader.py @@ -68,9 +68,7 @@ Return JSON: "question_text": "HTML formatted variant question", 
"question_type": "{question_type}", "options": [MC only, format {{"label":"A","text":"..."}}, ...] or null, - "correct_answer": "Correct answer (plain text)", - "ai_hint": "HTML formatted hint that guides thinking WITHOUT giving the answer", - "solution": "HTML formatted complete step-by-step solution" + "correct_answer": "Correct answer (plain text)" }}""" @@ -90,7 +88,7 @@ def ocr_photo(photo_bytes: bytes) -> str: ]}, ], temperature=0, - max_tokens=2000, + max_tokens=1500, ) return resp.choices[0].message.content or "" @@ -114,13 +112,15 @@ def grade_answer(question: dict, student_answer: str) -> dict: )}, ], temperature=0.2, + max_tokens=2048, response_format={"type": "json_object"}, ) return json.loads(resp.choices[0].message.content) -def generate_variant(question: dict) -> dict: - """Gemini generates a variant question""" +async def generate_variant(question: dict) -> dict: + """DeepSeek generates a variant question (async)""" + import asyncio answer = ( question.get("correct_option") or question.get("correct_answer") @@ -129,18 +129,20 @@ def generate_variant(question: dict) -> dict: ) ds = get_deepseek_client() - resp = ds.chat.completions.create( + prompt = VARIANT_PROMPT.format( + question_type=question["question_type"], + question_text=question["question_text"], + topics=", ".join(question.get("topics", [])), + difficulty=question.get("difficulty", "medium"), + answer=answer, + ) + + resp = await asyncio.to_thread( + ds.chat.completions.create, model="deepseek-chat", - messages=[ - {"role": "system", "content": VARIANT_PROMPT.format( - question_type=question["question_type"], - question_text=question["question_text"], - topics=", ".join(question.get("topics", [])), - difficulty=question.get("difficulty", "medium"), - answer=answer, - )}, - ], + messages=[{"role": "system", "content": prompt}], temperature=0.5, + max_tokens=2048, response_format={"type": "json_object"}, ) return json.loads(resp.choices[0].message.content) diff --git 
a/backend/app/services/paper_processor.py b/backend/app/services/paper_processor.py index aed6397..5078290 100644 --- a/backend/app/services/paper_processor.py +++ b/backend/app/services/paper_processor.py @@ -35,6 +35,8 @@ CRITICAL RULES for question_text: - For sub-questions (e.g. (a)(i)), copy the ENTIRE parent question setup (variable definitions, code blocks, problem description) into the question_text, then append the specific sub-question. - For Python/code questions: include ALL variable definitions and import statements verbatim, exactly as they appear in the exam, preserving multi-line arrays and data structures completely. - Never truncate code. If a variable is defined across multiple lines (e.g. a numpy array), include every line. +- CRITICAL: ALL mathematical expressions, formulas, variables, and symbols MUST use LaTeX notation. Wrap inline math with $...$ and display math with $$...$$. NEVER use Unicode symbols like σ, μ, π, ², ≥, ≤, √, ∑, etc. Use $\sigma$, $\mu$, $\pi$, $^2$, $\geq$, $\leq$, $\sqrt{}$, $\sum$, etc. Every fraction should be $\frac{a}{b}$, every subscript $x_i$, every superscript $x^n$. +- Code blocks must use markdown fenced code blocks (```python ... ```). Output JSON format (strictly follow): { @@ -203,6 +205,8 @@ RETRYABLE_ERROR_MARKERS = ( def is_retryable_error(exc: Exception) -> bool: + if isinstance(exc, json.JSONDecodeError): + return True # LLM returned bad JSON, retry may fix it message = str(exc).lower() return any(marker in message for marker in RETRYABLE_ERROR_MARKERS) @@ -221,17 +225,51 @@ def pdf_to_images(pdf_bytes: bytes, dpi: int = 96) -> list[str]: def parse_json_response(text: str) -> dict: - """解析模型返回的 JSON,兼容 markdown 代码块包装""" + """解析模型返回的 JSON,兼容各种格式问题""" text = text.strip() - # 去掉 ```json ... ``` 包装 + + # 1. 去掉 ```json ... 
``` 包装 if text.startswith("```"): lines = text.splitlines() text = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]) - # 移除 JSON 字符串中的非法控制字符(0x00-0x1F 除了 \t \n \r) + + # 2. 如果不以 { 开头,尝试找到第一个 { + idx = text.find("{") + if idx > 0: + text = text[idx:] + # 找到最后一个 } 截断尾部垃圾 + ridx = text.rfind("}") + if ridx > 0: + text = text[:ridx + 1] + + # 3. 移除所有非法控制字符(0x00-0x1F 除了 \t \n \r) text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text) - # 修复模型返回的无效 JSON 转义序列:只修奇数个反斜杠后的非法字符 - text = re.sub(r'(? dict | None: + """ + 从文件名解析元数据。支持格式: + - 2024_spring_midterm.pdf + - 2024-fall-final.pdf + - 2024s_mid.pdf + - (COMP2211)[2024](s)midterm~xxx.pdf (scraper 格式) + """ + base = Path(filename).stem.lower() + + # 去掉 _answer 后缀 + if base.endswith("_answer") or base.endswith("_ans") or base.endswith("_solution"): + return None # 这是答案文件,不单独导入 + + result = {} + + # Year: 4位数字 + year_match = re.search(r'(20[1-2]\d)', base) + if year_match: + result["year"] = int(year_match.group(1)) + + # Term + if re.search(r'spring|spr|\(s\)|_s_', base): + result["term"] = "spring" + elif re.search(r'fall|aut|\(f\)|_f_', base): + result["term"] = "fall" + elif re.search(r'summer|sum', base): + result["term"] = "summer" + + # Exam type + if re.search(r'mid', base): + result["exam_type"] = "midterm" + elif re.search(r'final|fin', base): + result["exam_type"] = "final" + elif re.search(r'quiz', base): + result["exam_type"] = "quiz" + + if "year" in result and "term" in result and "exam_type" in result: + return result + return None + + +def find_answer_file(paper_path: Path) -> Path | None: + """查找对应的答案文件""" + stem = paper_path.stem + parent = paper_path.parent + for suffix in ["_answer", "_ans", "_solution"]: + candidate = parent / f"{stem}{suffix}.pdf" + if candidate.exists(): + return candidate + return None + + +def scan_directory(dir_path: Path) -> list[dict]: + """ + 扫描目录,返回待导入的试卷列表。 + 期望结构: dir_path/COURSE_CODE/year_term_examtype.pdf + """ + items = [] + for course_dir in 
sorted(dir_path.iterdir()): + if not course_dir.is_dir(): + continue + course_code = course_dir.name.upper() + + for pdf in sorted(course_dir.glob("*.pdf")): + meta = parse_filename(pdf.name) + if meta is None: + continue + + answer_file = find_answer_file(pdf) + items.append({ + "paper_path": pdf, + "answer_path": answer_file, + "course_code": course_code, + **meta, + }) + return items + + +def check_duplicate(sb, course_code: str, year: int, term: str, exam_type: str) -> bool: + """检查是否已存在相同试卷""" + existing = ( + sb.table("papers") + .select("id") + .eq("course_code", course_code) + .eq("year", year) + .eq("term", term) + .eq("exam_type", exam_type) + .in_("status", ["ready", "processing"]) + .execute() + .data + ) + return len(existing) > 0 + + +async def import_single( + paper_path: Path, + answer_path: Path | None, + course_code: str, + year: int, + term: str, + exam_type: str, + skip_duplicates: bool = True, +) -> str | None: + """导入单份试卷,返回 paper_id 或 None(跳过)""" + sb = get_supabase() + + # 查重 + if skip_duplicates and check_duplicate(sb, course_code, year, term, exam_type): + print(f" SKIP (duplicate): {course_code} {year} {term} {exam_type}") + return None + + # 读文件 + paper_bytes = paper_path.read_bytes() + answer_bytes = answer_path.read_bytes() if answer_path else None + + # 创建 DB 记录 + record = sb.table("papers").insert({ + "user_id": BATCH_USER_ID, + "course_code": course_code, + "year": year, + "term": term, + "exam_type": exam_type, + "paper_file_url": "", + "answer_file_url": None, + "status": "processing", + }).execute() + paper_id = record.data[0]["id"] + + # 上传到 Supabase Storage + storage_path = f"{course_code}/{year}_{term}_{exam_type}" + try: + sb.storage.from_("papers").upload( + f"{storage_path}/paper.pdf", paper_bytes, + file_options={"content-type": "application/pdf", "upsert": "true"}, + ) + paper_url = sb.storage.from_("papers").get_public_url(f"{storage_path}/paper.pdf") + update = {"paper_file_url": paper_url} + + if answer_bytes: + 
sb.storage.from_("papers").upload( + f"{storage_path}/answer.pdf", answer_bytes, + file_options={"content-type": "application/pdf", "upsert": "true"}, + ) + update["answer_file_url"] = sb.storage.from_("papers").get_public_url(f"{storage_path}/answer.pdf") + + sb.table("papers").update(update).eq("id", paper_id).execute() + except Exception as e: + print(f" WARNING: Storage upload failed: {e}") + + # 处理试卷(Vision 提取 + AI trio) + print(f" Processing {course_code} {year} {term} {exam_type} ...") + t0 = time.time() + try: + await process_paper(paper_id, paper_bytes, answer_bytes) + elapsed = time.time() - t0 + print(f" DONE in {elapsed:.0f}s -> {paper_id[:8]}") + except Exception as e: + elapsed = time.time() - t0 + print(f" ERROR after {elapsed:.0f}s: {e}") + sb.table("papers").update({"status": "error", "processing_step": str(e)[:200]}).eq("id", paper_id).execute() + + return paper_id + + +async def batch_import(dir_path: Path, concurrency: int = 1, dry_run: bool = False): + """批量导入目录下所有试卷""" + items = scan_directory(dir_path) + + if not items: + print(f"No papers found in {dir_path}") + print("Expected structure: dir/COURSE_CODE/year_term_examtype.pdf") + return + + print(f"Found {len(items)} papers to import:\n") + for item in items: + ans_label = f" + answer" if item["answer_path"] else "" + print(f" {item['course_code']} {item['year']} {item['term']} {item['exam_type']}{ans_label}") + print(f" <- {item['paper_path']}") + + if dry_run: + print(f"\n[DRY RUN] Would import {len(items)} papers. 
Exiting.") + return + + print(f"\nStarting import (concurrency={concurrency})...\n") + + semaphore = asyncio.Semaphore(concurrency) + results = {"ok": 0, "skip": 0, "error": 0} + + async def process_one(item): + async with semaphore: + try: + pid = await import_single( + paper_path=item["paper_path"], + answer_path=item["answer_path"], + course_code=item["course_code"], + year=item["year"], + term=item["term"], + exam_type=item["exam_type"], + ) + if pid: + results["ok"] += 1 + else: + results["skip"] += 1 + except Exception as e: + results["error"] += 1 + print(f" FATAL: {item['course_code']} {item['year']} - {e}") + + # 串行或并发处理 + if concurrency == 1: + for item in items: + await process_one(item) + else: + await asyncio.gather(*[process_one(item) for item in items]) + + print(f"\n{'='*50}") + print(f"Import complete: {results['ok']} success, {results['skip']} skipped, {results['error']} errors") + + +def main(): + parser = argparse.ArgumentParser(description="Batch import papers to PastPaper Master") + parser.add_argument("path", help="Path to PDF file or directory (with --batch)") + parser.add_argument("--answer", help="Path to answer PDF (single file mode)") + parser.add_argument("--course", help="Course code (e.g. COMP2211)") + parser.add_argument("--year", type=int, help="Year (e.g. 
2024)") + parser.add_argument("--term", choices=["spring", "summer", "fall"], help="Term") + parser.add_argument("--exam", choices=["midterm", "final", "quiz"], help="Exam type") + parser.add_argument("--batch", action="store_true", help="Batch import from directory") + parser.add_argument("--concurrency", type=int, default=1, help="Max concurrent imports (default: 1)") + parser.add_argument("--dry-run", action="store_true", help="Print what would be imported without doing it") + + args = parser.parse_args() + path = Path(args.path) + + if args.batch: + if not path.is_dir(): + print(f"Error: {path} is not a directory") + sys.exit(1) + asyncio.run(batch_import(path, concurrency=args.concurrency, dry_run=args.dry_run)) + else: + # Single file mode + if not path.is_file(): + print(f"Error: {path} is not a file") + sys.exit(1) + if not all([args.course, args.year, args.term, args.exam]): + print("Error: --course, --year, --term, --exam are required for single file import") + sys.exit(1) + + answer_path = Path(args.answer) if args.answer else None + result = asyncio.run(import_single( + paper_path=path, + answer_path=answer_path, + course_code=args.course.upper(), + year=args.year, + term=args.term, + exam_type=args.exam, + )) + if result: + print(f"\nPaper ID: {result}") + + +if __name__ == "__main__": + main() diff --git a/frontend/src/components/layout/ProcessingBanner.tsx b/frontend/src/components/layout/ProcessingBanner.tsx index f7bbe7c..db44db5 100644 --- a/frontend/src/components/layout/ProcessingBanner.tsx +++ b/frontend/src/components/layout/ProcessingBanner.tsx @@ -121,10 +121,34 @@ export default function ProcessingBanner() { {expanded && (
{processing.map((p) => { - const step = p.processing_step; + const step = p.processing_step || ""; const progress = p.processing_progress || 0; - const total = p.processing_total || 0; - const pct = total > 0 ? Math.round((progress / total) * 100) : 0; + const totalSteps = p.processing_total || 0; + const pct = totalSteps > 0 ? Math.round((progress / totalSteps) * 100) : 0; + + // Estimate remaining time based on step + let eta = ""; + if (step.includes("Rendering")) { + eta = "~2-3 min"; + } else if (step.includes("Reading") || step.includes("Extracting")) { + eta = "~3-5 min"; + } else if (step.includes("Matching answer")) { + eta = "~1-2 min"; + } else if (step.includes("Generating solution") || step.includes("Generating AI")) { + if (totalSteps > 0 && progress > 0) { + const remaining = totalSteps - progress; + const secsPerBatch = 25; + const batchSize = 3; + const totalSecs = Math.ceil(remaining / batchSize) * secsPerBatch; + if (totalSecs < 60) eta = `~${totalSecs}s`; + else eta = `~${Math.ceil(totalSecs / 60)} min`; + } else { + eta = "~5-8 min"; + } + } else if (step) { + eta = "~5-10 min"; + } + return (
- + {p.course_code}{" "} {p.year} {p.term} {p.exam_type} + {eta && ( + {eta} + )}
{step && ( <>
{step}
- {total > 0 && ( -
-
-
+ {totalSteps > 0 && ( + <> +
+
+
+
{pct}%
+ )} )} diff --git a/frontend/src/components/shared/KaTeXRenderer.tsx b/frontend/src/components/shared/KaTeXRenderer.tsx index 9dc78da..369b865 100644 --- a/frontend/src/components/shared/KaTeXRenderer.tsx +++ b/frontend/src/components/shared/KaTeXRenderer.tsx @@ -68,6 +68,122 @@ function renderTex(tex: string, displayMode: boolean): string { } } +/** + * Light markdown-to-HTML for raw question text that isn't already HTML. + * Handles fenced code blocks, inline code, markdown tables, and newlines. + */ +function markdownToHtml(text: string): string { + // Split into blocks to handle code fences and tables separately + const blocks: string[] = []; + let remaining = text; + + // 1. Extract fenced code blocks first + remaining = remaining.replace(/```(\w*)\n([\s\S]*?)```/g, (_m, lang: string, code: string) => { + const escaped = code.replace(/&/g, "&").replace(//g, ">"); + const placeholder = `\x00CODE${blocks.length}\x00`; + blocks.push(`
${escaped.trimEnd()}
`); + return placeholder; + }); + + // 2. Convert markdown tables + remaining = remaining.replace( + /(?:^|\n)((?:\|[^\n]+\|\n)+\|[-| :]+\|\n(?:\|[^\n]+\|\n?)*)/g, + (_m, table: string) => { + const rows = table.trim().split("\n").filter((r) => r.trim()); + if (rows.length < 2) return _m; + const parseRow = (row: string) => + row.split("|").slice(1, -1).map((c) => c.trim()); + const headers = parseRow(rows[0]); + // rows[1] is the separator + const bodyRows = rows.slice(2).map(parseRow); + let html = ''; + for (const h of headers) { + html += ``; + } + html += ""; + for (const row of bodyRows) { + html += ""; + for (const cell of row) { + html += ``; + } + html += ""; + } + html += "
${h}
${cell}
"; + return html; + } + ); + + // 3. Inline code: `...` → + remaining = remaining.replace(/`([^`]+)`/g, '$1'); + + // 4. Bold: **...** or __...__ + remaining = remaining.replace(/\*\*([^*]+)\*\*/g, "$1"); + + // 5. Auto-wrap Unicode math symbols in $ if not already wrapped + // Greek letters + remaining = remaining.replace(/(? { + const greekMap: Record = { + "α": "\\alpha", "β": "\\beta", "γ": "\\gamma", "δ": "\\delta", + "ε": "\\epsilon", "ζ": "\\zeta", "η": "\\eta", "θ": "\\theta", + "λ": "\\lambda", "μ": "\\mu", "ν": "\\nu", "π": "\\pi", + "ρ": "\\rho", "σ": "\\sigma", "τ": "\\tau", "φ": "\\phi", + "χ": "\\chi", "ψ": "\\psi", "ω": "\\omega", + "Σ": "\\Sigma", "Π": "\\Pi", "Δ": "\\Delta", "Ω": "\\Omega", + "Φ": "\\Phi", "Γ": "\\Gamma", "Λ": "\\Lambda", "Θ": "\\Theta", + }; + return `$${greekMap[ch] || ch}$`; + }); + // Unicode math operators: ≥ ≤ ≠ × ÷ ± ∈ ⊆ ∪ ∩ √ ∞ + const mathSymbols: [RegExp, string][] = [ + [/≥/g, "$\\geq$"], [/≤/g, "$\\leq$"], [/≠/g, "$\\neq$"], + [/×/g, "$\\times$"], [/÷/g, "$\\div$"], [/±/g, "$\\pm$"], + [/∈/g, "$\\in$"], [/∉/g, "$\\notin$"], [/⊆/g, "$\\subseteq$"], + [/∪/g, "$\\cup$"], [/∩/g, "$\\cap$"], [/∅/g, "$\\emptyset$"], + [/√/g, "$\\sqrt{}$"], [/∞/g, "$\\infty$"], [/∑/g, "$\\sum$"], + [/∧/g, "$\\wedge$"], [/∨/g, "$\\vee$"], + ]; + for (const [re, repl] of mathSymbols) { + remaining = remaining.replace(re, repl); + } + // Unicode superscripts/subscripts + remaining = remaining.replace(/([⁰¹²³⁴⁵⁶⁷⁸⁹ⁿ⁻]+)/g, (_, sups) => { + const supMap: Record = { + "⁰": "0", "¹": "1", "²": "2", "³": "3", "⁴": "4", + "⁵": "5", "⁶": "6", "⁷": "7", "⁸": "8", "⁹": "9", + "ⁿ": "n", "⁻": "-", + }; + const converted = [...sups].map((c) => supMap[c] || c).join(""); + return `$^{${converted}}$`; + }); + remaining = remaining.replace(/([₀₁₂₃₄₅₆₇₈₉]+)/g, (_, subs) => { + const subMap: Record = { + "₀": "0", "₁": "1", "₂": "2", "₃": "3", "₄": "4", + "₅": "5", "₆": "6", "₇": "7", "₈": "8", "₉": "9", + }; + const converted = [...subs].map((c) => subMap[c] || 
c).join(""); + return `$_{${converted}}$`; + }); + // Merge adjacent $...$ $...$ → $... ...$ + remaining = remaining.replace(/\$\s*\$/g, " "); + + // 6. Newlines →
+ remaining = remaining.replace(/\n/g, "
"); + + // 6. Restore code blocks + for (let i = 0; i < blocks.length; i++) { + remaining = remaining.replace(`\x00CODE${i}\x00`, blocks[i]); + } + + return remaining; +} + +/** + * Detect if a string is already HTML (has tags) or is raw text. + */ +function isHtml(text: string): boolean { + return /<[a-z][\s\S]*>/i.test(text); +} + export default function KaTeXRenderer({ html, className, @@ -75,7 +191,10 @@ export default function KaTeXRenderer({ html: string; className?: string; }) { - const rendered = useMemo(() => renderLatexInString(html), [html]); + const rendered = useMemo(() => { + const processed = isHtml(html) ? html : markdownToHtml(html); + return renderLatexInString(processed); + }, [html]); return (
= { @@ -21,7 +22,6 @@ function matchColor(percent: number): string { } function cleanReason(reason: string): string { - // "Shared topic: foo_bar, baz_qux" → "Shared topic: Foo Bar, Baz Qux" return reason.replace(/[_]/g, " ").replace(/:\s*(.+)$/, (_, rest) => ": " + rest.split(",").map((s: string) => s.trim().replace(/\b\w/g, (c: string) => c.toUpperCase()) @@ -29,6 +29,108 @@ function cleanReason(reason: string): string { ); } +function SimilarCard({ item }: { item: SimilarQuestion }) { + const [expanded, setExpanded] = useState(false); + + return ( +
+ {/* Header — click to expand */} + + + {/* Expanded preview */} + {expanded && ( +
+
+ +
+ + {/* All topics */} + {item.topics.length > 0 && ( +
+ {item.topics.map((t) => ( + + {t} + + ))} +
+ )} + + {/* Actions */} +
+ + + + + Open in Exam + + +
+
+ )} +
+ ); +} + export default function SimilarHistoryPanel({ question }: { question: Question }) { const [items, setItems] = useState([]); const [loading, setLoading] = useState(true); @@ -78,50 +180,7 @@ export default function SimilarHistoryPanel({ question }: { question: Question } )} {items.map((item) => ( - - {/* Match % badge */} - - {item.match_percent}% - - - {/* Main info */} -
-
- {item.source} - · - Q{item.question_number} - {item.question_type && ( - <> - · - {typeLabel[item.question_type] ?? item.question_type} - - )} -
- - {/* Topics + reasons in one row */} -
- {item.topics.slice(0, 2).map((topic) => ( - - {topic} - - ))} - {item.match_reasons - ?.filter((r) => !r.startsWith("Same format") && !r.startsWith("Same difficulty")) - .slice(0, 2) - .map((reason) => ( - - {cleanReason(reason)} - - ))} -
-
- - - + ))}
)} diff --git a/frontend/src/components/workbench/VariantDetail.tsx b/frontend/src/components/workbench/VariantDetail.tsx index 9f8f05c..d788cd5 100644 --- a/frontend/src/components/workbench/VariantDetail.tsx +++ b/frontend/src/components/workbench/VariantDetail.tsx @@ -139,9 +139,11 @@ export default function VariantDetail({ )} - - - + {variant.solution && ( + + + + )}
); diff --git a/frontend/src/components/workbench/VariantModal.tsx b/frontend/src/components/workbench/VariantModal.tsx index b83c4f0..c3a2dc6 100644 --- a/frontend/src/components/workbench/VariantModal.tsx +++ b/frontend/src/components/workbench/VariantModal.tsx @@ -161,19 +161,21 @@ export default function VariantModal({ )}
)} -
- - {showSolution && ( -
- -
- )} -
+ {variant.solution && ( +
+ + {showSolution && ( +
+ +
+ )} +
+ )}
+ + {/* Expanded preview */} + {expanded && ( +
+
+ + + {/* All topics */} + {q.topics && q.topics.length > 0 && ( +
+ {q.topics.map((t) => ( + + {t} + + ))} +
+ )} + + {/* Actions */} +
+ + + + + Open in Exam + + +
+
+
+ )} + ); } diff --git a/frontend/src/pages/ErrorBookPage.tsx b/frontend/src/pages/ErrorBookPage.tsx index ffe5cc6..5da78bb 100644 --- a/frontend/src/pages/ErrorBookPage.tsx +++ b/frontend/src/pages/ErrorBookPage.tsx @@ -172,7 +172,9 @@ export default function ErrorBookPage() {
Variant of Q{v.source_question_number} -

{v.variant_data.question_text?.replace(/<[^>]*>/g, "").slice(0, 100)}

+
+ +
@@ -264,8 +266,10 @@ function ErrorCard({ entry, onMastered, onRemove }: { entry: UserAttempt; onMast )} - {/* Question preview */} -

{preview}

+ {/* Question text */} +
+ +
{/* Topics */} {question.topics && question.topics.length > 0 && ( diff --git a/frontend/src/pages/WorkbenchPage.tsx b/frontend/src/pages/WorkbenchPage.tsx index 25c6c98..709733b 100644 --- a/frontend/src/pages/WorkbenchPage.tsx +++ b/frontend/src/pages/WorkbenchPage.tsx @@ -461,9 +461,9 @@ export default function WorkbenchPage() { -

- {v.variant_data.question_text?.replace(/<[^>]*>/g, "").slice(0, 140)} -

+
+ +