Initial commit: PastPaper Master full stack
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
0
backend/app/routers/__init__.py
Normal file
0
backend/app/routers/__init__.py
Normal file
285
backend/app/routers/analytics.py
Normal file
285
backend/app/routers/analytics.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""Course-level analytics endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.services.supabase_client import get_supabase
|
||||
|
||||
# Router that collects the analytics endpoints defined in this module.
router = APIRouter()


# Numeric weight of each stored difficulty value; used to average difficulty.
DIFFICULTY_SCORE = {"easy": 1, "medium": 2, "hard": 3}
# Display label for a rounded average difficulty score.
DIFFICULTY_LABEL = {1: "Easy", 2: "Medium", 3: "Hard"}
|
||||
|
||||
# ── Topic normalization ──────────────────────────────────────
|
||||
# Map variant spellings to canonical label
|
||||
# Keys are lower-case variant spellings; values are the canonical display label.
_TOPIC_ALIASES: dict[str, str] = {
    "numpy": "NumPy",
    "naïve bayes": "Naive Bayes",
    "naïve bayes classifier": "Naive Bayes",
    "naive bayes classifier": "Naive Bayes",
    "bayes classifier": "Naive Bayes",
    "bayes model": "Naive Bayes",
    "bayes' theorem": "Naive Bayes",
    "bayes' rule": "Naive Bayes",
    "k-nearest neighbors": "K-Nearest Neighbors (KNN)",
    "knn": "K-Nearest Neighbors (KNN)",
    "k-means clustering": "K-Means Clustering",
    "k-means": "K-Means Clustering",
    "k means": "K-Means Clustering",
    "multilayer perceptron": "Multilayer Perceptron (MLP)",
    "multi-layer perceptron": "Multilayer Perceptron (MLP)",
    "multi-layer perceptron (mlp)": "Multilayer Perceptron (MLP)",
    "mlp": "Multilayer Perceptron (MLP)",
    "single layer perceptron": "Perceptron",
    "convolutional neural network": "CNN",
    "convolutional neural network (cnn)": "CNN",
    "convolutional neural networks": "CNN",
    "cnn architecture": "CNN",
    "cnn properties": "CNN",
    "python fundamentals": "Python",
    "python programming": "Python",
    "python implementation": "Python",
    "advanced python programming": "Python",
    "python programming: convolutional neural network": "CNN",
    "cross-validation": "Cross Validation",
    "model evaluation implementation": "Model Evaluation",
    "digital image processing": "Image Processing",
    "computer vision": "Image Processing",
    "array slicing": "Array Slicing",
    "slicing": "Array Slicing",
    "array indexing": "Array Slicing",
    "array reshaping": "Reshape",
    "array views": "Array Slicing",
    "view vs copy": "Array Slicing",
    "boolean indexing": "Array Slicing",
    "arange": "NumPy",
    "newaxis": "NumPy",
    "expand dims": "NumPy",
    "transpose": "NumPy",
    "type casting": "NumPy",
    "element-wise operation": "NumPy",
    "array reduction": "NumPy",
    "multi-dimensional array": "NumPy",
    "dot product": "NumPy",
    "vectorization": "NumPy",
    "activation functions": "Activation Function",
    "linear activation function": "Activation Function",
    "neural network architecture": "Neural Networks",
    "hidden layer": "Neural Networks",
    "deep learning": "Neural Networks",
    "deep learning frameworks": "Neural Networks",
    "alpha-beta pruning": "Alpha-Beta Pruning",
    "minimax algorithm": "Minimax",
    "ethics of ai": "AI Ethics",
    "ethics": "AI Ethics",
    "cosine distance": "Cosine Similarity",
    "distance calculation": "Distance Metrics",
    "euclidean distance": "Distance Metrics",
    "manhattan distance": "Distance Metrics",
    "hamming distance": "Distance Metrics",
    "precision": "Model Evaluation",
    "recall": "Model Evaluation",
    "f1 score": "Model Evaluation",
    "macro f1 score": "Model Evaluation",
    "accuracy": "Model Evaluation",
    "classification accuracy": "Model Evaluation",
    "confusion matrix": "Model Evaluation",
    "convolution operation": "Convolution",
    "dilated convolution": "Convolution",
    "3d convolution": "Convolution",
    "gaussian likelihood": "Probability",
    "gaussian distribution": "Probability",
    "categorical likelihood": "Probability",
    "conditional probability": "Probability",
    "total probability theorem": "Probability",
    "probability assumptions": "Probability",
    "tensorflow": "Keras",
    "model summary": "Keras",
    "model construction": "Keras",
    "trainable parameters": "Parameter Calculation",
    "parameter reduction": "Parameter Calculation",
    "output shape calculation": "Parameter Calculation",
    "shape calculation": "Parameter Calculation",
}


def normalize_topic(label: str) -> str:
    """Return the canonical label for a topic, or the cleaned label itself.

    Surrounding whitespace is removed and internal whitespace runs are
    collapsed before the case-insensitive alias lookup.  When no alias
    matches, the cleaned label is returned (original casing preserved) so
    that " Foo " and "Foo" are counted as the same topic.

    Args:
        label: Raw topic label as stored on a question.

    Returns:
        The canonical label from ``_TOPIC_ALIASES`` or the cleaned input.
    """
    # str.split() with no argument both strips and collapses whitespace.
    cleaned = " ".join(label.split())
    return _TOPIC_ALIASES.get(cleaned.lower(), cleaned)
|
||||
|
||||
|
||||
def extract_topic_labels(question: dict) -> list[str]:
    """Collect a question's topic labels, normalized and de-duplicated in order.

    ``analytics_topic`` comes first, followed by the entries of
    ``topic_tags``.  The ``topics`` list is used only as a fallback when
    neither of those yields a label.  Falsy entries are ignored.

    Args:
        question: A question row (dict) as returned by the database.

    Returns:
        Normalized labels, keeping the first occurrence of each.
    """
    candidates: list[str] = []

    primary = question.get("analytics_topic")
    if primary:
        candidates.append(primary)
    candidates.extend(tag for tag in question.get("topic_tags") or [] if tag)

    if not candidates:
        candidates.extend(tag for tag in question.get("topics") or [] if tag)

    # dict keys keep insertion order, so this acts as an ordered set; it also
    # subsumes raw-label de-duplication because equal raw labels normalize
    # to equal results.
    return list(dict.fromkeys(normalize_topic(raw) for raw in candidates))
|
||||
|
||||
|
||||
def extract_question_family(question: dict) -> str:
    """Return the question's format, falling back to its type, then "unknown".

    Args:
        question: A question row (dict).

    Returns:
        The first truthy value of ``question_format`` / ``question_type``,
        or the string "unknown" when both are missing or falsy.
    """
    for key in ("question_format", "question_type"):
        value = question.get(key)
        if value:
            return value
    return "unknown"
|
||||
|
||||
|
||||
@router.get("/courses")
async def list_courses():
    """Return the sorted course codes that have at least one paper with status "ready"."""
    response = (
        get_supabase()
        .table("papers")
        .select("course_code")
        .eq("status", "ready")
        .execute()
    )

    unique_codes = set()
    for record in response.data:
        code = record.get("course_code")
        if code:  # skip rows with a missing/empty course code
            unique_codes.add(code)
    return sorted(unique_codes)
|
||||
|
||||
|
||||
def _paper_source_label(paper: dict) -> str:
    """Build a label such as "2023 Fall Final Part A" from a paper row.

    Missing or NULL ``year`` / ``term`` / ``exam_type`` values are skipped
    instead of raising or being rendered as "None".
    """
    year = paper.get("year")
    pieces = [
        "" if year is None else str(year),
        (paper.get("term") or "").title(),
        (paper.get("exam_type") or "").title(),
    ]
    label = " ".join(piece for piece in pieces if piece)
    part_label = paper.get("part_label")
    if part_label:
        label = f"{label} Part {part_label}".strip()
    return label


@router.get("/course/{course_code}")
async def get_course_analytics(course_code: str):
    """Aggregate topic, question-type and difficulty statistics for a course.

    Only papers whose status is "ready" and whose course code equals the
    upper-cased ``course_code`` are considered.

    Args:
        course_code: Course code from the URL; matched case-insensitively by
            upper-casing it.

    Returns:
        A dict with ``course_code``, ``kpi``, ``topic_frequency``,
        ``question_types``, ``all_questions``, ``difficulty_distribution``
        and ``high_yield_topics``.  The same keys are present when the
        course has no ready papers (with empty/zero values).
    """
    code = course_code.upper()
    sb = get_supabase()

    papers = (
        sb.table("papers")
        .select("id, course_code, year, term, exam_type, part_label, status")
        .eq("course_code", code)
        .eq("status", "ready")
        .order("year", desc=True)
        .execute()
        .data
    )
    if not papers:
        # Keep the response shape identical to the populated case so clients
        # can read "all_questions" unconditionally.
        return {
            "course_code": code,
            "kpi": {"papers": 0, "questions": 0, "topics": 0, "difficulty": "N/A"},
            "topic_frequency": [],
            "question_types": [],
            "all_questions": [],
            "difficulty_distribution": {"easy": 0, "medium": 0, "hard": 0},
            "high_yield_topics": [],
        }

    paper_ids = [paper["id"] for paper in papers]
    questions = (
        sb.table("paper_questions")
        .select(
            "id, paper_id, question_number, question_type, question_format, "
            "question_text, score, topics, analytics_topic, topic_tags, difficulty"
        )
        .in_("paper_id", paper_ids)
        .order("display_order")
        .execute()
        .data
    )

    papers_by_id = {paper["id"]: paper for paper in papers}
    total_questions = len(questions)
    topic_counter: Counter[str] = Counter()
    type_counter: Counter[str] = Counter()
    difficulty_counter: Counter[str] = Counter()
    topic_examples: dict[str, list[dict]] = defaultdict(list)
    difficulty_scores: list[int] = []
    all_question_items: list[dict] = []

    for question in questions:
        question_type = extract_question_family(question)
        type_counter[question_type] += 1

        # Unknown / missing difficulty values are left out of the statistics.
        difficulty = question.get("difficulty")
        if difficulty in DIFFICULTY_SCORE:
            difficulty_counter[difficulty] += 1
            difficulty_scores.append(DIFFICULTY_SCORE[difficulty])

        paper = papers_by_id.get(question["paper_id"], {})
        topics = extract_topic_labels(question)
        q_item = {
            "paper_id": paper.get("id"),
            "source": _paper_source_label(paper),
            "question_number": question["question_number"],
            # question_text may be NULL in the database; preview is then "".
            "preview": (question.get("question_text") or "")[:220],
            "difficulty": question.get("difficulty"),
            "question_type": question_type,
            "year": paper.get("year"),
            "term": paper.get("term"),
            "exam_type": paper.get("exam_type"),
            "topics": topics,
        }
        all_question_items.append(q_item)

        for topic in topics:
            topic_counter[topic] += 1
            topic_examples[topic].append(q_item)

    avg_difficulty = "N/A"
    if difficulty_scores:
        # Built-in round() rounds exact halves to the nearest even integer.
        rounded = round(sum(difficulty_scores) / len(difficulty_scores))
        avg_difficulty = DIFFICULTY_LABEL.get(rounded, "Medium")

    def _percent(count: int) -> int:
        """Share of all questions, as a whole-number percentage."""
        return round((count / total_questions) * 100) if total_questions else 0

    topic_frequency = [
        {
            "label": topic,
            "count": count,
            "pct": _percent(count),
            "questions": topic_examples[topic],
        }
        for topic, count in topic_counter.most_common()
    ]
    question_types = [
        {"label": label, "count": count, "pct": _percent(count)}
        for label, count in type_counter.most_common()
    ]

    return {
        "course_code": code,
        "kpi": {
            "papers": len(papers),
            "questions": total_questions,
            "topics": len(topic_counter),
            "difficulty": avg_difficulty,
        },
        "topic_frequency": topic_frequency,
        "question_types": question_types,
        "all_questions": all_question_items,
        "difficulty_distribution": {
            "easy": difficulty_counter.get("easy", 0),
            "medium": difficulty_counter.get("medium", 0),
            "hard": difficulty_counter.get("hard", 0),
        },
        "high_yield_topics": [topic for topic, _ in topic_counter.most_common(5)],
    }
|
||||
208
backend/app/routers/attempts.py
Normal file
208
backend/app/routers/attempts.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""用户答题记录 + 拍照批改 + 错题本"""
|
||||
|
||||
import asyncio
|
||||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
from app.services.supabase_client import get_supabase
|
||||
from app.services.grader import ocr_photo, grade_answer
|
||||
from app.dependencies.auth import get_current_user_id
|
||||
|
||||
# Router that collects the attempt / error-book endpoints defined in this module.
router = APIRouter()
|
||||
|
||||
|
||||
class AttemptCreate(BaseModel):
    """Request body for recording one answer attempt."""

    # Id of the question that was answered.
    question_id: str
    attempt_type: str  # "select" | "input" | "photo"
    # The submitted answer; optional.
    user_answer: str | None = None
    # Whether the answer was correct; None when not judged.
    is_correct: bool | None = None
|
||||
|
||||
|
||||
class AttemptUpdate(BaseModel):
    """Request body for updating an attempt; fields left as None are not changed."""

    # New error-book membership flag.
    in_error_book: bool | None = None
    # New "mastered" flag.
    mastered: bool | None = None
|
||||
|
||||
|
||||
@router.post("/")
async def create_attempt(data: AttemptCreate, user_id: str = Depends(get_current_user_id)):
    """Record one answer attempt for the authenticated user.

    An attempt explicitly marked incorrect (``is_correct is False``) is
    added to the error book automatically; otherwise the ``in_error_book``
    column is not set by this endpoint.

    Returns:
        The inserted ``user_attempts`` row.
    """
    # Only an explicit False counts as wrong; None means "not judged".
    error_book_flag = {"in_error_book": True} if data.is_correct is False else {}
    payload = {
        "user_id": user_id,
        "question_id": data.question_id,
        "attempt_type": data.attempt_type,
        "user_answer": data.user_answer,
        "is_correct": data.is_correct,
        **error_book_flag,
    }

    inserted = get_supabase().table("user_attempts").insert(payload).execute()
    return inserted.data[0]
|
||||
|
||||
|
||||
@router.post("/photo")
async def photo_attempt(
    question_id: str = Form(...),
    photo: UploadFile = File(...),
    user_id: str = Depends(get_current_user_id),
):
    """Grade a photographed answer: upload photo, OCR it, grade it, store the attempt.

    The question is looked up before anything is uploaded, so an unknown
    ``question_id`` does not leave an orphaned file in storage or trigger
    OCR work.

    Args:
        question_id: Id of the question being answered (form field).
        photo: The uploaded image.
        user_id: Authenticated user id (injected dependency).

    Returns:
        A dict with the stored ``attempt`` row, the ``ocr_text`` and the raw
        ``grade`` result.

    Raises:
        HTTPException: 404 when the question does not exist, 400 when the
            uploaded photo is empty.
    """
    sb = get_supabase()

    # 1. Fetch the question first: it is both a validity check and the
    #    grading context.
    q_result = sb.table("paper_questions").select("*").eq("id", question_id).execute()
    if not q_result.data:
        raise HTTPException(status_code=404, detail="Question not found")
    question = q_result.data[0]

    # 2. Read photo
    photo_bytes = await photo.read()
    if not photo_bytes:
        raise HTTPException(status_code=400, detail="Uploaded photo is empty")

    # 3. Upload to storage.  The filename is client-controlled: keep only the
    #    final path component and fall back to a fixed name when it is
    #    missing or a dot segment.
    raw_name = (photo.filename or "").replace("\\", "/")
    safe_name = raw_name.rsplit("/", 1)[-1].strip()
    if safe_name in ("", ".", ".."):
        safe_name = "photo.jpg"
    storage_path = f"attempts/{user_id}/{question_id}/{safe_name}"
    bucket = sb.storage.from_("attempt-photos")
    bucket.upload(
        storage_path,
        photo_bytes,
        file_options={"content-type": photo.content_type or "image/jpeg", "upsert": "true"},
    )
    photo_url = bucket.get_public_url(storage_path)

    # 4. OCR (run in thread pool to avoid blocking event loop)
    ocr_text = await asyncio.to_thread(ocr_photo, photo_bytes)

    # 5. AI grading (run in thread pool)
    grade_result = await asyncio.to_thread(grade_answer, question, ocr_text)
    is_correct = grade_result.get("is_correct", False)

    # 6. Save attempt; wrong answers go straight into the error book.
    record = {
        "user_id": user_id,
        "question_id": question_id,
        "attempt_type": "photo",
        "photo_url": photo_url,
        "photo_ocr_text": ocr_text,
        "is_correct": is_correct,
        "feedback": grade_result.get("feedback", ""),
        "error_at_step": grade_result.get("error_at_step"),
        "in_error_book": not is_correct,
    }
    result = sb.table("user_attempts").insert(record).execute()

    return {
        "attempt": result.data[0],
        "ocr_text": ocr_text,
        "grade": grade_result,
    }
|
||||
|
||||
|
||||
@router.get("/error-book")
async def get_error_book(
    course_code: str | None = None,
    user_id: str = Depends(get_current_user_id),
):
    """Return the user's error book: unmastered attempts flagged ``in_error_book``.

    Each attempt is enriched with its question under the key
    ``paper_questions``, and the question carries its paper under ``paper``.
    Attempts whose question no longer exists are dropped.

    Args:
        course_code: Optional course filter, compared case-insensitively by
            upper-casing.  When given, attempts whose paper cannot be
            resolved are excluded as well, since their course is unknown.
        user_id: Authenticated user id (injected dependency).

    Returns:
        A list of enriched attempt dicts, newest first.
    """
    sb = get_supabase()
    attempts = (
        sb.table("user_attempts")
        .select("*")
        .eq("user_id", user_id)
        .eq("in_error_book", True)
        .eq("mastered", False)
        .order("created_at", desc=True)
        .execute()
        .data
    )
    if not attempts:
        return []

    question_ids = list({attempt["question_id"] for attempt in attempts})
    questions = (
        sb.table("paper_questions")
        .select("*")
        .in_("id", question_ids)
        .execute()
        .data
    )
    questions_by_id = {question["id"]: question for question in questions}

    paper_ids = list({question["paper_id"] for question in questions})
    papers = []
    if paper_ids:  # avoid issuing an IN () query with an empty list
        papers = (
            sb.table("papers")
            .select("id, course_code, year, term, exam_type, part_label")
            .in_("id", paper_ids)
            .execute()
            .data
        )
    papers_by_id = {paper["id"]: paper for paper in papers}

    wanted_course = course_code.upper() if course_code else None

    enriched = []
    for attempt in attempts:
        question = questions_by_id.get(attempt["question_id"])
        if not question:
            continue
        paper = papers_by_id.get(question["paper_id"])
        if wanted_course and (not paper or paper.get("course_code") != wanted_course):
            continue

        enriched.append(
            {
                **attempt,
                "paper_questions": {
                    **question,
                    "paper": paper,
                },
            }
        )
    return enriched
|
||||
|
||||
|
||||
@router.get("/by-paper/{paper_id}")
async def get_paper_attempts(paper_id: str, user_id: str = Depends(get_current_user_id)):
    """Return the user's latest photo-graded attempt for each question of a paper.

    The question ids of the paper are resolved first so the attempts query
    can be restricted to them (and to ``attempt_type == "photo"``) in the
    database, instead of loading every attempt the user has ever made.

    Args:
        paper_id: Id of the paper.
        user_id: Authenticated user id (injected dependency).

    Returns:
        At most one attempt row per question (the newest), newest first.
    """
    sb = get_supabase()

    question_rows = (
        sb.table("paper_questions")
        .select("id")
        .eq("paper_id", paper_id)
        .execute()
        .data
    )
    question_ids = [row["id"] for row in question_rows]
    if not question_ids:
        return []

    attempts = (
        sb.table("user_attempts")
        .select("question_id, is_correct, feedback, photo_ocr_text, attempt_type, created_at")
        .eq("user_id", user_id)
        .eq("attempt_type", "photo")
        .in_("question_id", question_ids)
        .order("created_at", desc=True)
        .execute()
        .data
    )

    # Rows arrive newest first, so the first row seen per question is the latest.
    seen: set[str] = set()
    result = []
    for attempt in attempts:
        qid = attempt["question_id"]
        if qid in seen:
            continue
        seen.add(qid)
        result.append(attempt)
    return result
|
||||
|
||||
|
||||
@router.patch("/{attempt_id}")
async def update_attempt(
    attempt_id: str,
    data: AttemptUpdate,
    user_id: str = Depends(get_current_user_id),
):
    """Update error-book flags (``in_error_book`` / ``mastered``) of an attempt.

    The update is scoped to the authenticated user, so an attempt owned by
    someone else is reported as not found rather than modified.

    Args:
        attempt_id: Id of the attempt to update.
        data: Fields to change; fields left as None are untouched.
        user_id: Authenticated user id (injected dependency).

    Returns:
        The updated ``user_attempts`` row.

    Raises:
        HTTPException: 400 when no field is provided, 404 when no attempt
            with this id belongs to the user.
    """
    update = {}
    if data.in_error_book is not None:
        update["in_error_book"] = data.in_error_book
    if data.mastered is not None:
        update["mastered"] = data.mastered
    if not update:
        raise HTTPException(status_code=400, detail="Nothing to update")

    sb = get_supabase()
    result = (
        sb.table("user_attempts")
        .update(update)
        .eq("id", attempt_id)
        .eq("user_id", user_id)  # ownership check
        .execute()
    )
    if not result.data:
        raise HTTPException(status_code=404, detail="Attempt not found")
    return result.data[0]
|
||||
142
backend/app/routers/papers.py
Normal file
142
backend/app/routers/papers.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""试卷上传 + 处理管线"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
|
||||
from app.services.supabase_client import get_supabase
|
||||
from app.services.text_extractor import extract_pdf, get_full_text
|
||||
from app.services.paper_processor import process_paper
|
||||
from app.dependencies.auth import get_current_user_id
|
||||
|
||||
# Router that collects the paper upload / lookup endpoints defined in this module.
router = APIRouter()
|
||||
|
||||
|
||||
def _upload_and_process_sync(
    paper_id: str,
    storage_path: str,
    paper_bytes: bytes,
    answer_bytes: bytes | None,
):
    """Upload the PDFs to storage, then run the paper processing pipeline.

    Intended to run in its own thread (see ``upload_paper``).  The storage
    upload is best-effort: a failure there is logged and processing still
    runs, because ``process_paper`` receives the file bytes directly.

    Args:
        paper_id: Id of the ``papers`` row created for this upload.
        storage_path: Folder inside the "papers" bucket for this paper.
        paper_bytes: Content of the paper PDF.
        answer_bytes: Content of the answer PDF, or None when not provided.
    """
    import logging  # local import: this module has no module-level logger

    log = logging.getLogger(__name__)
    sb = get_supabase()
    try:
        bucket = sb.storage.from_("papers")

        paper_storage_path = f"{storage_path}/paper.pdf"
        bucket.upload(
            paper_storage_path,
            paper_bytes,
            file_options={"content-type": "application/pdf", "upsert": "true"},
        )
        update_data: dict = {"paper_file_url": bucket.get_public_url(paper_storage_path)}

        if answer_bytes:
            answer_storage_path = f"{storage_path}/answer.pdf"
            bucket.upload(
                answer_storage_path,
                answer_bytes,
                file_options={"content-type": "application/pdf", "upsert": "true"},
            )
            update_data["answer_file_url"] = bucket.get_public_url(answer_storage_path)

        sb.table("papers").update(update_data).eq("id", paper_id).execute()
    except Exception:
        # Keep going (best-effort), but leave a trace instead of hiding the failure.
        log.exception("Storage upload failed for paper %s", paper_id)

    # process_paper is async; run it in a fresh event loop owned by this thread.
    try:
        asyncio.run(process_paper(paper_id, paper_bytes, answer_bytes))
    except Exception:
        # Record which paper failed, then re-raise so propagation is unchanged.
        log.exception("Processing failed for paper %s", paper_id)
        raise
|
||||
|
||||
|
||||
@router.get("/")
async def list_papers():
    """List all papers, newest first (papers are a shared asset visible to every user)."""
    columns = (
        "id, course_code, year, term, exam_type, status, question_count, "
        "total_score, difficulty_level, processing_step, processing_progress, "
        "processing_total, created_at"
    )
    query = get_supabase().table("papers").select(columns).order("created_at", desc=True)
    response = query.execute()
    return response.data
|
||||
|
||||
|
||||
@router.get("/mine")
async def my_papers(user_id: str = Depends(get_current_user_id)):
    """List the papers uploaded by the current user, newest first (any status, including processing)."""
    columns = (
        "id, course_code, year, term, exam_type, part_label, status, "
        "question_count, processing_step, processing_progress, "
        "processing_total, created_at"
    )
    query = (
        get_supabase()
        .table("papers")
        .select(columns)
        .eq("user_id", user_id)
        .order("created_at", desc=True)
    )
    response = query.execute()
    return response.data
|
||||
|
||||
|
||||
def _storage_key_part(value: str) -> str:
    """Make a form value safe to embed in a storage key segment.

    ASCII letters, digits, "-" and "_" are kept; every other character
    (including "/" and ".") is replaced by "_".
    """
    cleaned = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
        for ch in value.strip()
    )
    return cleaned or "_"


@router.post("/upload")
async def upload_paper(
    paper_file: UploadFile = File(...),
    answer_file: UploadFile | None = File(None),
    course_code: str = Form(...),
    year: int = Form(...),
    term: str = Form(...),
    exam_type: str = Form(...),
    user_id: str = Depends(get_current_user_id),
):
    """Upload a paper PDF (optionally with an answer PDF) and start background processing.

    A ``papers`` row with status "processing" is created immediately and its
    id returned; the storage upload and processing run in a separate thread.

    Returns:
        A dict with ``paper_id``, ``status`` ("processing") and a ``message``.

    Raises:
        HTTPException: 400 when the paper file is empty, 500 when the paper
            record could not be created.
    """
    sb = get_supabase()

    # 1. Read the file contents.
    paper_bytes = await paper_file.read()
    if not paper_bytes:
        raise HTTPException(status_code=400, detail="Paper file is empty")
    # An empty answer upload is treated the same as no answer file.
    answer_bytes = ((await answer_file.read()) or None) if answer_file else None

    # 2. Create the record right away (status=processing) so the request can return.
    normalized_code = course_code.upper()
    # The form values are client-controlled; sanitize them before they become
    # part of the storage key.
    # NOTE(review): two uploads with identical course/year/term/exam_type share
    # this folder and, with upsert enabled, overwrite each other's PDFs.
    storage_path = (
        f"{_storage_key_part(normalized_code)}/"
        f"{year}_{_storage_key_part(term)}_{_storage_key_part(exam_type)}"
    )
    paper_record = sb.table("papers").insert({
        "user_id": user_id,
        "course_code": normalized_code,
        "year": year,
        "term": term,
        "exam_type": exam_type,
        "paper_file_url": "",  # filled in by the background upload
        "answer_file_url": None,
        "status": "processing",
    }).execute()
    if not paper_record.data:
        raise HTTPException(status_code=500, detail="Failed to create paper record")

    paper_id = paper_record.data[0]["id"]

    # 3. Run upload + processing in a dedicated thread so the event loop is not blocked.
    threading.Thread(
        target=_upload_and_process_sync,
        args=(paper_id, storage_path, paper_bytes, answer_bytes),
        daemon=True,
    ).start()

    return {
        "paper_id": paper_id,
        "status": "processing",
        "message": "试卷已上传,正在处理中...",
    }
|
||||
|
||||
|
||||
@router.get("/{paper_id}")
async def get_paper(paper_id: str):
    """Return one paper row (including its processing status).

    Raises:
        HTTPException: 404 when no paper has this id.
    """
    matches = get_supabase().table("papers").select("*").eq("id", paper_id).execute().data
    if matches:
        return matches[0]
    raise HTTPException(status_code=404, detail="Paper not found")
|
||||
|
||||
|
||||
@router.get("/{paper_id}/questions")
async def get_questions(paper_id: str):
    """Return all question rows of a paper (every column), ordered by ``display_order``."""
    query = get_supabase().table("paper_questions").select("*")
    query = query.eq("paper_id", paper_id).order("display_order")
    return query.execute().data
|
||||
325
backend/app/routers/questions.py
Normal file
325
backend/app/routers/questions.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""题目相关:变式题生成 + 相似题召回"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
from app.services.supabase_client import get_supabase
|
||||
from app.services.grader import generate_variant
|
||||
from app.dependencies.auth import get_current_user_id
|
||||
|
||||
# Simple in-memory cache: question_id → (timestamp, result)
# The timestamp is a time.time() value taken when the entry was stored.
_similar_cache: dict[str, tuple[float, list]] = {}
# Seconds a cache entry stays valid.
_CACHE_TTL = 300  # 5 minutes
|
||||
|
||||
|
||||
class VariantUpdate(BaseModel):
    """Request body for updating a variant; a field left as None is not changed."""

    # New favorite flag.
    favorited: bool | None = None
|
||||
|
||||
# Router that collects the variant / similar-question endpoints defined in this module.
router = APIRouter()
|
||||
|
||||
|
||||
def normalized_labels(values: list[str] | None) -> dict[str, str]:
    """Map each label's lower-cased form to the label as written.

    Falsy entries are skipped.  When several labels share a lower-cased
    form, the last one provides the stored spelling.

    Args:
        values: Labels to index, or None.

    Returns:
        Dict of lower-cased label → original label.
    """
    return {label.lower(): label for label in (values or []) if label}
|
||||
|
||||
|
||||
def question_family(question: dict) -> str:
    """Return the question's format, else its type, else the string "unknown"."""
    family = question.get("question_format")
    if not family:
        family = question.get("question_type")
    return family if family else "unknown"
|
||||
|
||||
|
||||
def display_topics(question: dict) -> list[str]:
    """Return the topic labels to display for a question, without duplicates.

    ``analytics_topic`` comes first, followed by ``topic_tags``.  Only when
    both yield nothing is the ``topics`` list used instead.  Falsy entries
    are skipped and first-occurrence order is kept.
    """

    def _append_unique(dest: list[str], items) -> None:
        """Append each truthy item not already in *dest*."""
        for item in items or []:
            if item and item not in dest:
                dest.append(item)

    primary = question.get("analytics_topic")
    shown: list[str] = [primary] if primary else []
    _append_unique(shown, question.get("topic_tags"))
    if not shown:
        _append_unique(shown, question.get("topics"))
    return shown
|
||||
|
||||
|
||||
def similarity_score(
    target: dict,
    candidate: dict,
    text_score: float = 0.0,
) -> tuple[int, list[str]]:
    """Score how similar *candidate* is to *target* and explain why.

    Points: same ``analytics_topic`` 40; shared ``topic_tags`` 10 each (max
    20); shared ``skill_tags`` 10 each (max 20); same question family 10;
    same difficulty 5; text similarity ``round(text_score * 60)`` (max 20).
    The total is capped at 99.

    Args:
        target: The question being compared against.
        candidate: The question being scored.
        text_score: Externally computed text-similarity score; ignored when
            not positive.

    Returns:
        ``(score, reasons)`` where reasons are short human-readable strings.
    """
    points = 0
    why: list[str] = []

    def _shared(field: str) -> list[str]:
        """Labels of *field* present in both questions (case-insensitive), in
        the target's spelling, sorted by lower-cased form."""
        mine = normalized_labels(target.get(field))
        theirs = normalized_labels(candidate.get(field))
        return [mine[key] for key in sorted(mine.keys() & theirs.keys())]

    # Primary topic bucket: 40 pts
    primary = target.get("analytics_topic")
    same_primary = bool(primary) and primary == candidate.get("analytics_topic")
    if same_primary:
        points += 40
        why.append(f"Same topic: {primary}")

    # Concept overlap: up to 20 pts
    concepts = _shared("topic_tags")
    points += min(len(concepts) * 10, 20)
    # The concept reason is redundant once the primary topic already matched.
    if concepts and not same_primary:
        why.append("Shared concept: " + ", ".join(concepts[:2]))

    # Skill overlap: up to 20 pts
    skills = _shared("skill_tags")
    if skills:
        points += min(len(skills) * 10, 20)
        why.append("Shared skill: " + ", ".join(skills[:2]))

    # Same question format: 10 pts
    if question_family(target) == question_family(candidate):
        points += 10
        why.append("Same format")

    # Same difficulty: 5 pts
    candidate_difficulty = candidate.get("difficulty")
    if candidate_difficulty and candidate_difficulty == target.get("difficulty"):
        points += 5
        why.append("Same difficulty")

    # Text similarity: up to 20 pts; only mentioned when it contributes >= 4.
    if text_score > 0:
        wording_points = min(round(text_score * 60), 20)
        points += wording_points
        if wording_points >= 4:
            why.append("Similar wording")

    return min(points, 99), why
|
||||
|
||||
|
||||
@router.get("/variants/favorited")
async def get_favorited_variants(user_id: str = Depends(get_current_user_id)):
    """Return all variants the user has favorited, newest first (used by the Error Book).

    Each row embeds its source question (number, paper id) and that
    question's paper.
    """
    embedded_columns = (
        "*, paper_questions(question_number, paper_id, "
        "papers(id, course_code, year, term, exam_type, part_label))"
    )
    query = (
        get_supabase()
        .table("question_variants")
        .select(embedded_columns)
        .eq("user_id", user_id)
        .eq("favorited", True)
        .order("created_at", desc=True)
    )
    return query.execute().data
|
||||
|
||||
|
||||
@router.post("/{question_id}/variant")
async def create_variant(question_id: str, user_id: str = Depends(get_current_user_id)):
    """Generate a variant of a question and store it for the user.

    Returns:
        The inserted ``question_variants`` row, with
        ``source_question_number`` added.

    Raises:
        HTTPException: 404 when the question does not exist.
    """
    sb = get_supabase()

    lookup = sb.table("paper_questions").select("*").eq("id", question_id).execute()
    if not lookup.data:
        raise HTTPException(status_code=404, detail="Question not found")
    source_question = lookup.data[0]

    # generate_variant is synchronous; run it off the event loop.
    generated = await asyncio.to_thread(generate_variant, source_question)
    # The variant carries over the source question's knowledge reminder.
    generated["knowledge_reminder"] = source_question.get("knowledge_reminder", "")

    new_row = {
        "user_id": user_id,
        "source_question_id": question_id,
        "variant_data": generated,
        "favorited": False,
    }
    stored = sb.table("question_variants").insert(new_row).execute().data[0]
    stored["source_question_number"] = source_question["question_number"]
    return stored
|
||||
|
||||
|
||||
@router.get("/{question_id}/variants")
async def list_variants(question_id: str, user_id: str = Depends(get_current_user_id)):
    """Return the user's variants of one question, newest first.

    Every row gets ``source_question_number`` set to the question's number,
    or to "" when the question cannot be found.
    """
    sb = get_supabase()

    number_lookup = (
        sb.table("paper_questions").select("question_number").eq("id", question_id).execute()
    )
    question_number = ""
    if number_lookup.data:
        question_number = number_lookup.data[0]["question_number"]

    variants = (
        sb.table("question_variants")
        .select("*")
        .eq("user_id", user_id)
        .eq("source_question_id", question_id)
        .order("created_at", desc=True)
        .execute()
        .data
    )
    for variant in variants:
        variant["source_question_number"] = question_number
    return variants
|
||||
|
||||
|
||||
@router.patch("/variant/{variant_id}")
async def update_variant(variant_id: str, data: VariantUpdate, user_id: str = Depends(get_current_user_id)):
    """Update a variant owned by the user (favorite / unfavorite).

    Returns:
        The updated ``question_variants`` row.

    Raises:
        HTTPException: 400 when the body changes nothing, 404 when no
            variant with this id belongs to the user.
    """
    # `favorited` is the only updatable field, so None means an empty update.
    if data.favorited is None:
        raise HTTPException(status_code=400, detail="Nothing to update")
    changes: dict = {"favorited": data.favorited}

    query = get_supabase().table("question_variants").update(changes)
    updated = query.eq("id", variant_id).eq("user_id", user_id).execute().data
    if updated:
        return updated[0]
    raise HTTPException(status_code=404, detail="Variant not found")
|
||||
|
||||
|
||||
@router.delete("/variant/{variant_id}", status_code=204)
async def delete_variant(variant_id: str, user_id: str = Depends(get_current_user_id)):
    """Delete a variant owned by the user; responds 204 whether or not a row matched."""
    deletion = get_supabase().table("question_variants").delete()
    deletion = deletion.eq("id", variant_id).eq("user_id", user_id)
    deletion.execute()
|
||||
|
||||
|
||||
def _similar_source_label(paper: dict) -> str:
    """Build a label such as "2023 Fall Final Part A" from a paper row.

    Missing or NULL ``year`` / ``term`` / ``exam_type`` values are skipped
    instead of raising or being rendered as "None".
    """
    year = paper.get("year")
    pieces = [
        "" if year is None else str(year),
        (paper.get("term") or "").title(),
        (paper.get("exam_type") or "").title(),
    ]
    label = " ".join(piece for piece in pieces if piece)
    part_label = paper.get("part_label")
    if part_label:
        label = f"{label} Part {part_label}".strip()
    return label


@router.get("/{question_id}/similar")
async def get_similar_questions(question_id: str, limit: int = 6):
    """Retrieve similar questions from other ready papers of the same course.

    Lookup order: in-memory cache (valid for ``_CACHE_TTL`` seconds), then
    the question's stored ``similar_questions`` column, then an on-the-fly
    computation whose result is cached and written back to that column.

    Args:
        question_id: Id of the target question.
        limit: Maximum number of results; clamped to the range 1..12.

    Returns:
        A list of similar-question dicts, best match first, at most one per
        paper when computed on the fly.

    Raises:
        HTTPException: 404 when the question or its paper does not exist.
    """
    import logging  # local import: this module has no module-level logger

    log = logging.getLogger(__name__)
    page_size = max(1, min(limit, 12))
    now = time.time()

    # Cache hit
    cached = _similar_cache.get(question_id)
    if cached and (now - cached[0]) < _CACHE_TTL:
        return cached[1][:page_size]

    # Drop expired entries on a miss so the cache cannot grow without bound.
    expired_ids = [
        qid for qid, (stamp, _) in _similar_cache.items() if (now - stamp) >= _CACHE_TTL
    ]
    for qid in expired_ids:
        _similar_cache.pop(qid, None)

    sb = get_supabase()
    result = sb.table("paper_questions").select("*, similar_questions").eq("id", question_id).execute()
    if not result.data:
        raise HTTPException(status_code=404, detail="Question not found")

    target = result.data[0]

    # Pre-computed results are returned as-is (no refresh is performed here).
    if target.get("similar_questions"):
        precomputed = target["similar_questions"]
        _similar_cache[question_id] = (time.time(), precomputed)
        return precomputed[:page_size]

    # Fallback: compute on the fly for questions without stored results.
    paper_result = sb.table("papers").select("id, course_code").eq("id", target["paper_id"]).execute()
    if not paper_result.data:
        raise HTTPException(status_code=404, detail="Paper not found")

    course_code = paper_result.data[0]["course_code"]
    papers = (
        sb.table("papers")
        .select("id, course_code, year, term, exam_type, part_label")
        .eq("course_code", course_code)
        .eq("status", "ready")
        .execute()
        .data
    )
    # Questions from the target's own paper are never suggested.
    paper_ids = [paper["id"] for paper in papers if paper["id"] != target["paper_id"]]
    if not paper_ids:
        return []

    papers_by_id = {paper["id"]: paper for paper in papers}

    # Pre-filter by analytics_topic in the database when the target has one.
    candidates_query = (
        sb.table("paper_questions")
        .select(
            "id, paper_id, question_number, question_type, question_format, "
            "question_text, score, topics, analytics_topic, topic_tags, skill_tags, "
            "difficulty, knowledge_reminder, ai_hint, solution"
        )
        .in_("paper_id", paper_ids)
    )
    target_topic = target.get("analytics_topic")
    if target_topic:
        candidates_query = candidates_query.eq("analytics_topic", target_topic)

    candidates = candidates_query.execute().data
    if not candidates:
        return []

    # Batch text-similarity scores via RPC; skipped for large candidate sets.
    text_scores: dict[str, float] = {}
    if len(candidates) <= 50:
        try:
            rpc_result = sb.rpc(
                "text_similarity_scores",
                {
                    "query_text": target.get("question_text") or "",
                    "candidate_ids": [c["id"] for c in candidates],
                },
            ).execute()
            for row in rpc_result.data or []:
                text_scores[row["question_id"]] = float(row["text_score"] or 0)
        except Exception:
            # Text similarity is optional; rank without it but leave a trace.
            log.warning(
                "text_similarity_scores RPC failed for question %s",
                question_id,
                exc_info=True,
            )

    ranked = []
    for candidate in candidates:
        text_score = text_scores.get(candidate["id"], 0.0)
        match_percent, reasons = similarity_score(target, candidate, text_score)
        if match_percent < 20:
            continue
        paper = papers_by_id.get(candidate["paper_id"], {})
        ranked.append(
            {
                "id": candidate["id"],
                "paper_id": candidate["paper_id"],
                "source": _similar_source_label(paper),
                "question_number": candidate["question_number"],
                "match_percent": match_percent,
                "match_reasons": reasons,
                "question_type": question_family(candidate),
                "question_text": candidate["question_text"],
                "topics": display_topics(candidate),
                "difficulty": candidate.get("difficulty"),
                "knowledge_reminder": candidate.get("knowledge_reminder", ""),
                "ai_hint": candidate.get("ai_hint", ""),
                "solution": candidate.get("solution", ""),
            }
        )

    ranked.sort(key=lambda item: (-item["match_percent"], item["source"], item["question_number"]))

    # Keep only the best-scoring question per paper
    seen_papers: set[str] = set()
    deduped = []
    for item in ranked:
        if item["paper_id"] not in seen_papers:
            seen_papers.add(item["paper_id"])
            deduped.append(item)

    _similar_cache[question_id] = (time.time(), deduped)

    # Persist to DB so future requests can skip the computation (best-effort).
    try:
        sb.table("paper_questions").update({"similar_questions": deduped}).eq("id", question_id).execute()
    except Exception:
        log.warning(
            "Could not persist similar questions for %s", question_id, exc_info=True
        )

    return deduped[:page_size]
|
||||
Reference in New Issue
Block a user