Initial commit: PastPaper Master full stack
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
285
backend/app/routers/analytics.py
Normal file
285
backend/app/routers/analytics.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""Course-level analytics endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.services.supabase_client import get_supabase
|
||||
|
||||
# Router instance that the endpoints in this module register on via @router.get.
router = APIRouter()

# Numeric weight for each recognized difficulty value; get_course_analytics
# averages these weights to derive a course-level difficulty.
DIFFICULTY_SCORE = {"easy": 1, "medium": 2, "hard": 3}
# Display label for a rounded average difficulty weight.
DIFFICULTY_LABEL = {1: "Easy", 2: "Medium", 3: "Hard"}
|
||||
|
||||
# ── Topic normalization ──────────────────────────────────────
# Map variant spellings to canonical label
#
# Keys must be lowercase with no surrounding whitespace: normalize_topic()
# lowercases and strips the incoming label before looking it up here.
# Values are the canonical display labels reported by the analytics endpoints.
# A label with no entry in this table is not rewritten to a canonical form.
_TOPIC_ALIASES: dict[str, str] = {
    "numpy": "NumPy",
    # Naive Bayes (including the accented "naïve" spelling)
    "naïve bayes": "Naive Bayes",
    "naïve bayes classifier": "Naive Bayes",
    "naive bayes classifier": "Naive Bayes",
    "bayes classifier": "Naive Bayes",
    "bayes model": "Naive Bayes",
    "bayes' theorem": "Naive Bayes",
    "bayes' rule": "Naive Bayes",
    # KNN / K-Means
    "k-nearest neighbors": "K-Nearest Neighbors (KNN)",
    "knn": "K-Nearest Neighbors (KNN)",
    "k-means clustering": "K-Means Clustering",
    "k-means": "K-Means Clustering",
    "k means": "K-Means Clustering",
    # Perceptrons
    "multilayer perceptron": "Multilayer Perceptron (MLP)",
    "multi-layer perceptron": "Multilayer Perceptron (MLP)",
    "multi-layer perceptron (mlp)": "Multilayer Perceptron (MLP)",
    "mlp": "Multilayer Perceptron (MLP)",
    "single layer perceptron": "Perceptron",
    # CNN
    "convolutional neural network": "CNN",
    "convolutional neural network (cnn)": "CNN",
    "convolutional neural networks": "CNN",
    "cnn architecture": "CNN",
    "cnn properties": "CNN",
    # Python
    "python fundamentals": "Python",
    "python programming": "Python",
    "python implementation": "Python",
    "advanced python programming": "Python",
    # This combined label is filed under CNN, not Python.
    "python programming: convolutional neural network": "CNN",
    "cross-validation": "Cross Validation",
    "model evaluation implementation": "Model Evaluation",
    # Image processing
    "digital image processing": "Image Processing",
    "computer vision": "Image Processing",
    # Array slicing / reshaping
    "array slicing": "Array Slicing",
    "slicing": "Array Slicing",
    "array indexing": "Array Slicing",
    "array reshaping": "Reshape",
    "array views": "Array Slicing",
    "view vs copy": "Array Slicing",
    "boolean indexing": "Array Slicing",
    # Fine-grained array operations that are all reported as "NumPy"
    "arange": "NumPy",
    "newaxis": "NumPy",
    "expand dims": "NumPy",
    "transpose": "NumPy",
    "type casting": "NumPy",
    "element-wise operation": "NumPy",
    "array reduction": "NumPy",
    "multi-dimensional array": "NumPy",
    "dot product": "NumPy",
    "vectorization": "NumPy",
    # Neural networks
    "activation functions": "Activation Function",
    "linear activation function": "Activation Function",
    "neural network architecture": "Neural Networks",
    "hidden layer": "Neural Networks",
    "deep learning": "Neural Networks",
    "deep learning frameworks": "Neural Networks",
    # Search / game playing
    "alpha-beta pruning": "Alpha-Beta Pruning",
    "minimax algorithm": "Minimax",
    # Ethics
    "ethics of ai": "AI Ethics",
    "ethics": "AI Ethics",
    # Distances and similarity
    "cosine distance": "Cosine Similarity",
    "distance calculation": "Distance Metrics",
    "euclidean distance": "Distance Metrics",
    "manhattan distance": "Distance Metrics",
    "hamming distance": "Distance Metrics",
    # Individual evaluation metrics that are all reported as "Model Evaluation"
    "precision": "Model Evaluation",
    "recall": "Model Evaluation",
    "f1 score": "Model Evaluation",
    "macro f1 score": "Model Evaluation",
    "accuracy": "Model Evaluation",
    "classification accuracy": "Model Evaluation",
    "confusion matrix": "Model Evaluation",
    # Convolution
    "convolution operation": "Convolution",
    "dilated convolution": "Convolution",
    "3d convolution": "Convolution",
    # Probability
    "gaussian likelihood": "Probability",
    "gaussian distribution": "Probability",
    "categorical likelihood": "Probability",
    "conditional probability": "Probability",
    "total probability theorem": "Probability",
    "probability assumptions": "Probability",
    # Framework-related labels that are all reported as "Keras"
    "tensorflow": "Keras",
    "model summary": "Keras",
    "model construction": "Keras",
    # Parameter / shape calculations
    "trainable parameters": "Parameter Calculation",
    "parameter reduction": "Parameter Calculation",
    "output shape calculation": "Parameter Calculation",
    "shape calculation": "Parameter Calculation",
}
|
||||
|
||||
|
||||
def normalize_topic(label: str) -> str:
    """Return the canonical display label for a raw topic label.

    The alias lookup ignores letter case and surrounding whitespace.

    Args:
        label: Raw topic label as stored on a question.

    Returns:
        The canonical label from ``_TOPIC_ALIASES`` when an alias matches;
        otherwise the input with surrounding whitespace removed and its
        original casing preserved.
    """
    # Strip once and reuse the result for the fallback: returning the raw
    # label would let "Probability" and " Probability " be counted as two
    # distinct topics even though they share a lookup key.
    stripped = label.strip()
    return _TOPIC_ALIASES.get(stripped.lower(), stripped)
|
||||
|
||||
|
||||
def extract_topic_labels(question: dict) -> list[str]:
    """Collect the normalized, de-duplicated topic labels of one question.

    Label sources, in priority order:

    1. ``analytics_topic`` (a single label), followed by
    2. every non-empty entry of ``topic_tags``;
    3. the entries of ``topics`` are consulted only when the first two
       sources produced nothing.

    Each label is passed through ``normalize_topic`` and the first
    occurrence of every normalized label is kept, in encounter order.
    """
    candidates = []

    primary = question.get("analytics_topic")
    if primary:
        candidates.append(primary)

    candidates.extend(tag for tag in question.get("topic_tags") or [] if tag)

    if not candidates:
        # Fall back to the "topics" field only when nothing else is available.
        candidates.extend(tag for tag in question.get("topics") or [] if tag)

    # dict.fromkeys keeps the first occurrence of each key in insertion
    # order, which removes both raw and post-normalization duplicates.
    return list(dict.fromkeys(normalize_topic(raw) for raw in candidates))
|
||||
|
||||
|
||||
def extract_question_family(question: dict) -> str:
    """Return the question's family used for the question-type breakdown.

    Prefers ``question_format``, then ``question_type``; an absent or falsy
    value is skipped. Falls back to ``"unknown"`` when neither is usable.
    """
    for field in ("question_format", "question_type"):
        family = question.get(field)
        if family:
            return family
    return "unknown"
|
||||
|
||||
|
||||
@router.get("/courses")
async def list_courses():
    """Return the sorted list of course codes that have a paper with status "ready"."""
    response = (
        get_supabase()
        .table("papers")
        .select("course_code")
        .eq("status", "ready")
        .execute()
    )

    # Several papers can share a course code, so collect them into a set and
    # skip rows whose course_code is missing or empty.
    distinct_codes = set()
    for row in response.data:
        if row.get("course_code"):
            distinct_codes.add(row["course_code"])
    return sorted(distinct_codes)
|
||||
|
||||
|
||||
def _paper_source_label(paper: dict) -> str:
    """Build the human-readable source label of a paper.

    The label is ``"<year> <Term> <Exam Type>"`` with ``" Part <part_label>"``
    appended when the paper has a part label. Missing or null fields are
    rendered as empty strings instead of raising or printing ``"None"``.
    """
    year = paper.get("year")
    # The columns are always selected, so a NULL in the database arrives as
    # a present key holding None; `.get(key, "")` would not cover that case.
    term = paper.get("term") or ""
    exam_type = paper.get("exam_type") or ""
    label = f"{'' if year is None else year} {term.title()} {exam_type.title()}".strip()
    if paper.get("part_label"):
        label = f"{label} Part {paper['part_label']}"
    return label


def _percent_of(count: int, total: int) -> int:
    """Return ``count`` as a whole-number percentage of ``total`` (0 when total is 0)."""
    return round((count / total) * 100) if total else 0


@router.get("/course/{course_code}")
async def get_course_analytics(course_code: str):
    """Return aggregated analytics for all "ready" papers of one course.

    Args:
        course_code: Course identifier; it is upper-cased before querying.

    Returns:
        A dict with the keys ``course_code``, ``kpi``, ``topic_frequency``,
        ``question_types``, ``all_questions``, ``difficulty_distribution``
        and ``high_yield_topics``. The same keys are present (with empty or
        zero values) when the course has no ready papers.
    """
    # NOTE(review): the Supabase calls below are not awaited; if the client
    # is synchronous they block the event loop while running — confirm.
    sb = get_supabase()
    code = course_code.upper()

    papers = (
        sb.table("papers")
        .select("id, course_code, year, term, exam_type, part_label, status")
        .eq("course_code", code)
        .eq("status", "ready")
        .order("year", desc=True)
        .execute()
        .data
    )
    if not papers:
        # Keep the response shape identical to the populated case so that
        # clients can read every key (including "all_questions") blindly.
        return {
            "course_code": code,
            "kpi": {"papers": 0, "questions": 0, "topics": 0, "difficulty": "N/A"},
            "topic_frequency": [],
            "question_types": [],
            "all_questions": [],
            "difficulty_distribution": {"easy": 0, "medium": 0, "hard": 0},
            "high_yield_topics": [],
        }

    paper_ids = [paper["id"] for paper in papers]
    questions = (
        sb.table("paper_questions")
        .select(
            "id, paper_id, question_number, question_type, question_format, "
            "question_text, score, topics, analytics_topic, topic_tags, difficulty"
        )
        .in_("paper_id", paper_ids)
        .order("display_order")
        .execute()
        .data
    )

    papers_by_id = {paper["id"]: paper for paper in papers}
    total_questions = len(questions)
    topic_counter: Counter[str] = Counter()
    type_counter: Counter[str] = Counter()
    difficulty_counter: Counter[str] = Counter()
    topic_examples: dict[str, list[dict]] = defaultdict(list)
    difficulty_scores: list[int] = []
    all_question_items: list[dict] = []

    for question in questions:
        question_type = extract_question_family(question)
        type_counter[question_type] += 1

        # Only the difficulty values known to DIFFICULTY_SCORE are counted;
        # anything else (including a missing value) is ignored.
        difficulty = question.get("difficulty")
        if difficulty in DIFFICULTY_SCORE:
            difficulty_counter[difficulty] += 1
            difficulty_scores.append(DIFFICULTY_SCORE[difficulty])

        paper = papers_by_id.get(question["paper_id"], {})
        topics = extract_topic_labels(question)
        q_item = {
            "paper_id": paper.get("id"),
            "source": _paper_source_label(paper),
            "question_number": question["question_number"],
            # A NULL question_text yields an empty preview instead of a TypeError.
            "preview": (question.get("question_text") or "")[:220],
            "difficulty": question.get("difficulty"),
            "question_type": question_type,
            "year": paper.get("year"),
            "term": paper.get("term"),
            "exam_type": paper.get("exam_type"),
            "topics": topics,
        }
        all_question_items.append(q_item)

        # The same item dict is shared between "all_questions" and every
        # topic it belongs to.
        for topic in topics:
            topic_counter[topic] += 1
            topic_examples[topic].append(q_item)

    avg_difficulty = "N/A"
    if difficulty_scores:
        # round() rounds halves to the nearest even integer, so an average
        # of exactly 1.5 or 2.5 is reported as "Medium".
        rounded = round(sum(difficulty_scores) / len(difficulty_scores))
        avg_difficulty = DIFFICULTY_LABEL.get(rounded, "Medium")

    # Percentages are relative to the total question count; a question may
    # carry several topics, so topic percentages can add up to more than 100.
    topic_frequency = [
        {
            "label": topic,
            "count": count,
            "pct": _percent_of(count, total_questions),
            "questions": topic_examples[topic],
        }
        for topic, count in topic_counter.most_common()
    ]
    question_types = [
        {"label": label, "count": count, "pct": _percent_of(count, total_questions)}
        for label, count in type_counter.most_common()
    ]

    return {
        "course_code": code,
        "kpi": {
            "papers": len(papers),
            "questions": total_questions,
            "topics": len(topic_counter),
            "difficulty": avg_difficulty,
        },
        "topic_frequency": topic_frequency,
        "question_types": question_types,
        "all_questions": all_question_items,
        "difficulty_distribution": {
            "easy": difficulty_counter.get("easy", 0),
            "medium": difficulty_counter.get("medium", 0),
            "hard": difficulty_counter.get("hard", 0),
        },
        "high_yield_topics": [topic for topic, _ in topic_counter.most_common(5)],
    }
|
||||
Reference in New Issue
Block a user