"""Course-level analytics endpoints.""" from __future__ import annotations from collections import Counter, defaultdict from fastapi import APIRouter from app.services.supabase_client import get_supabase router = APIRouter() DIFFICULTY_SCORE = {"easy": 1, "medium": 2, "hard": 3} DIFFICULTY_LABEL = {1: "Easy", 2: "Medium", 3: "Hard"} # ── Topic normalization ────────────────────────────────────── # Map variant spellings to canonical label _TOPIC_ALIASES: dict[str, str] = { "numpy": "NumPy", "naïve bayes": "Naive Bayes", "naïve bayes classifier": "Naive Bayes", "naive bayes classifier": "Naive Bayes", "bayes classifier": "Naive Bayes", "bayes model": "Naive Bayes", "bayes' theorem": "Naive Bayes", "bayes' rule": "Naive Bayes", "k-nearest neighbors": "K-Nearest Neighbors (KNN)", "knn": "K-Nearest Neighbors (KNN)", "k-means clustering": "K-Means Clustering", "k-means": "K-Means Clustering", "k means": "K-Means Clustering", "multilayer perceptron": "Multilayer Perceptron (MLP)", "multi-layer perceptron": "Multilayer Perceptron (MLP)", "multi-layer perceptron (mlp)": "Multilayer Perceptron (MLP)", "mlp": "Multilayer Perceptron (MLP)", "single layer perceptron": "Perceptron", "convolutional neural network": "CNN", "convolutional neural network (cnn)": "CNN", "convolutional neural networks": "CNN", "cnn architecture": "CNN", "cnn properties": "CNN", "python fundamentals": "Python", "python programming": "Python", "python implementation": "Python", "advanced python programming": "Python", "python programming: convolutional neural network": "CNN", "cross-validation": "Cross Validation", "model evaluation implementation": "Model Evaluation", "digital image processing": "Image Processing", "computer vision": "Image Processing", "array slicing": "Array Slicing", "slicing": "Array Slicing", "array indexing": "Array Slicing", "array reshaping": "Reshape", "array views": "Array Slicing", "view vs copy": "Array Slicing", "boolean indexing": "Array Slicing", "arange": "NumPy", "newaxis": "NumPy", "expand dims": "NumPy", "transpose": "NumPy", "type casting": "NumPy", "element-wise operation": "NumPy", "array reduction": "NumPy", "multi-dimensional array": "NumPy", "dot product": "NumPy", "vectorization": "NumPy", "activation functions": "Activation Function", "linear activation function": "Activation Function", "neural network architecture": "Neural Networks", "hidden layer": "Neural Networks", "deep learning": "Neural Networks", "deep learning frameworks": "Neural Networks", "alpha-beta pruning": "Alpha-Beta Pruning", "minimax algorithm": "Minimax", "ethics of ai": "AI Ethics", "ethics": "AI Ethics", "cosine distance": "Cosine Similarity", "distance calculation": "Distance Metrics", "euclidean distance": "Distance Metrics", "manhattan distance": "Distance Metrics", "hamming distance": "Distance Metrics", "precision": "Model Evaluation", "recall": "Model Evaluation", "f1 score": "Model Evaluation", "macro f1 score": "Model Evaluation", "accuracy": "Model Evaluation", "classification accuracy": "Model Evaluation", "confusion matrix": "Model Evaluation", "convolution operation": "Convolution", "dilated convolution": "Convolution", "3d convolution": "Convolution", "gaussian likelihood": "Probability", "gaussian distribution": "Probability", "categorical likelihood": "Probability", "conditional probability": "Probability", "total probability theorem": "Probability", "probability assumptions": "Probability", "tensorflow": "Keras", "model summary": "Keras", "model construction": "Keras", "trainable parameters": "Parameter Calculation", "parameter reduction": "Parameter Calculation", "output shape calculation": "Parameter Calculation", "shape calculation": "Parameter Calculation", } def normalize_topic(label: str) -> str: return _TOPIC_ALIASES.get(label.lower().strip(), label) def extract_topic_labels(question: dict) -> list[str]: labels: list[str] = [] raw_labels: list[str] = [] analytics_topic = question.get("analytics_topic") if analytics_topic: raw_labels.append(analytics_topic) for tag in question.get("topic_tags") or []: if tag and tag not in raw_labels: raw_labels.append(tag) if not raw_labels: for tag in question.get("topics") or []: if tag and tag not in raw_labels: raw_labels.append(tag) # Normalize and deduplicate seen: set[str] = set() for raw in raw_labels: norm = normalize_topic(raw) if norm not in seen: seen.add(norm) labels.append(norm) return labels def extract_question_family(question: dict) -> str: return ( question.get("question_format") or question.get("question_type") or "unknown" ) @router.get("/courses") async def list_courses(): """返回所有有 ready 状态试卷的课程列表""" sb = get_supabase() rows = ( sb.table("papers") .select("course_code") .eq("status", "ready") .execute() .data ) codes = sorted({row["course_code"] for row in rows if row.get("course_code")}) return codes @router.get("/course/{course_code}") async def get_course_analytics(course_code: str): sb = get_supabase() papers = ( sb.table("papers") .select("id, course_code, year, term, exam_type, part_label, status") .eq("course_code", course_code.upper()) .eq("status", "ready") .order("year", desc=True) .execute() .data ) if not papers: return { "course_code": course_code.upper(), "kpi": {"papers": 0, "questions": 0, "topics": 0, "difficulty": "N/A"}, "topic_frequency": [], "question_types": [], "difficulty_distribution": {"easy": 0, "medium": 0, "hard": 0}, "high_yield_topics": [], } paper_ids = [paper["id"] for paper in papers] questions = ( sb.table("paper_questions") .select( "id, paper_id, question_number, question_type, question_format, " "question_text, score, topics, analytics_topic, topic_tags, difficulty" ) .in_("paper_id", paper_ids) .order("display_order") .execute() .data ) papers_by_id = {paper["id"]: paper for paper in papers} total_questions = len(questions) topic_counter: Counter[str] = Counter() type_counter: Counter[str] = Counter() difficulty_counter: Counter[str] = Counter() topic_examples: dict[str, list[dict]] = defaultdict(list) difficulty_scores: list[int] = [] all_question_items: list[dict] = [] for question in questions: question_type = extract_question_family(question) type_counter[question_type] += 1 difficulty = question.get("difficulty") if difficulty in DIFFICULTY_SCORE: difficulty_counter[difficulty] += 1 difficulty_scores.append(DIFFICULTY_SCORE[difficulty]) paper = papers_by_id.get(question["paper_id"], {}) source_label = ( f"{paper.get('year', '')} {paper.get('term', '').title()} " f"{paper.get('exam_type', '').title()}" ).strip() if paper.get("part_label"): source_label = f"{source_label} Part {paper['part_label']}" topics = extract_topic_labels(question) q_item = { "paper_id": paper.get("id"), "source": source_label, "question_number": question["question_number"], "preview": question["question_text"][:220], "difficulty": question.get("difficulty"), "question_type": question_type, "year": paper.get("year"), "term": paper.get("term"), "exam_type": paper.get("exam_type"), "topics": topics, } all_question_items.append(q_item) for topic in topics: topic_counter[topic] += 1 topic_examples[topic].append(q_item) avg_difficulty = "N/A" if difficulty_scores: rounded = round(sum(difficulty_scores) / len(difficulty_scores)) avg_difficulty = DIFFICULTY_LABEL.get(rounded, "Medium") topic_frequency = [] for topic, count in topic_counter.most_common(): pct = round((count / total_questions) * 100) if total_questions else 0 topic_frequency.append( { "label": topic, "count": count, "pct": pct, "questions": topic_examples[topic], } ) question_types = [] for label, count in type_counter.most_common(): pct = round((count / total_questions) * 100) if total_questions else 0 question_types.append({"label": label, "count": count, "pct": pct}) return { "course_code": course_code.upper(), "kpi": { "papers": len(papers), "questions": total_questions, "topics": len(topic_counter), "difficulty": avg_difficulty, }, "topic_frequency": topic_frequency, "question_types": question_types, "all_questions": all_question_items, "difficulty_distribution": { "easy": difficulty_counter.get("easy", 0), "medium": difficulty_counter.get("medium", 0), "hard": difficulty_counter.get("hard", 0), }, "high_yield_topics": [topic for topic, _ in topic_counter.most_common(5)], }