"""Split COMP2211 Spring 2024 final into subquestions.""" from __future__ import annotations import json import re from dataclasses import dataclass from pathlib import Path from app.services.supabase_client import get_supabase EXAM_KEY = "COMP2211-2024-spring-final" PROBLEM_SEED_PATH = ( Path(__file__).resolve().parent.parent / "pastpaper-scraper" / "reviews" / "COMP2211" / "problem_seed.json" ) TRUE_FALSE_OPTIONS = [{"label": "True", "text": "True"}, {"label": "False", "text": "False"}] @dataclass(frozen=True) class ChildSpec: question_number: str parent_question: str top_level_number: str path: tuple[str, ...] score: float question_type: str question_format: str | None = None analytics_topic: str | None = None topic_primary: str | None = None topic_tags: tuple[str, ...] | None = None skill_tags: tuple[str, ...] | None = None options: tuple[tuple[str, str], ...] | None = None correct_option: str | None = None correct_answer: str | None = None page_number: int = 1 def short_answer( question_number: str, parent_question: str, top_level_number: str, path: tuple[str, ...], score: float, *, analytics_topic: str | None = None, topic_primary: str | None = None, topic_tags: tuple[str, ...] | None = None, skill_tags: tuple[str, ...] | None = None, correct_answer: str | None = None, page_number: int, ) -> ChildSpec: return ChildSpec( question_number=question_number, parent_question=parent_question, top_level_number=top_level_number, path=path, score=score, question_type="long_question", question_format="short_answer", analytics_topic=analytics_topic, topic_primary=topic_primary, topic_tags=topic_tags, skill_tags=skill_tags, correct_answer=correct_answer, page_number=page_number, ) CHILDREN: list[ChildSpec] = [ ChildSpec("1a", "1", "1", ("a",), 1, "true_false", "true_false", "Python Fundamentals", "Python Fundamentals", ("Python Fundamentals",), ("concept_check", "code_tracing"), page_number=2), ChildSpec("1b", "1", "1", ("b",), 1, "true_false", "true_false", "Probabilistic Models", "Probabilistic Models", ("Probabilistic Models",), ("concept_check", "classification_decision"), page_number=2), ChildSpec("1c", "1", "1", ("c",), 1, "true_false", "true_false", "KNN and Clustering", "KNN and Clustering", ("KNN and Clustering",), ("concept_check", "algorithm_property"), page_number=2), ChildSpec("1d", "1", "1", ("d",), 1, "true_false", "true_false", "KNN and Clustering", "KNN and Clustering", ("KNN and Clustering",), ("concept_check", "algorithm_property"), page_number=2), ChildSpec("1e", "1", "1", ("e",), 1, "true_false", "true_false", "Perceptron and MLP", "Perceptron and MLP", ("Perceptron and MLP",), ("concept_check", "activation_reasoning"), page_number=2), ChildSpec("1f", "1", "1", ("f",), 1, "true_false", "true_false", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("concept_check", "image_processing"), page_number=2), ChildSpec("1g", "1", "1", ("g",), 1, "true_false", "true_false", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("concept_check", "cnn_complexity"), page_number=2), ChildSpec("1h", "1", "1", ("h",), 1, "true_false", "true_false", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("concept_check", "regularization"), page_number=2), ChildSpec("1i", "1", "1", ("i",), 1, "true_false", "true_false", "Search and Games", "Search and Games", ("Search and Games",), ("concept_check", "pruning_reasoning"), page_number=2), ChildSpec("1j", "1", "1", ("j",), 1, "true_false", "true_false", "Ethics of AI", "Ethics of AI", ("Ethics of AI",), ("concept_check", "research_ethics"), page_number=2), ChildSpec("2a", "2", "2", ("a",), 4, "long_question", "coding", "Python Fundamentals", "Python Fundamentals", ("Python Fundamentals",), ("implementation", "vectorization", "masking"), page_number=3), ChildSpec("2b", "2", "2", ("b",), 6, "long_question", "coding", "Python Fundamentals", "Python Fundamentals", ("Python Fundamentals",), ("implementation", "convolution", "array_manipulation"), page_number=4), short_answer("3a_i", "3a", "3", ("a", "i"), 1.5, analytics_topic="Probabilistic Models", topic_primary="Probabilistic Models", topic_tags=("Probabilistic Models",), skill_tags=("manual_computation", "probability_reasoning"), page_number=6), short_answer("3a_ii", "3a", "3", ("a", "ii"), 1.5, analytics_topic="Probabilistic Models", topic_primary="Probabilistic Models", topic_tags=("Probabilistic Models",), skill_tags=("manual_computation", "probability_reasoning"), page_number=6), short_answer("3a_iii", "3a", "3", ("a", "iii"), 1.5, analytics_topic="Probabilistic Models", topic_primary="Probabilistic Models", topic_tags=("Probabilistic Models",), skill_tags=("manual_computation", "probability_reasoning"), page_number=6), short_answer("3a_iv", "3a", "3", ("a", "iv"), 1.5, analytics_topic="Probabilistic Models", topic_primary="Probabilistic Models", topic_tags=("Probabilistic Models",), skill_tags=("manual_computation", "probability_reasoning"), page_number=6), short_answer("3b_i", "3b", "3", ("b", "i"), 1.5, analytics_topic="Evaluation and Validation", topic_primary="Evaluation and Validation", topic_tags=("Evaluation and Validation",), skill_tags=("validation_reasoning",), page_number=6), short_answer("3b_ii", "3b", "3", ("b", "ii"), 1.5, analytics_topic="Evaluation and Validation", topic_primary="Evaluation and Validation", topic_tags=("Evaluation and Validation",), skill_tags=("validation_reasoning",), page_number=6), short_answer("3b_iii", "3b", "3", ("b", "iii"), 1.5, analytics_topic="Evaluation and Validation", topic_primary="Evaluation and Validation", topic_tags=("Evaluation and Validation",), skill_tags=("validation_reasoning",), page_number=6), short_answer("3c", "3", "3", ("c",), 1.5, analytics_topic="Perceptron and MLP", topic_primary="Perceptron and MLP", topic_tags=("Perceptron and MLP",), skill_tags=("linearity_reasoning", "classification_decision"), page_number=6), short_answer("4a_i", "4a", "4", ("a", "i"), 2.5, analytics_topic="Perceptron and MLP", topic_primary="Perceptron and MLP", topic_tags=("Perceptron and MLP",), skill_tags=("parameter_counting",), page_number=7), short_answer("4a_ii", "4a", "4", ("a", "ii"), 2.5, analytics_topic="Perceptron and MLP", topic_primary="Perceptron and MLP", topic_tags=("Perceptron and MLP",), skill_tags=("model_selection",), page_number=7), short_answer("4b", "4", "4", ("b",), 1, analytics_topic="Perceptron and MLP", topic_primary="Perceptron and MLP", topic_tags=("Perceptron and MLP",), skill_tags=("concept_explanation",), page_number=7), short_answer("4c", "4", "4", ("c",), 2, analytics_topic="Perceptron and MLP", topic_primary="Perceptron and MLP", topic_tags=("Perceptron and MLP",), skill_tags=("activation_reasoning", "optimization_reasoning"), page_number=7), ChildSpec("4d_i", "4d", "4", ("d", "i"), 1.5, "long_question", "long_answer", "Perceptron and MLP", "Perceptron and MLP", ("Perceptron and MLP",), ("forward_pass", "activation_reasoning"), page_number=8), ChildSpec("4d_ii", "4d", "4", ("d", "ii"), 1.5, "long_question", "long_answer", "Perceptron and MLP", "Perceptron and MLP", ("Perceptron and MLP",), ("backpropagation", "weight_update"), page_number=8), ChildSpec("5a", "5", "5", ("a",), 4.5, "long_question", "long_answer", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("histogram_reasoning", "image_transform"), page_number=9), ChildSpec("5b", "5", "5", ("b",), 3, "long_question", "long_answer", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("thresholding", "manual_computation"), page_number=10), ChildSpec("5c", "5", "5", ("c",), 2, "long_question", "long_answer", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("padding", "manual_construction"), page_number=10), short_answer("5d_i", "5d", "5", ("d", "i"), 0.5, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("filter_effect_reasoning",), page_number=11), short_answer("5d_ii", "5d", "5", ("d", "ii"), 0.5, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("filter_effect_reasoning",), page_number=11), short_answer("5d_iii", "5d", "5", ("d", "iii"), 0.5, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("filter_effect_reasoning",), page_number=11), short_answer("5e", "5", "5", ("e",), 2, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("concept_explanation", "local_vs_global"), page_number=11), ChildSpec("6a", "6", "6", ("a",), 10, "long_question", "coding", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("implementation", "convolution", "debugging"), page_number=12), ChildSpec("6b", "6", "6", ("b",), 3, "long_question", "coding", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("implementation", "regularization"), page_number=15), short_answer("7a_i", "7a", "7", ("a", "i"), 1, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("cnn_architecture",), page_number=16), short_answer("7a_ii", "7a", "7", ("a", "ii"), 4, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("shape_reasoning", "parameter_counting"), page_number=16), short_answer("7a_iii", "7a", "7", ("a", "iii"), 3, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("overfitting", "regularization"), page_number=16), ChildSpec("7b", "7", "7", ("b",), 5, "long_question", "long_answer", "Vision and CNN", "Vision and CNN", ("Vision and CNN",), ("manual_computation", "cnn_forward_pass"), page_number=17), short_answer("7c_i", "7c", "7", ("c", "i"), 2, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("shape_reasoning", "3d_convolution"), page_number=17), short_answer("7c_ii", "7c", "7", ("c", "ii"), 1.5, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("parameter_counting", "3d_convolution"), page_number=17), short_answer("7c_iii", "7c", "7", ("c", "iii"), 1.5, analytics_topic="Vision and CNN", topic_primary="Vision and CNN", topic_tags=("Vision and CNN",), skill_tags=("parameter_counting", "3d_convolution"), page_number=17), short_answer("8a_i", "8a", "8", ("a", "i"), 1, analytics_topic="Search and Games", topic_primary="Search and Games", topic_tags=("Search and Games",), skill_tags=("tree_search", "manual_tracing"), page_number=18), short_answer("8a_ii", "8a", "8", ("a", "ii"), 3, analytics_topic="Search and Games", topic_primary="Search and Games", topic_tags=("Search and Games",), skill_tags=("pruning", "manual_tracing"), page_number=18), short_answer("8a_iii", "8a", "8", ("a", "iii"), 1, analytics_topic="Search and Games", topic_primary="Search and Games", topic_tags=("Search and Games",), skill_tags=("game_reasoning",), page_number=18), short_answer("8b_i", "8b", "8", ("b", "i"), 2.5, analytics_topic="Search and Games", topic_primary="Search and Games", topic_tags=("Search and Games",), skill_tags=("utility_reasoning",), page_number=18), short_answer("8b_ii", "8b", "8", ("b", "ii"), 2.5, analytics_topic="Search and Games", topic_primary="Search and Games", topic_tags=("Search and Games",), skill_tags=("pruning_reasoning", "concept_explanation"), page_number=18), short_answer("9", "9", "9", (), 3, analytics_topic="Ethics of AI", topic_primary="Ethics of AI", topic_tags=("Ethics of AI",), skill_tags=("concept_explanation", "governance"), page_number=19), ] MARKER_RE = re.compile(r"(?m)^\(([a-z]+|[ivx]+)\)\s*") def split_sections(text: str) -> tuple[str, dict[str, str]]: matches = list(MARKER_RE.finditer(text)) if not matches: return text.strip(), {} intro = text[: matches[0].start()].strip() sections: dict[str, str] = {} for idx, match in enumerate(matches): marker = match.group(1) end = matches[idx + 1].start() if idx + 1 < len(matches) else len(text) sections[marker] = text[match.start() : end].strip() return intro, sections def extract_segment(text: str, path: tuple[str, ...]) -> str: if not path: return text.strip() current = text.strip() carried_intro: list[str] = [] for depth, marker in enumerate(path): intro, sections = split_sections(current) if depth == 0 and intro: carried_intro.append(intro) current = sections.get(marker, current) return "\n".join(part for part in [*carried_intro, current] if part).strip() def extract_true_false_answers(answer_text: str) -> dict[str, str]: answers: dict[str, str] = {} table_match = re.search(r"Answer\s+(T\s+F\s+T\s+F\s+F\s+T\s+F\s+F\s+F\s+T)", answer_text, re.S) if table_match: seq = re.findall(r"[TF]", table_match.group(1)) if len(seq) == 10: for idx, val in enumerate(seq): answers[chr(ord("a") + idx)] = val return answers seq = re.findall(r"\b([TF])\b", answer_text) if len(seq) >= 10: for idx, val in enumerate(seq[:10]): answers[chr(ord("a") + idx)] = val return answers def load_seed_rows() -> dict[str, dict]: data = json.loads(PROBLEM_SEED_PATH.read_text()) return {row["question_number"]: row for row in data if row["source_exam_key"] == EXAM_KEY} def main() -> None: sb = get_supabase() paper = sb.table("papers").select("id").eq("source_exam_key", EXAM_KEY).execute().data[0] paper_id = paper["id"] current_rows = ( sb.table("paper_questions") .select("*") .eq("paper_id", paper_id) .order("display_order") .execute() .data ) existing_by_number = {row["question_number"]: row for row in current_rows} parent_rows = load_seed_rows() tf_answers = extract_true_false_answers(parent_rows["1"]["raw_answer_text"] or "") inserts = [] for display_order, child in enumerate(CHILDREN, start=1): parent = parent_rows[child.top_level_number] existing = existing_by_number.get(child.question_number, {}) question_text = extract_segment(parent["question_text"] or "", child.path) raw_answer_text = extract_segment(parent["raw_answer_text"] or "", child.path) if child.path else (parent["raw_answer_text"] or "") options = None correct_option = child.correct_option if child.question_type == "true_false": options = TRUE_FALSE_OPTIONS correct_option = tf_answers.get(child.path[0]) elif child.options: options = [{"label": label, "text": text} for label, text in child.options] inserts.append( { "paper_id": paper_id, "question_number": child.question_number, "parent_question": child.parent_question, "display_order": display_order, "question_type": child.question_type, "question_format": child.question_format, "question_text": question_text, "score": child.score, "page_number": child.page_number, "page_y_ratio": existing.get("page_y_ratio"), "options": options, "correct_option": correct_option, "correct_answer": child.correct_answer, "raw_answer_text": raw_answer_text, "topics": existing.get("topics") or (list(child.topic_tags) if child.topic_tags else parent.get("topics")), "topic_primary": existing.get("topic_primary") or child.topic_primary or parent.get("topic_primary"), "analytics_topic": existing.get("analytics_topic") or child.analytics_topic or parent.get("analytics_topic"), "topic_tags": existing.get("topic_tags") or (list(child.topic_tags) if child.topic_tags else parent.get("topic_tags")), "skill_tags": existing.get("skill_tags") or (list(child.skill_tags) if child.skill_tags else parent.get("skill_tags")), "difficulty": existing.get("difficulty") or parent.get("difficulty"), "knowledge_reminder": existing.get("knowledge_reminder", ""), "ai_hint": existing.get("ai_hint", ""), "solution": existing.get("solution", ""), } ) sb.table("paper_questions").delete().eq("paper_id", paper_id).execute() sb.table("paper_questions").insert(inserts).execute() sb.table("papers").update({"question_count": len(inserts), "status": "processing"}).eq("id", paper_id).execute() print(f"Inserted {len(inserts)} rows for {EXAM_KEY}.") if __name__ == "__main__": main()