Files
PastpaperMaster/supabase/seeds/comp2211_problem_taxonomy_backfill.sql
Zhao 7a09167261 Initial commit: PastPaper Master full stack
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:27:47 +07:00

110 lines
12 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- ============================================
-- PastPaper Master — COMP2211 problem-level taxonomy backfill
-- Seed Date: 2026-03-24
-- ============================================
--
-- Purpose:
-- 1. Backfill coarse taxonomy for COMP2211 question rows after the paper has been
-- processed into `paper_questions`.
-- 2. Use the audited cover-page problem mapping as the initial analytics baseline.
-- 3. Only fill empty taxonomy fields, so later fine-grained per-question curation
-- can safely overwrite these defaults.
WITH mapping AS (
SELECT *
FROM (
VALUES
('COMP2211-2022-fall-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2022-fall-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
('COMP2211-2022-fall-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
('COMP2211-2022-fall-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2022-fall-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2022-fall-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
('COMP2211-2022-fall-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
('COMP2211-2022-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2022-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
('COMP2211-2022-spring-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
('COMP2211-2022-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2022-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2022-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
('COMP2211-2022-spring-midterm', '7', 'Perceptron and Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'weight_update']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-a', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2022-spring-final-part-a', '2', 'Na¨ıve Bayes and K-Nearest Neighbors', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-a', '3', 'Multilayer Perceptron (MLP)', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-a', '4', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-b', '1', 'Convolutional Neural Network (CNN)', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['forward_pass', 'architecture_reasoning', 'manual_computation']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-b', '2', 'Python Programming: Convolutional Neural Network', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'code_tracing', 'debugging']::TEXT[], 'coding'),
('COMP2211-2022-spring-final-part-b', '3', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
('COMP2211-2022-spring-final-part-b', '4', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer'),
('COMP2211-2023-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2023-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
('COMP2211-2023-spring-midterm', '3', 'Na¨ıve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
('COMP2211-2023-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2023-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2023-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
('COMP2211-2023-spring-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
('COMP2211-2024-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2024-spring-midterm', '2', 'Advanced Python for Artificial Intelligence', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'data_manipulation']::TEXT[], 'coding'),
('COMP2211-2024-spring-midterm', '3', 'Model Evaluation & Advanced Python Programming', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation', 'Python Fundamentals']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'implementation']::TEXT[], 'coding'),
('COMP2211-2024-spring-midterm', '4', 'Na¨ıve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
('COMP2211-2024-spring-midterm', '5', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2024-spring-midterm', '6', 'Leader Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
('COMP2211-2024-spring-midterm', '7', 'D-fold Cross Validation', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'reasoning']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
('COMP2211-2024-spring-final', '2', 'Advanced Python: Image Processing with NumPy', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'data_manipulation', 'filter_computation']::TEXT[], 'coding'),
('COMP2211-2024-spring-final', '3', 'Na¨ıve Bayes, K-Nearest Neighbors and Perceptron', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering', 'Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation', 'weight_update']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '4', 'Multi-layer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '5', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '6', 'Dilated Convolution and Dropout', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'comparison']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '7', 'Convolutional Neural Network', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'implementation']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '8', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
('COMP2211-2024-spring-final', '9', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer')
) AS t (
source_exam_key,
problem_number,
raw_topic,
analytics_topic,
topic_primary,
topic_tags,
skill_tags,
default_question_format
)
)
UPDATE paper_questions AS q
SET analytics_topic = COALESCE(q.analytics_topic, mapping.analytics_topic),
topic_primary = COALESCE(q.topic_primary, mapping.topic_primary),
topic_tags = CASE
WHEN q.topic_tags IS NULL OR cardinality(q.topic_tags) = 0 THEN mapping.topic_tags
ELSE q.topic_tags
END,
skill_tags = CASE
WHEN q.skill_tags IS NULL OR cardinality(q.skill_tags) = 0 THEN mapping.skill_tags
ELSE q.skill_tags
END,
topics = CASE
WHEN q.topics IS NULL OR cardinality(q.topics) = 0 THEN mapping.topic_tags
ELSE q.topics
END,
question_format = CASE
WHEN (q.question_format IS NULL OR q.question_format = '')
AND mapping.default_question_format IS NOT NULL
THEN mapping.default_question_format
ELSE q.question_format
END
FROM papers AS p
JOIN mapping
ON mapping.source_exam_key = p.source_exam_key
WHERE q.paper_id = p.id
AND p.source_kind = 'course_library'
AND p.course_code = 'COMP2211'
AND (
q.question_number = mapping.problem_number
OR q.question_number ~ ('^' || mapping.problem_number || '([^0-9].*)?$')
);