Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions
--- a/supabase/migrations/001_init_schema.sql
+++ b/supabase/migrations/001_init_schema.sql
@@ -0,0 +1,207 @@
+-- ============================================
+-- PastPaper Master — Initial database schema
+-- Version: 001
+-- Date: 2025-03-11
+-- ============================================
+
+-- Enable required extensions.
+-- NOTE(review): the tables below default their ids to gen_random_uuid(), which
+-- is built into PostgreSQL 13+ (pgcrypto on older versions) and is not provided
+-- by uuid-ossp — confirm whether this extension is actually needed.
+CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
+
+-- ============================================
+-- Table 1: papers — uploaded exam papers
+-- ============================================
+CREATE TABLE papers (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  -- Owner of the paper; the row is removed when the auth.users row is deleted.
+  user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
+
+  -- Metadata (filled in by the user at upload time)
+  course_code TEXT NOT NULL,                -- "COMP2011"
+  year INTEGER NOT NULL,                    -- 2024
+  term TEXT NOT NULL CHECK (term IN ('fall', 'spring', 'summer')),
+  exam_type TEXT NOT NULL CHECK (exam_type IN ('midterm', 'final', 'quiz')),
+
+  -- Files (Supabase Storage)
+  paper_file_url TEXT NOT NULL,             -- exam paper PDF
+  answer_file_url TEXT,                     -- answer PDF (optional)
+
+  -- Processing status
+  status TEXT NOT NULL DEFAULT 'uploaded'
+    CHECK (status IN ('uploaded', 'processing', 'ready', 'error')),
+  error_message TEXT,                       -- error details when processing fails
+
+  -- Extracted raw text (cache)
+  paper_extracted_text TEXT,
+  answer_extracted_text TEXT,
+
+  -- Whole-paper overview (AI-generated)
+  total_score INTEGER,
+  question_count INTEGER,
+  topics_summary JSONB,                     -- {"Linked List": 40, "Recursion": 30}
+  difficulty_level TEXT CHECK (difficulty_level IN ('easy', 'medium', 'hard')),
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Table 2: paper_questions — per-question data
+-- ============================================
+CREATE TABLE paper_questions (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  -- Parent paper; questions are removed together with their paper.
+  paper_id UUID NOT NULL REFERENCES papers(id) ON DELETE CASCADE,
+
+  -- Question identification
+  question_number TEXT NOT NULL,             -- "1", "1a", "2b"
+  parent_question TEXT,                      -- parent number of a sub-question: "1a" → "1"
+  display_order INTEGER NOT NULL,            -- display order
+
+  -- Question content
+  question_type TEXT NOT NULL
+    CHECK (question_type IN ('mc', 'fill_blank', 'long_question')),
+  question_text TEXT NOT NULL,               -- original question text
+  score INTEGER,                             -- points
+  page_number INTEGER,                       -- PDF page number (left/right pane sync)
+
+  -- Multiple-choice questions only
+  options JSONB,                             -- [{"label":"A","text":"..."},...]
+  correct_option TEXT,                       -- "B"
+
+  -- Fill-in-the-blank questions only
+  correct_answer TEXT,                       -- correct answer
+  accept_variants TEXT[],                    -- equivalent forms ["O(nlogn)","O(n log n)"]
+
+  -- Raw answer extracted from the answer PDF (all question types)
+  raw_answer_text TEXT,
+
+  -- Topic tags
+  topics TEXT[],                             -- ["Linked List","Pointer"]
+  difficulty TEXT CHECK (difficulty IN ('easy', 'medium', 'hard')),
+
+  -- AI trio (HTML + KaTeX)
+  knowledge_reminder TEXT,                   -- topic reminder
+  ai_hint TEXT,                              -- AI Hint
+  solution TEXT,                             -- Solution (step-by-step derivation)
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Table 3: user_attempts — users' answer attempts
+-- To be implemented in Phase 4; the table structure is created up front
+-- ============================================
+CREATE TABLE user_attempts (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  -- Both references cascade: attempts disappear with their user or question.
+  user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
+  question_id UUID NOT NULL REFERENCES paper_questions(id) ON DELETE CASCADE,
+
+  -- The user's answer
+  attempt_type TEXT NOT NULL
+    CHECK (attempt_type IN ('select', 'input', 'photo')),
+  user_answer TEXT,                          -- selected option / typed answer
+  photo_url TEXT,                            -- uploaded photo
+  photo_ocr_text TEXT,                       -- OCR result
+
+  -- AI verdict
+  is_correct BOOLEAN,
+  feedback TEXT,                             -- HTML — step-by-step error analysis
+  error_at_step INTEGER,                     -- step at which the error begins
+
+  -- Error book (collection of wrongly answered questions)
+  in_error_book BOOLEAN NOT NULL DEFAULT false,
+  mastered BOOLEAN NOT NULL DEFAULT false,
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Indexes
+-- ============================================
+-- papers: lookups by owner, course code and processing status.
+CREATE INDEX idx_papers_user ON papers USING btree (user_id);
+CREATE INDEX idx_papers_course ON papers USING btree (course_code);
+CREATE INDEX idx_papers_status ON papers USING btree (status);
+
+-- paper_questions: lookups by parent paper and question type; the GIN index
+-- covers the topics array.
+CREATE INDEX idx_questions_paper ON paper_questions USING btree (paper_id);
+CREATE INDEX idx_questions_type ON paper_questions USING btree (question_type);
+CREATE INDEX idx_questions_topics ON paper_questions USING gin (topics);
+
+-- user_attempts: lookups by user and by question, plus a partial index that
+-- only contains rows currently flagged as being in the error book.
+CREATE INDEX idx_attempts_user ON user_attempts USING btree (user_id);
+CREATE INDEX idx_attempts_question ON user_attempts USING btree (question_id);
+CREATE INDEX idx_attempts_errorbook ON user_attempts USING btree (user_id)
+  WHERE in_error_book = true;
+
+-- ============================================
+-- Row Level Security (RLS) policies
+-- ============================================
+ALTER TABLE papers          ENABLE ROW LEVEL SECURITY;
+ALTER TABLE paper_questions ENABLE ROW LEVEL SECURITY;
+ALTER TABLE user_attempts   ENABLE ROW LEVEL SECURITY;
+
+-- papers: a user may only see and modify the rows they uploaded themselves
+-- (to be revisited once a shared public library is added).
+CREATE POLICY "Users can view own papers" ON papers
+  FOR SELECT
+  USING (user_id = auth.uid());
+
+CREATE POLICY "Users can insert own papers" ON papers
+  FOR INSERT
+  WITH CHECK (user_id = auth.uid());
+
+-- No WITH CHECK clause is given, so PostgreSQL also applies the USING
+-- expression to the updated row.
+CREATE POLICY "Users can update own papers" ON papers
+  FOR UPDATE
+  USING (user_id = auth.uid());
+
+CREATE POLICY "Users can delete own papers" ON papers
+  FOR DELETE
+  USING (user_id = auth.uid());
+
+-- paper_questions: visibility follows the owning paper — a question is
+-- readable only when its paper belongs to the current user.
+CREATE POLICY "Users can view questions of own papers" ON paper_questions
+  FOR SELECT
+  USING (
+    paper_id IN (
+      SELECT p.id
+      FROM papers AS p
+      WHERE p.user_id = auth.uid()
+    )
+  );
+
+-- The backend writes questions with the service_role (processing pipeline).
+-- The frontend never writes questions directly; it triggers backend
+-- processing through the API.
+
+-- user_attempts: users may only read and write their own attempts.
+-- Only SELECT, INSERT and UPDATE policies are defined here.
+CREATE POLICY "Users can view own attempts" ON user_attempts
+  FOR SELECT
+  USING (user_id = auth.uid());
+
+CREATE POLICY "Users can insert own attempts" ON user_attempts
+  FOR INSERT
+  WITH CHECK (user_id = auth.uid());
+
+CREATE POLICY "Users can update own attempts" ON user_attempts
+  FOR UPDATE
+  USING (user_id = auth.uid());
+
+-- ============================================
+-- Trigger function that refreshes updated_at automatically
+-- ============================================
+-- Stamps the row being written with now() and hands it back to the executor;
+-- meant to be attached as a row-level BEFORE UPDATE trigger.
+CREATE OR REPLACE FUNCTION update_updated_at()
+RETURNS TRIGGER
+LANGUAGE plpgsql
+AS $func$
+BEGIN
+  NEW.updated_at := now();
+  RETURN NEW;
+END;
+$func$;
+
+-- Keep papers.updated_at current on every row update.
+CREATE TRIGGER papers_updated_at
+BEFORE UPDATE
+ON papers
+FOR EACH ROW
+EXECUTE FUNCTION update_updated_at();
+
+-- Keep paper_questions.updated_at current on every row update.
+CREATE TRIGGER questions_updated_at
+BEFORE UPDATE
+ON paper_questions
+FOR EACH ROW
+EXECUTE FUNCTION update_updated_at();
+
+-- ============================================
+-- Storage bucket
+-- ============================================
+-- Create the "papers" bucket manually in the Supabase Dashboard,
+-- or create it through the API (handled during backend initialisation).
--- a/supabase/migrations/002_course_library_fields.sql
+++ b/supabase/migrations/002_course_library_fields.sql
@@ -0,0 +1,38 @@
+-- ============================================
+-- PastPaper Master — Shared course library fields
+-- Version: 002
+-- Date: 2026-03-24
+-- ============================================
+
+-- Shared-library / canonical-import metadata on papers.
+-- source_kind is NOT NULL with a default of 'user_upload'.
+ALTER TABLE papers
+  ADD COLUMN IF NOT EXISTS source_kind TEXT
+    NOT NULL
+    DEFAULT 'user_upload'
+    CHECK (source_kind IN ('user_upload', 'course_library')),
+  ADD COLUMN IF NOT EXISTS source_exam_key TEXT,
+  ADD COLUMN IF NOT EXISTS part_label TEXT
+    CHECK (part_label IN ('A', 'B')),
+  ADD COLUMN IF NOT EXISTS source_question_filename TEXT,
+  ADD COLUMN IF NOT EXISTS source_answer_filename TEXT;
+
+-- At most one course-library paper per exam key (partial unique index).
+CREATE UNIQUE INDEX IF NOT EXISTS idx_papers_course_library_exam_key
+  ON papers USING btree (source_exam_key)
+  WHERE source_kind = 'course_library' AND source_exam_key IS NOT NULL;
+
+-- Composite lookup by course / sitting / part.
+CREATE INDEX IF NOT EXISTS idx_papers_course_lookup
+  ON papers USING btree (course_code, year, term, exam_type, part_label);
+
+-- Grading results should persist the awarded score.
+ALTER TABLE user_attempts ADD COLUMN IF NOT EXISTS score_given INTEGER;
+
+-- Error-book entries that are not yet mastered, newest first per user.
+CREATE INDEX IF NOT EXISTS idx_attempts_errorbook_active
+  ON user_attempts USING btree (user_id, created_at DESC)
+  WHERE in_error_book = true AND mastered = false;
+
+-- The backend and frontend already support true_false; the schema must match.
+-- Swap the CHECK constraint in one statement (drop if present, then re-add).
+ALTER TABLE paper_questions
+  DROP CONSTRAINT IF EXISTS paper_questions_question_type_check,
+  ADD CONSTRAINT paper_questions_question_type_check
+    CHECK (question_type IN ('mc', 'true_false', 'fill_blank', 'long_question'));
--- a/supabase/migrations/003_question_taxonomy_fields.sql
+++ b/supabase/migrations/003_question_taxonomy_fields.sql
@@ -0,0 +1,41 @@
+-- ============================================
+-- PastPaper Master — Question taxonomy fields
+-- Version: 003
+-- Date: 2026-03-24
+-- ============================================
+
+-- Each question is classified along several independent layers:
+--   1) question_format — how the student interacts with the question
+--   2) topic_tags / topic_primary / analytics_topic — course knowledge taxonomy
+--   3) skill_tags — the kind of thinking task the question requires
+ALTER TABLE paper_questions
+  ADD COLUMN IF NOT EXISTS question_format TEXT
+    CHECK (question_format IN ('mc', 'true_false', 'fill_blank', 'short_answer', 'long_answer', 'coding')),
+  ADD COLUMN IF NOT EXISTS topic_primary TEXT,
+  ADD COLUMN IF NOT EXISTS analytics_topic TEXT,
+  ADD COLUMN IF NOT EXISTS topic_tags TEXT[],
+  ADD COLUMN IF NOT EXISTS skill_tags TEXT[];
+
+-- The legacy topics column is kept for backward compatibility for now.
+-- New analytics and retrieval code should gradually move to
+-- analytics_topic / topic_tags.
+
+-- B-tree indexes for the scalar classification columns.
+CREATE INDEX IF NOT EXISTS idx_questions_question_format
+  ON paper_questions USING btree (question_format);
+CREATE INDEX IF NOT EXISTS idx_questions_analytics_topic
+  ON paper_questions USING btree (analytics_topic);
+
+-- GIN indexes for the array-valued tag columns.
+CREATE INDEX IF NOT EXISTS idx_questions_topic_tags
+  ON paper_questions USING gin (topic_tags);
+CREATE INDEX IF NOT EXISTS idx_questions_skill_tags
+  ON paper_questions USING gin (skill_tags);
--- a/supabase/migrations/004_decouple_course_library_from_auth.sql
+++ b/supabase/migrations/004_decouple_course_library_from_auth.sql
@@ -0,0 +1,30 @@
+-- ============================================
+-- PastPaper Master — Decouple course library papers from auth users
+-- Version: 004
+-- Date: 2026-03-24
+-- ============================================
+
+-- Course-library papers must not depend on a concrete auth.users row, so
+-- user_id becomes nullable. User-uploaded papers still keep user_id populated.
+ALTER TABLE papers ALTER COLUMN user_id DROP NOT NULL;
+
+-- The existing FK stays in place: user-owned papers can still reference
+-- auth.users, while course-library rows simply store NULL.
+
+-- Enforce the intended invariant with a CHECK constraint
+-- (drop if present, then re-add, in a single statement):
+--   * course_library rows must NOT have a user_id
+--   * user_upload rows MUST have a user_id
+ALTER TABLE papers
+  DROP CONSTRAINT IF EXISTS papers_source_kind_user_id_check,
+  ADD CONSTRAINT papers_source_kind_user_id_check CHECK (
+    (source_kind = 'course_library' AND user_id IS NULL)
+    OR (source_kind = 'user_upload' AND user_id IS NOT NULL)
+  );
+
+-- Existing RLS policies continue to apply to user-owned rows.
+-- Course-library rows are accessed through the backend service role.
--- a/supabase/migrations/005_allow_long_question_format_alias.sql
+++ b/supabase/migrations/005_allow_long_question_format_alias.sql
@@ -0,0 +1,27 @@
+-- ============================================
+-- PastPaper Master — Allow legacy long_question format alias
+-- Version: 005
+-- Date: 2026-03-24
+-- ============================================
+--
+-- Some existing seeds and older generated SQL wrote `long_question` into the
+-- `question_format` column, whereas the 003 taxonomy migration made
+-- `long_answer` the canonical value. Both are accepted temporarily so that
+-- historical inserts do not fail. New generators should keep emitting
+-- `long_answer`.
+
+-- Swap the CHECK constraint in one statement (drop if present, then re-add).
+ALTER TABLE paper_questions
+  DROP CONSTRAINT IF EXISTS paper_questions_question_format_check,
+  ADD CONSTRAINT paper_questions_question_format_check CHECK (
+    question_format IN (
+      'mc', 'true_false', 'fill_blank',
+      'short_answer', 'long_answer', 'long_question',
+      'coding'
+    )
+  );
--- a/supabase/migrations/006_make_scores_numeric.sql
+++ b/supabase/migrations/006_make_scores_numeric.sql
@@ -0,0 +1,17 @@
+-- ============================================
+-- PastPaper Master — Make score fields numeric
+-- Version: 006
+-- Date: 2026-04-10
+-- ============================================
+
+-- Convert the three score columns to NUMERIC, casting the stored values
+-- explicitly.
+
+-- Per-question points.
+ALTER TABLE paper_questions
+  ALTER COLUMN score TYPE NUMERIC USING CAST(score AS NUMERIC);
+
+-- Whole-paper total.
+ALTER TABLE papers
+  ALTER COLUMN total_score TYPE NUMERIC USING CAST(total_score AS NUMERIC);
+
+-- Score awarded to an attempt.
+ALTER TABLE user_attempts
+  ALTER COLUMN score_given TYPE NUMERIC USING CAST(score_given AS NUMERIC);
--- a/supabase/migrations/007_fulltext_search.sql
+++ b/supabase/migrations/007_fulltext_search.sql
@@ -0,0 +1,36 @@
+-- 007: Full-text search on paper_questions.question_text
+--
+-- This migration adds:
+--   * search_text — a STORED generated tsvector column that PostgreSQL
+--     maintains automatically on every INSERT/UPDATE,
+--   * a GIN index on that column for fast @@ queries,
+--   * a batch-scoring RPC used by the similar-question retrieval endpoint.
+
+ALTER TABLE paper_questions
+  ADD COLUMN IF NOT EXISTS search_text TSVECTOR
+    GENERATED ALWAYS AS (to_tsvector('english', COALESCE(question_text, ''))) STORED;
+
+CREATE INDEX IF NOT EXISTS idx_pq_search_text
+  ON paper_questions USING GIN (search_text);
+
+-- text_similarity_scores(query_text, candidate_ids)
+--   Scores candidate questions against query_text using ts_rank_cd over the
+--   generated search_text column.
+--
+--   Parameters:
+--     query_text     free text, parsed with plainto_tsquery('english', ...)
+--     candidate_ids  ids of the paper_questions rows to score
+--
+--   Returns one row (question_id, text_score) for every candidate id that
+--   exists in paper_questions. Questions that share no lexemes with the query
+--   still appear with score = 0, and a NULL query_text also yields 0 instead
+--   of NULL (see COALESCE below), so callers never receive a NULL score.
+--
+--   Normalization flag 1 divides the rank by 1 + log(document length).
+--   NOTE(review): earlier documentation described this as normalising by
+--   "unique word count", which is flag 8 in PostgreSQL — confirm which
+--   normalisation is intended before changing the flag.
+CREATE OR REPLACE FUNCTION text_similarity_scores(
+  query_text    text,
+  candidate_ids uuid[]
+)
+RETURNS TABLE (question_id uuid, text_score float4)
+LANGUAGE sql STABLE AS $$
+  SELECT
+    id,
+    COALESCE(
+      ts_rank_cd(
+        search_text,
+        plainto_tsquery('english', query_text),
+        1   -- divide rank by 1 + log(document length)
+      ),
+      0     -- ts_rank_cd is NULL when the query is NULL
+    )::float4
+  FROM paper_questions
+  WHERE id = ANY(candidate_ids);
+$$;
--- a/supabase/migrations/008_add_page_y_ratio.sql
+++ b/supabase/migrations/008_add_page_y_ratio.sql
@@ -0,0 +1,2 @@
+-- Adds a nullable NUMERIC page_y_ratio column to paper_questions.
+-- NOTE(review): presumably the question's vertical position on its PDF page
+-- expressed as a ratio — confirm against the code that writes it.
+ALTER TABLE paper_questions ADD COLUMN IF NOT EXISTS page_y_ratio NUMERIC;
--- a/supabase/migrations/008_fix_storage_url_placeholder.sql
+++ b/supabase/migrations/008_fix_storage_url_placeholder.sql
@@ -0,0 +1,27 @@
+-- 008: Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ placeholder in paper URLs
+--
+-- The course-library seed (comp2211_course_library_papers.sql) was inserted
+-- without substituting the placeholder.  This migration replaces it with the
+-- real Supabase Storage public base URL for the `papers` bucket.
+--
+-- Rows are selected with strpos() instead of LIKE: `_` is a single-character
+-- wildcard in LIKE patterns, so an unescaped placeholder pattern would also
+-- match URLs that merely resemble it.  strpos() performs a literal substring
+-- search; it returns NULL for NULL URLs, so those rows are skipped.
+
+UPDATE papers
+SET paper_file_url = REPLACE(
+  paper_file_url,
+  '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
+  'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
+)
+WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;
+
+UPDATE papers
+SET answer_file_url = REPLACE(
+  answer_file_url,
+  '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
+  'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
+)
+WHERE strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;
+
+-- Verify: should return 0 rows
+SELECT id, course_code, year, term, exam_type, paper_file_url, answer_file_url
+FROM papers
+WHERE strpos(paper_file_url,  '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0
+   OR strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;
--- a/supabase/seeds/comp2211_2022_fall_page_number_backfill.sql
+++ b/supabase/seeds/comp2211_2022_fall_page_number_backfill.sql
@@ -0,0 +1,52 @@
+-- Backfill PDF page numbers for the questions of the COMP2211 2022 fall
+-- midterm. Question numbers not listed below keep their current page_number
+-- (ELSE branch).
+UPDATE paper_questions
+SET page_number = CASE question_number
+  WHEN '1a' THEN 2
+  WHEN '1b' THEN 2
+  WHEN '1c' THEN 2
+  WHEN '1d' THEN 2
+  WHEN '1e' THEN 2
+  WHEN '1f' THEN 2
+  WHEN '1g' THEN 2
+  WHEN '1h' THEN 2
+  WHEN '1i' THEN 2
+  WHEN '1j' THEN 2
+  WHEN '2a_i' THEN 3
+  WHEN '2a_ii' THEN 3
+  WHEN '2a_iii' THEN 3
+  WHEN '2a_iv' THEN 3
+  WHEN '2a_v' THEN 4
+  WHEN '2a_vi' THEN 4
+  WHEN '2a_vii' THEN 4
+  WHEN '2b_i' THEN 5
+  WHEN '2b_ii' THEN 5
+  WHEN '2b_iii' THEN 5
+  WHEN '2c' THEN 6
+  WHEN '3a_i' THEN 8
+  WHEN '3a_ii' THEN 8
+  WHEN '3b_i' THEN 9
+  WHEN '3b_ii' THEN 9
+  WHEN '3b_iii' THEN 10
+  WHEN '3c' THEN 10
+  WHEN '3d' THEN 11
+  WHEN '4a' THEN 12
+  WHEN '4b' THEN 13
+  WHEN '4c' THEN 13
+  WHEN '4d' THEN 13
+  WHEN '5a' THEN 14
+  WHEN '5b' THEN 14
+  WHEN '5c' THEN 14
+  WHEN '5d' THEN 15
+  WHEN '5e' THEN 15
+  WHEN '5f' THEN 15
+  WHEN '6a' THEN 16
+  WHEN '6b_i' THEN 17
+  WHEN '6b_ii' THEN 17
+  WHEN '7a' THEN 18
+  WHEN '7b' THEN 18
+  ELSE page_number
+END
+WHERE paper_id = (
+  -- source_exam_key is only unique among course_library papers, so restrict
+  -- the lookup to that kind; otherwise this scalar subquery could return
+  -- several rows and abort the statement.
+  SELECT id
+  FROM papers
+  WHERE source_kind = 'course_library'
+    AND source_exam_key = 'COMP2211-2022-fall-midterm'
+);
--- a/supabase/seeds/comp2211_course_library_papers.sql
+++ b/supabase/seeds/comp2211_course_library_papers.sql
@@ -0,0 +1,148 @@
+-- ============================================
+-- PastPaper Master — COMP2211 course library papers
+-- Seed Date: 2026-03-24
+-- ============================================
+--
+-- Before running:
+-- 1. Upload the referenced PDFs into the `papers` bucket using the exact storage paths below.
+-- 2. Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ with your project-specific public base URL.
+--
+-- Example base URL:
+-- https://<project-ref>.supabase.co/storage/v1/object/public/papers
+--
+-- This seed only inserts canonical, importable COMP2211 course-library papers.
+
+-- Upsert the seven canonical COMP2211 course-library papers.
+-- Every row has user_id = NULL and source_kind = 'course_library'; the
+-- source_exam_key identifies the paper across re-runs of this seed.
+INSERT INTO papers (
+  user_id,
+  course_code,
+  year,
+  term,
+  exam_type,
+  part_label,
+  paper_file_url,
+  answer_file_url,
+  status,
+  source_kind,
+  source_exam_key,
+  source_question_filename,
+  source_answer_filename
+)
+VALUES
+  (
+    NULL,
+    'COMP2211',
+    2022,
+    'fall',
+    'midterm',
+    NULL,
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-fall-midterm/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-fall-midterm/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2022-fall-midterm',
+    '(COMP2211)[2022](f)midterm~=yjz8dxdd^_27002.pdf',
+    '(COMP2211)[2022](f)midterm~=yjz8dxdd^_18747.pdf'
+  ),
+  (
+    NULL,
+    'COMP2211',
+    2022,
+    'spring',
+    'midterm',
+    NULL,
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-midterm/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-midterm/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2022-spring-midterm',
+    '(COMP2211)[2022](s)midterm~=b8bidkgs^_14629.pdf',
+    '(COMP2211)[2022](s)midterm~=6ma030^_89587.pdf'
+  ),
+  -- The 2022 spring final is split into two papers, part A and part B.
+  (
+    NULL,
+    'COMP2211',
+    2022,
+    'spring',
+    'final',
+    'A',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-a/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-a/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2022-spring-final-part-a',
+    '(COMP2211)[2022](s)final~=b8bidkgs^_33018.pdf',
+    '(COMP2211)[2022](s)final~=ajou6^_82011.pdf'
+  ),
+  (
+    NULL,
+    'COMP2211',
+    2022,
+    'spring',
+    'final',
+    'B',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-b/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-b/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2022-spring-final-part-b',
+    '(COMP2211)[2022](s)final~=b8bidkgs^_40627.pdf',
+    '(COMP2211)[2022](s)final~=ajou6^_51199.pdf'
+  ),
+  (
+    NULL,
+    'COMP2211',
+    2023,
+    'spring',
+    'midterm',
+    NULL,
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2023-spring-midterm/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2023-spring-midterm/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2023-spring-midterm',
+    '(COMP2211)[2023](s)midterm~=bxbidkmj^_26587.pdf',
+    '(COMP2211)[2023](s)midterm~clchanbg^_17297.pdf'
+  ),
+  (
+    NULL,
+    'COMP2211',
+    2024,
+    'spring',
+    'midterm',
+    NULL,
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-midterm/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-midterm/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2024-spring-midterm',
+    '(COMP2211)[2024](s)midterm~=rcidkjgf^_82003.pdf',
+    '(COMP2211)[2024](s)midterm~=ubrzkjmz^_90406.pdf'
+  ),
+  (
+    NULL,
+    'COMP2211',
+    2024,
+    'spring',
+    'final',
+    NULL,
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-final/paper.pdf',
+    '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-final/answer.pdf',
+    'uploaded',
+    'course_library',
+    'COMP2211-2024-spring-final',
+    '(COMP2211)[2024](s)final~=igk5mmg^_90365.pdf',
+    '(COMP2211)[2024](s)final~=igk5mmg^_58857.pdf'
+  )
+-- The conflict target (column + predicate) matches the partial unique index
+-- idx_papers_course_library_exam_key, so re-running the seed updates the
+-- existing course-library row instead of inserting a duplicate.
+-- NOTE(review): status and both file URLs are overwritten with the seed values
+-- on every re-run, so an already-processed paper is reset to 'uploaded' and an
+-- unsubstituted placeholder URL would be written back — confirm this is intended.
+ON CONFLICT (source_exam_key)
+WHERE source_kind = 'course_library' AND source_exam_key IS NOT NULL
+DO UPDATE SET
+  course_code = EXCLUDED.course_code,
+  year = EXCLUDED.year,
+  term = EXCLUDED.term,
+  exam_type = EXCLUDED.exam_type,
+  part_label = EXCLUDED.part_label,
+  paper_file_url = EXCLUDED.paper_file_url,
+  answer_file_url = EXCLUDED.answer_file_url,
+  status = EXCLUDED.status,
+  source_question_filename = EXCLUDED.source_question_filename,
+  source_answer_filename = EXCLUDED.source_answer_filename;
--- a/supabase/seeds/comp2211_problem_level_questions.sql
+++ b/supabase/seeds/comp2211_problem_level_questions.sql
--- a/supabase/seeds/comp2211_problem_taxonomy_backfill.sql
+++ b/supabase/seeds/comp2211_problem_taxonomy_backfill.sql
@@ -0,0 +1,109 @@
+-- ============================================
+-- PastPaper Master — COMP2211 problem-level taxonomy backfill
+-- Seed Date: 2026-03-24
+-- ============================================
+--
+-- Purpose:
+-- 1. Backfill coarse taxonomy for COMP2211 question rows after the paper has been
+--    processed into `paper_questions`.
+-- 2. Use the audited cover-page problem mapping as the initial analytics baseline.
+-- 3. Only fill empty taxonomy fields, so later fine-grained per-question curation
+--    can safely overwrite these defaults.
+
+-- mapping: one row per (exam, top-level problem) carrying the coarse taxonomy
+-- defaults. raw_topic records the problem title for reference only; it is not
+-- written to any column by the UPDATE below.
+-- default_question_format uses the legacy 'long_question' alias for
+-- long-form problems.
+WITH mapping AS (
+  SELECT *
+  FROM (
+    VALUES
+      ('COMP2211-2022-fall-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2022-fall-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
+      ('COMP2211-2022-fall-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
+      ('COMP2211-2022-fall-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2022-fall-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2022-fall-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
+      ('COMP2211-2022-fall-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
+
+      ('COMP2211-2022-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2022-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
+      ('COMP2211-2022-spring-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-midterm', '7', 'Perceptron and Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'weight_update']::TEXT[], 'long_question'),
+
+      ('COMP2211-2022-spring-final-part-a', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2022-spring-final-part-a', '2', 'Naïve Bayes and K-Nearest Neighbors', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-final-part-a', '3', 'Multilayer Perceptron (MLP)', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-final-part-a', '4', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
+
+      ('COMP2211-2022-spring-final-part-b', '1', 'Convolutional Neural Network (CNN)', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['forward_pass', 'architecture_reasoning', 'manual_computation']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-final-part-b', '2', 'Python Programming: Convolutional Neural Network', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'code_tracing', 'debugging']::TEXT[], 'coding'),
+      ('COMP2211-2022-spring-final-part-b', '3', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2022-spring-final-part-b', '4', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer'),
+
+      ('COMP2211-2023-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2023-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
+      ('COMP2211-2023-spring-midterm', '3', 'Naïve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
+      ('COMP2211-2023-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2023-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2023-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
+      ('COMP2211-2023-spring-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
+
+      ('COMP2211-2024-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2024-spring-midterm', '2', 'Advanced Python for Artificial Intelligence', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'data_manipulation']::TEXT[], 'coding'),
+      ('COMP2211-2024-spring-midterm', '3', 'Model Evaluation & Advanced Python Programming', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation', 'Python Fundamentals']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'implementation']::TEXT[], 'coding'),
+      ('COMP2211-2024-spring-midterm', '4', 'Naïve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-midterm', '5', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-midterm', '6', 'Leader Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-midterm', '7', 'D-fold Cross Validation', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'reasoning']::TEXT[], 'long_question'),
+
+      ('COMP2211-2024-spring-final', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
+      ('COMP2211-2024-spring-final', '2', 'Advanced Python: Image Processing with NumPy', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'data_manipulation', 'filter_computation']::TEXT[], 'coding'),
+      ('COMP2211-2024-spring-final', '3', 'Naïve Bayes, K-Nearest Neighbors and Perceptron', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering', 'Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation', 'weight_update']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '4', 'Multi-layer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '5', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '6', 'Dilated Convolution and Dropout', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'comparison']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '7', 'Convolutional Neural Network', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'implementation']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '8', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
+      ('COMP2211-2024-spring-final', '9', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer')
+  ) AS t (
+    source_exam_key,
+    problem_number,
+    raw_topic,
+    analytics_topic,
+    topic_primary,
+    topic_tags,
+    skill_tags,
+    default_question_format
+  )
+)
+-- Fill only taxonomy fields that are still empty (NULL, empty array or empty
+-- string), so any value already curated on a question is left untouched.
+UPDATE paper_questions AS q
+SET analytics_topic = COALESCE(q.analytics_topic, mapping.analytics_topic),
+    topic_primary = COALESCE(q.topic_primary, mapping.topic_primary),
+    topic_tags = CASE
+      WHEN q.topic_tags IS NULL OR cardinality(q.topic_tags) = 0 THEN mapping.topic_tags
+      ELSE q.topic_tags
+    END,
+    skill_tags = CASE
+      WHEN q.skill_tags IS NULL OR cardinality(q.skill_tags) = 0 THEN mapping.skill_tags
+      ELSE q.skill_tags
+    END,
+    -- The legacy topics column receives the same tags as topic_tags.
+    topics = CASE
+      WHEN q.topics IS NULL OR cardinality(q.topics) = 0 THEN mapping.topic_tags
+      ELSE q.topics
+    END,
+    question_format = CASE
+      WHEN (q.question_format IS NULL OR q.question_format = '')
+        AND mapping.default_question_format IS NOT NULL
+      THEN mapping.default_question_format
+      ELSE q.question_format
+    END
+FROM papers AS p
+INNER JOIN mapping
+  ON mapping.source_exam_key = p.source_exam_key
+WHERE q.paper_id = p.id
+  AND p.source_kind = 'course_library'
+  AND p.course_code = 'COMP2211'
+  -- A question belongs to a problem when its number is the problem number
+  -- itself or the problem number followed by a non-digit suffix
+  -- (e.g. '2a_i' matches problem '2', while '12' does not match problem '1').
+  AND (
+    q.question_number = mapping.problem_number
+    OR q.question_number ~ ('^' || mapping.problem_number || '([^0-9].*)?$')
+  );