Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Zhao
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions

View File

@@ -0,0 +1,207 @@
-- ============================================
-- PastPaper Master — 初始数据库 Schema
-- Version: 001
-- Date: 2025-03-11
-- ============================================
-- Enable required extensions.
-- The tables in this file default their ids to gen_random_uuid(). That function
-- is built into PostgreSQL 13+ and is provided by pgcrypto on older servers;
-- uuid-ossp does not define it. uuid-ossp stays enabled for backward
-- compatibility, and pgcrypto is added so the defaults resolve on any version.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- ============================================
-- Table 1: papers — uploaded exam papers
-- ============================================
CREATE TABLE papers (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL
        REFERENCES auth.users (id) ON DELETE CASCADE,

    -- Metadata (entered by the user at upload time)
    course_code TEXT NOT NULL,  -- e.g. "COMP2011"
    year INTEGER NOT NULL,      -- e.g. 2024
    term TEXT NOT NULL
        CHECK (term IN ('fall', 'spring', 'summer')),
    exam_type TEXT NOT NULL
        CHECK (exam_type IN ('midterm', 'final', 'quiz')),

    -- Files (Supabase Storage)
    paper_file_url TEXT NOT NULL,  -- exam paper PDF
    answer_file_url TEXT,          -- answer PDF (optional)

    -- Processing status
    status TEXT NOT NULL DEFAULT 'uploaded'
        CHECK (status IN ('uploaded', 'processing', 'ready', 'error')),
    error_message TEXT,  -- error details when processing fails

    -- Extracted raw text (cached)
    paper_extracted_text TEXT,
    answer_extracted_text TEXT,

    -- Whole-paper overview (AI-generated)
    total_score INTEGER,
    question_count INTEGER,
    topics_summary JSONB,  -- e.g. {"Linked List": 40, "Recursion": 30}
    difficulty_level TEXT
        CHECK (difficulty_level IN ('easy', 'medium', 'hard')),

    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Table 2: paper_questions — per-question data
-- ============================================
CREATE TABLE paper_questions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    paper_id UUID NOT NULL
        REFERENCES papers (id) ON DELETE CASCADE,

    -- Question identification
    question_number TEXT NOT NULL,   -- e.g. "1", "1a", "2b"
    parent_question TEXT,            -- parent number of a sub-question: "1a" → "1"
    display_order INTEGER NOT NULL,  -- display order

    -- Question content
    question_type TEXT NOT NULL
        CHECK (question_type IN ('mc', 'fill_blank', 'long_question')),
    question_text TEXT NOT NULL,  -- original question text
    score INTEGER,                -- marks for the question
    page_number INTEGER,          -- PDF page number (used to link the side-by-side views)

    -- Multiple-choice only
    options JSONB,        -- e.g. [{"label":"A","text":"..."},...]
    correct_option TEXT,  -- e.g. "B"

    -- Fill-in-the-blank only
    correct_answer TEXT,     -- correct answer
    accept_variants TEXT[],  -- equivalent forms, e.g. ["O(nlogn)","O(n log n)"]

    -- Raw answer extracted from the answer PDF (all question types)
    raw_answer_text TEXT,

    -- Topic tags
    topics TEXT[],  -- e.g. ["Linked List","Pointer"]
    difficulty TEXT
        CHECK (difficulty IN ('easy', 'medium', 'hard')),

    -- AI-generated trio (HTML + KaTeX)
    knowledge_reminder TEXT,  -- knowledge-point reminder
    ai_hint TEXT,             -- AI hint
    solution TEXT,            -- step-by-step solution derivation

    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Table 3: user_attempts — user answer records
-- To be implemented in Phase 4; the table structure is created up front.
-- ============================================
CREATE TABLE user_attempts (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL
        REFERENCES auth.users (id) ON DELETE CASCADE,
    question_id UUID NOT NULL
        REFERENCES paper_questions (id) ON DELETE CASCADE,

    -- The user's answer
    attempt_type TEXT NOT NULL
        CHECK (attempt_type IN ('select', 'input', 'photo')),
    user_answer TEXT,     -- selected option / typed answer
    photo_url TEXT,       -- uploaded photo
    photo_ocr_text TEXT,  -- OCR result for the photo

    -- AI grading
    is_correct BOOLEAN,
    feedback TEXT,          -- HTML — step-by-step error analysis
    error_at_step INTEGER,  -- step at which the answer first goes wrong

    -- Error book (mistake notebook)
    in_error_book BOOLEAN NOT NULL DEFAULT false,
    mastered BOOLEAN NOT NULL DEFAULT false,

    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Indexes
-- ============================================
-- papers
CREATE INDEX idx_papers_user
    ON papers (user_id);
CREATE INDEX idx_papers_course
    ON papers (course_code);
CREATE INDEX idx_papers_status
    ON papers (status);

-- paper_questions (topics is an array column, hence the GIN index)
CREATE INDEX idx_questions_paper
    ON paper_questions (paper_id);
CREATE INDEX idx_questions_type
    ON paper_questions (question_type);
CREATE INDEX idx_questions_topics
    ON paper_questions USING GIN (topics);

-- user_attempts
CREATE INDEX idx_attempts_user
    ON user_attempts (user_id);
CREATE INDEX idx_attempts_question
    ON user_attempts (question_id);

-- Partial index: only rows flagged in_error_book are indexed.
CREATE INDEX idx_attempts_errorbook
    ON user_attempts (user_id)
    WHERE in_error_book = true;
-- ============================================
-- Row Level Security (RLS) policies
-- ============================================
-- Enable RLS on all three tables so the policies defined for them are enforced.
ALTER TABLE papers ENABLE ROW LEVEL SECURITY;
ALTER TABLE paper_questions ENABLE ROW LEVEL SECURITY;
ALTER TABLE user_attempts ENABLE ROW LEVEL SECURITY;
-- papers: users may only access the papers they uploaded themselves
-- (to be revisited when a public library is added).
--
-- auth.uid() is wrapped in a scalar sub-select so the planner can evaluate it
-- once per statement instead of once per row; the ownership test is unchanged.
CREATE POLICY "Users can view own papers"
    ON papers FOR SELECT
    USING ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can insert own papers"
    ON papers FOR INSERT
    WITH CHECK ((SELECT auth.uid()) = user_id);

-- WITH CHECK repeats the USING predicate, which is what PostgreSQL applies
-- implicitly when it is omitted; spelling it out makes it visible that an
-- update cannot hand a row over to another user.
CREATE POLICY "Users can update own papers"
    ON papers FOR UPDATE
    USING ((SELECT auth.uid()) = user_id)
    WITH CHECK ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can delete own papers"
    ON papers FOR DELETE
    USING ((SELECT auth.uid()) = user_id);
-- paper_questions: visibility follows the ownership of the parent paper.
-- auth.uid() is wrapped in a scalar sub-select so it can be evaluated once per
-- statement rather than once per candidate row; the ownership test is unchanged.
CREATE POLICY "Users can view questions of own papers"
    ON paper_questions FOR SELECT
    USING (
        EXISTS (
            SELECT 1
            FROM papers
            WHERE papers.id = paper_questions.paper_id
              AND papers.user_id = (SELECT auth.uid())
        )
    );
-- The backend writes questions using service_role (processing pipeline).
-- The frontend never writes questions directly; it triggers backend processing through the API.
-- user_attempts: users may only read and write their own attempts.
-- auth.uid() is wrapped in a scalar sub-select so the planner can evaluate it
-- once per statement instead of once per row; the ownership test is unchanged.
CREATE POLICY "Users can view own attempts"
    ON user_attempts FOR SELECT
    USING ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can insert own attempts"
    ON user_attempts FOR INSERT
    WITH CHECK ((SELECT auth.uid()) = user_id);

-- WITH CHECK repeats the USING predicate, which is what PostgreSQL applies
-- implicitly when it is omitted; stated explicitly so the rule is visible.
CREATE POLICY "Users can update own attempts"
    ON user_attempts FOR UPDATE
    USING ((SELECT auth.uid()) = user_id)
    WITH CHECK ((SELECT auth.uid()) = user_id);
-- ============================================
-- Trigger function: automatic updated_at maintenance
-- ============================================
-- Row-level trigger function. Overwrites updated_at on the row being written
-- with now() and returns that row so the write proceeds.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$fn$;
-- Run update_updated_at() before every row update on the two tables
-- that carry an updated_at column.
CREATE TRIGGER papers_updated_at
    BEFORE UPDATE ON papers
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER questions_updated_at
    BEFORE UPDATE ON paper_questions
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
-- ============================================
-- Storage bucket
-- ============================================
-- 在 Supabase Dashboard 中手动创建 bucket: "papers"
-- 或通过 API 创建(后端初始化时处理)

View File

@@ -0,0 +1,38 @@
-- ============================================
-- PastPaper Master — Shared course library fields
-- Version: 002
-- Date: 2026-03-24
-- ============================================
-- Shared library / canonical import metadata on papers.
ALTER TABLE papers
    ADD COLUMN IF NOT EXISTS source_kind TEXT NOT NULL DEFAULT 'user_upload'
        CHECK (source_kind IN ('user_upload', 'course_library')),
    ADD COLUMN IF NOT EXISTS source_exam_key TEXT,
    ADD COLUMN IF NOT EXISTS part_label TEXT
        CHECK (part_label IN ('A', 'B')),
    ADD COLUMN IF NOT EXISTS source_question_filename TEXT,
    ADD COLUMN IF NOT EXISTS source_answer_filename TEXT;

-- Uniqueness of source_exam_key is enforced only among course_library rows
-- that actually have a key; other rows are outside this partial index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_papers_course_library_exam_key
    ON papers (source_exam_key)
    WHERE source_kind = 'course_library'
      AND source_exam_key IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_papers_course_lookup
    ON papers (course_code, year, term, exam_type, part_label);
-- Grading results should persist the awarded score.
ALTER TABLE user_attempts
    ADD COLUMN IF NOT EXISTS score_given INTEGER;

-- Partial index over attempts that are in the error book and not yet mastered,
-- ordered newest first within each user.
CREATE INDEX IF NOT EXISTS idx_attempts_errorbook_active
    ON user_attempts (user_id, created_at DESC)
    WHERE in_error_book = true
      AND mastered = false;
-- The backend and frontend already support true_false; the schema must match.
-- The check is dropped and re-created under the same name with the wider list.
ALTER TABLE paper_questions
    DROP CONSTRAINT IF EXISTS paper_questions_question_type_check;

ALTER TABLE paper_questions
    ADD CONSTRAINT paper_questions_question_type_check
    CHECK (
        question_type IN ('mc', 'true_false', 'fill_blank', 'long_question')
    );

View File

@@ -0,0 +1,41 @@
-- ============================================
-- PastPaper Master — Question taxonomy fields
-- Version: 003
-- Date: 2026-03-24
-- ============================================
-- A question needs multiple classification layers:
--   1) question_format: how the student interacts with it
--   2) topic_tags / topic_primary / analytics_topic: course knowledge taxonomy
--   3) skill_tags: what kind of thinking task the question requires
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS question_format TEXT
        CHECK (
            question_format IN (
                'mc',
                'true_false',
                'fill_blank',
                'short_answer',
                'long_answer',
                'coding'
            )
        ),
    ADD COLUMN IF NOT EXISTS topic_primary TEXT,
    ADD COLUMN IF NOT EXISTS analytics_topic TEXT,
    ADD COLUMN IF NOT EXISTS topic_tags TEXT[],
    ADD COLUMN IF NOT EXISTS skill_tags TEXT[];
-- The legacy topics column is kept for backward compatibility for now.
-- New analytics and retrieval code should gradually move to
-- analytics_topic / topic_tags.

-- Scalar taxonomy columns
CREATE INDEX IF NOT EXISTS idx_questions_question_format
    ON paper_questions (question_format);
CREATE INDEX IF NOT EXISTS idx_questions_analytics_topic
    ON paper_questions (analytics_topic);

-- Array taxonomy columns (GIN)
CREATE INDEX IF NOT EXISTS idx_questions_topic_tags
    ON paper_questions USING GIN (topic_tags);
CREATE INDEX IF NOT EXISTS idx_questions_skill_tags
    ON paper_questions USING GIN (skill_tags);

View File

@@ -0,0 +1,30 @@
-- ============================================
-- PastPaper Master — Decouple course library papers from auth users
-- Version: 004
-- Date: 2026-03-24
-- ============================================
-- Course-library papers should not depend on a concrete auth.users row.
-- User-uploaded papers still keep user_id populated.
ALTER TABLE papers
    ALTER COLUMN user_id DROP NOT NULL;

-- The existing FK is kept so user-owned papers can still reference auth.users,
-- while course-library rows simply use NULL.
-- The intended invariant is tightened with a check constraint:
--   - user_upload rows must have user_id
--   - course_library rows must not have user_id
ALTER TABLE papers
    DROP CONSTRAINT IF EXISTS papers_source_kind_user_id_check;

ALTER TABLE papers
    ADD CONSTRAINT papers_source_kind_user_id_check
    CHECK (
        (source_kind = 'user_upload' AND user_id IS NOT NULL)
        OR (source_kind = 'course_library' AND user_id IS NULL)
    );
-- Existing RLS policies continue to apply to user-owned rows.
-- Course-library rows are accessed through the backend service role.

View File

@@ -0,0 +1,27 @@
-- ============================================
-- PastPaper Master — Allow legacy long_question format alias
-- Version: 005
-- Date: 2026-03-24
-- ============================================
--
-- Some existing seeds and older generated SQL used `long_question` in the
-- `question_format` column, while the 003 taxonomy migration introduced
-- `long_answer` as the canonical value. Both are allowed temporarily so that
-- historical inserts do not fail. New generators should keep emitting
-- `long_answer`.
ALTER TABLE paper_questions
    DROP CONSTRAINT IF EXISTS paper_questions_question_format_check;

ALTER TABLE paper_questions
    ADD CONSTRAINT paper_questions_question_format_check
    CHECK (
        question_format IN (
            'mc',
            'true_false',
            'fill_blank',
            'short_answer',
            'long_answer',
            'long_question',  -- legacy alias of long_answer
            'coding'
        )
    );

View File

@@ -0,0 +1,17 @@
-- ============================================
-- PastPaper Master — Make score fields numeric
-- Version: 006
-- Date: 2026-04-10
-- ============================================
-- Convert the three score columns from INTEGER to NUMERIC, casting the
-- existing values in place.
ALTER TABLE paper_questions
    ALTER COLUMN score TYPE NUMERIC
    USING CAST(score AS NUMERIC);

ALTER TABLE papers
    ALTER COLUMN total_score TYPE NUMERIC
    USING CAST(total_score AS NUMERIC);

ALTER TABLE user_attempts
    ALTER COLUMN score_given TYPE NUMERIC
    USING CAST(score_given AS NUMERIC);

View File

@@ -0,0 +1,36 @@
-- 007: Full-text search on paper_questions.question_text
--
-- Adds a stored tsvector generated column (maintained by PostgreSQL itself on
-- every INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring
-- RPC used by the similar-question retrieval endpoint.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS search_text tsvector
        GENERATED ALWAYS AS (
            to_tsvector('english', coalesce(question_text, ''))
        ) STORED;

CREATE INDEX IF NOT EXISTS idx_pq_search_text
    ON paper_questions USING gin (search_text);
-- text_similarity_scores(query_text, candidate_ids)
--
-- Scores each candidate question against query_text using ts_rank_cd over the
-- stored search_text vector; the query is parsed with
-- plainto_tsquery('english', ...).
--
-- Parameters:
--   query_text     free text to compare against
--   candidate_ids  ids of the paper_questions rows to score
-- Returns one row (question_id, text_score) per candidate row found.
--
-- Normalization: flag 1 divides the rank by 1 + log(document length). It is
-- NOT the unique-word-count normalization (that is flag 8). The flag is kept
-- at 1 so existing scores keep their scale.
--
-- There is no @@ filter, so questions sharing no lexemes with the query still
-- appear with score = 0. Candidate ids that match no row selected from
-- paper_questions produce no output row, so callers should treat a missing id
-- as "not scored".
CREATE OR REPLACE FUNCTION text_similarity_scores(
    query_text text,
    candidate_ids uuid[]
)
RETURNS TABLE (question_id uuid, text_score float4)
LANGUAGE sql STABLE AS $$
    SELECT
        pq.id,
        ts_rank_cd(
            pq.search_text,
            plainto_tsquery('english', query_text),
            1  -- divide the rank by 1 + log(document length)
        )::float4
    FROM paper_questions AS pq
    WHERE pq.id = ANY(candidate_ids);
$$;

View File

@@ -0,0 +1,2 @@
-- Adds the nullable page_y_ratio column to paper_questions.
-- NOTE(review): presumably the question's vertical position within its PDF
-- page, expressed as a fraction — confirm against the code that writes it.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS page_y_ratio NUMERIC;

View File

@@ -0,0 +1,27 @@
-- 008: Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ placeholder in paper URLs
--
-- The course-library seed (comp2211_course_library_papers.sql) was inserted
-- without substituting the placeholder. This migration replaces it with the
-- real Supabase Storage public base URL for the `papers` bucket.
--
-- Rows are selected with strpos() rather than LIKE: "_" is a single-character
-- wildcard in LIKE patterns, so the placeholder's underscores would match any
-- character in those positions. strpos() searches for the literal placeholder,
-- which is exactly the text REPLACE() substitutes. A NULL answer_file_url
-- yields NULL from strpos() and is therefore skipped.
UPDATE papers
SET paper_file_url = REPLACE(
        paper_file_url,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
        'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
    )
WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;

UPDATE papers
SET answer_file_url = REPLACE(
        answer_file_url,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
        'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
    )
WHERE strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;

-- Verify: should return 0 rows
SELECT
    id,
    course_code,
    year,
    term,
    exam_type,
    paper_file_url,
    answer_file_url
FROM papers
WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0
   OR strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;

View File

@@ -0,0 +1,52 @@
-- Backfill page_number for the questions of the course-library paper whose
-- source_exam_key is 'COMP2211-2022-fall-midterm'.
--
-- The question → page mapping is expressed as a VALUES list and joined, so:
--   * only questions listed in the mapping are written (unlisted questions and
--     rows that already hold the right page are left untouched, which also
--     keeps their updated_at from being bumped by the update trigger);
--   * the paper lookup is restricted to source_kind = 'course_library', the
--     only rows for which source_exam_key is guaranteed unique, so a second
--     row with the same key cannot make the statement fail or hit the wrong
--     paper.
WITH page_map (question_number, page_number) AS (
    VALUES
        ('1a', 2),
        ('1b', 2),
        ('1c', 2),
        ('1d', 2),
        ('1e', 2),
        ('1f', 2),
        ('1g', 2),
        ('1h', 2),
        ('1i', 2),
        ('1j', 2),
        ('2a_i', 3),
        ('2a_ii', 3),
        ('2a_iii', 3),
        ('2a_iv', 3),
        ('2a_v', 4),
        ('2a_vi', 4),
        ('2a_vii', 4),
        ('2b_i', 5),
        ('2b_ii', 5),
        ('2b_iii', 5),
        ('2c', 6),
        ('3a_i', 8),
        ('3a_ii', 8),
        ('3b_i', 9),
        ('3b_ii', 9),
        ('3b_iii', 10),
        ('3c', 10),
        ('3d', 11),
        ('4a', 12),
        ('4b', 13),
        ('4c', 13),
        ('4d', 13),
        ('5a', 14),
        ('5b', 14),
        ('5c', 14),
        ('5d', 15),
        ('5e', 15),
        ('5f', 15),
        ('6a', 16),
        ('6b_i', 17),
        ('6b_ii', 17),
        ('7a', 18),
        ('7b', 18)
)
UPDATE paper_questions AS q
SET page_number = page_map.page_number
FROM page_map
CROSS JOIN papers AS p
WHERE p.source_exam_key = 'COMP2211-2022-fall-midterm'
  AND p.source_kind = 'course_library'
  AND q.paper_id = p.id
  AND q.question_number = page_map.question_number
  AND q.page_number IS DISTINCT FROM page_map.page_number;

View File

@@ -0,0 +1,148 @@
-- ============================================
-- PastPaper Master — COMP2211 course library papers
-- Seed Date: 2026-03-24
-- ============================================
--
-- Before running:
-- 1. Upload the referenced PDFs into the `papers` bucket using the exact storage paths below.
-- 2. Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ with your project-specific public base URL.
--
-- Example base URL:
-- https://<project-ref>.supabase.co/storage/v1/object/public/papers
--
-- This seed only inserts canonical, importable COMP2211 course-library papers.
-- Upsert the COMP2211 course-library papers, keyed by source_exam_key.
-- Each tuple is laid out as:
--   user_id, course_code, year, term, exam_type, part_label,
--   paper_file_url,
--   answer_file_url,
--   status, source_kind, source_exam_key,
--   source_question_filename,
--   source_answer_filename
--
-- NOTE(review): on conflict, status is overwritten with the seed value
-- ('uploaded'), and the URLs with the seed URLs. Confirm that re-running this
-- seed is meant to reset papers that have already been processed.
INSERT INTO papers (
    user_id,
    course_code,
    year,
    term,
    exam_type,
    part_label,
    paper_file_url,
    answer_file_url,
    status,
    source_kind,
    source_exam_key,
    source_question_filename,
    source_answer_filename
)
VALUES
    (
        NULL, 'COMP2211', 2022, 'fall', 'midterm', NULL,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-fall-midterm/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-fall-midterm/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2022-fall-midterm',
        '(COMP2211)[2022](f)midterm~=yjz8dxdd^_27002.pdf',
        '(COMP2211)[2022](f)midterm~=yjz8dxdd^_18747.pdf'
    ),
    (
        NULL, 'COMP2211', 2022, 'spring', 'midterm', NULL,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-midterm/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-midterm/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2022-spring-midterm',
        '(COMP2211)[2022](s)midterm~=b8bidkgs^_14629.pdf',
        '(COMP2211)[2022](s)midterm~=6ma030^_89587.pdf'
    ),
    (
        NULL, 'COMP2211', 2022, 'spring', 'final', 'A',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-a/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-a/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2022-spring-final-part-a',
        '(COMP2211)[2022](s)final~=b8bidkgs^_33018.pdf',
        '(COMP2211)[2022](s)final~=ajou6^_82011.pdf'
    ),
    (
        NULL, 'COMP2211', 2022, 'spring', 'final', 'B',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-b/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2022-spring-final-part-b/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2022-spring-final-part-b',
        '(COMP2211)[2022](s)final~=b8bidkgs^_40627.pdf',
        '(COMP2211)[2022](s)final~=ajou6^_51199.pdf'
    ),
    (
        NULL, 'COMP2211', 2023, 'spring', 'midterm', NULL,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2023-spring-midterm/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2023-spring-midterm/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2023-spring-midterm',
        '(COMP2211)[2023](s)midterm~=bxbidkmj^_26587.pdf',
        '(COMP2211)[2023](s)midterm~clchanbg^_17297.pdf'
    ),
    (
        NULL, 'COMP2211', 2024, 'spring', 'midterm', NULL,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-midterm/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-midterm/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2024-spring-midterm',
        '(COMP2211)[2024](s)midterm~=rcidkjgf^_82003.pdf',
        '(COMP2211)[2024](s)midterm~=ubrzkjmz^_90406.pdf'
    ),
    (
        NULL, 'COMP2211', 2024, 'spring', 'final', NULL,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-final/paper.pdf',
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__/course-library/COMP2211/COMP2211-2024-spring-final/answer.pdf',
        'uploaded', 'course_library', 'COMP2211-2024-spring-final',
        '(COMP2211)[2024](s)final~=igk5mmg^_90365.pdf',
        '(COMP2211)[2024](s)final~=igk5mmg^_58857.pdf'
    )
-- The conflict target repeats the predicate of the partial unique index on
-- source_exam_key so that index is the one used for conflict detection.
ON CONFLICT (source_exam_key)
    WHERE source_kind = 'course_library'
      AND source_exam_key IS NOT NULL
DO UPDATE SET
    course_code = EXCLUDED.course_code,
    year = EXCLUDED.year,
    term = EXCLUDED.term,
    exam_type = EXCLUDED.exam_type,
    part_label = EXCLUDED.part_label,
    paper_file_url = EXCLUDED.paper_file_url,
    answer_file_url = EXCLUDED.answer_file_url,
    status = EXCLUDED.status,
    source_question_filename = EXCLUDED.source_question_filename,
    source_answer_filename = EXCLUDED.source_answer_filename;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,109 @@
-- ============================================
-- PastPaper Master — COMP2211 problem-level taxonomy backfill
-- Seed Date: 2026-03-24
-- ============================================
--
-- Purpose:
-- 1. Backfill coarse taxonomy for COMP2211 question rows after the paper has been
-- processed into `paper_questions`.
-- 2. Use the audited cover-page problem mapping as the initial analytics baseline.
-- 3. Only fill empty taxonomy fields, so later fine-grained per-question curation
-- can safely overwrite these defaults.
-- Problem-level taxonomy defaults, one row per (exam key, top-level problem).
-- raw_topic is carried for reference only; the UPDATE below does not write it.
WITH mapping (
    source_exam_key,
    problem_number,
    raw_topic,
    analytics_topic,
    topic_primary,
    topic_tags,
    skill_tags,
    default_question_format
) AS (
    VALUES
        ('COMP2211-2022-fall-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2022-fall-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
        ('COMP2211-2022-fall-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
        ('COMP2211-2022-fall-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2022-fall-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2022-fall-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
        ('COMP2211-2022-fall-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2022-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
        ('COMP2211-2022-spring-midterm', '3', 'Conditional Probability and Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-midterm', '7', 'Perceptron and Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'weight_update']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-a', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2022-spring-final-part-a', '2', 'Na¨ıve Bayes and K-Nearest Neighbors', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-a', '3', 'Multilayer Perceptron (MLP)', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-a', '4', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-b', '1', 'Convolutional Neural Network (CNN)', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['forward_pass', 'architecture_reasoning', 'manual_computation']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-b', '2', 'Python Programming: Convolutional Neural Network', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'code_tracing', 'debugging']::TEXT[], 'coding'),
        ('COMP2211-2022-spring-final-part-b', '3', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2022-spring-final-part-b', '4', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer'),
        ('COMP2211-2023-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2023-spring-midterm', '2', 'Python Fundamentals', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'debugging']::TEXT[], 'coding'),
        ('COMP2211-2023-spring-midterm', '3', 'Na¨ıve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
        ('COMP2211-2023-spring-midterm', '4', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2023-spring-midterm', '5', 'K-Means Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2023-spring-midterm', '6', 'Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'weight_update', 'formula_application']::TEXT[], 'long_question'),
        ('COMP2211-2023-spring-midterm', '7', 'Multilayer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-midterm', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2024-spring-midterm', '2', 'Advanced Python for Artificial Intelligence', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals']::TEXT[], ARRAY['code_tracing', 'implementation', 'data_manipulation']::TEXT[], 'coding'),
        ('COMP2211-2024-spring-midterm', '3', 'Model Evaluation & Advanced Python Programming', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation', 'Python Fundamentals']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'implementation']::TEXT[], 'coding'),
        ('COMP2211-2024-spring-midterm', '4', 'Na¨ıve Bayes Classifier', 'Probabilistic Models', 'Probabilistic Models', ARRAY['Probabilistic Models']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'classification_decision']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-midterm', '5', 'K-Nearest Neighbors', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'distance_calculation', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-midterm', '6', 'Leader Clustering', 'KNN and Clustering', 'KNN and Clustering', ARRAY['KNN and Clustering']::TEXT[], ARRAY['manual_computation', 'cluster_update', 'algorithm_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-midterm', '7', 'D-fold Cross Validation', 'Evaluation and Validation', 'Evaluation and Validation', ARRAY['Evaluation and Validation']::TEXT[], ARRAY['metric_computation', 'experimental_design', 'reasoning']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '1', 'True/False Questions', 'True/False', 'True/False', ARRAY['True/False']::TEXT[], ARRAY['concept_check', 'rapid_reasoning']::TEXT[], 'true_false'),
        ('COMP2211-2024-spring-final', '2', 'Advanced Python: Image Processing with NumPy', 'Python Fundamentals', 'Python Fundamentals', ARRAY['Python Fundamentals', 'Vision and CNN']::TEXT[], ARRAY['implementation', 'data_manipulation', 'filter_computation']::TEXT[], 'coding'),
        ('COMP2211-2024-spring-final', '3', 'Na¨ıve Bayes, K-Nearest Neighbors and Perceptron', NULL, 'Probabilistic Models', ARRAY['Probabilistic Models', 'KNN and Clustering', 'Perceptron and MLP']::TEXT[], ARRAY['manual_computation', 'probability_reasoning', 'distance_calculation', 'weight_update']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '4', 'Multi-layer Perceptron', 'Perceptron and MLP', 'Perceptron and MLP', ARRAY['Perceptron and MLP']::TEXT[], ARRAY['forward_pass', 'backpropagation', 'derivation']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '5', 'Digital Image Processing', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['manual_computation', 'filter_computation', 'architecture_reasoning']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '6', 'Dilated Convolution and Dropout', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'comparison']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '7', 'Convolutional Neural Network', 'Vision and CNN', 'Vision and CNN', ARRAY['Vision and CNN']::TEXT[], ARRAY['architecture_reasoning', 'forward_pass', 'implementation']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '8', 'Minimax and Alpha-Beta Pruning', 'Search and Games', 'Search and Games', ARRAY['Search and Games']::TEXT[], ARRAY['tree_search', 'pruning', 'manual_tracing']::TEXT[], 'long_question'),
        ('COMP2211-2024-spring-final', '9', 'Ethics of Artificial Intelligence', 'Ethics of AI', 'Ethics of AI', ARRAY['Ethics of AI']::TEXT[], ARRAY['concept_explanation', 'argumentation', 'comparison']::TEXT[], 'short_answer')
)
-- Fill only the taxonomy fields that are still empty on each question:
-- a value already present on the row always wins over the mapping default.
UPDATE paper_questions AS q
SET analytics_topic = COALESCE(q.analytics_topic, mapping.analytics_topic),
    topic_primary = COALESCE(q.topic_primary, mapping.topic_primary),
    -- cardinality(NULL) is NULL, so NULL and empty arrays both fall to ELSE.
    topic_tags = CASE
        WHEN cardinality(q.topic_tags) > 0 THEN q.topic_tags
        ELSE mapping.topic_tags
    END,
    skill_tags = CASE
        WHEN cardinality(q.skill_tags) > 0 THEN q.skill_tags
        ELSE mapping.skill_tags
    END,
    -- The legacy topics column receives the same default as topic_tags.
    topics = CASE
        WHEN cardinality(q.topics) > 0 THEN q.topics
        ELSE mapping.topic_tags
    END,
    -- A non-empty existing format is kept; a NULL or '' format takes the
    -- mapping default when there is one and is otherwise left as it was.
    question_format = CASE
        WHEN q.question_format <> '' THEN q.question_format
        ELSE COALESCE(mapping.default_question_format, q.question_format)
    END
FROM mapping
INNER JOIN papers AS p
    ON p.source_exam_key = mapping.source_exam_key
    AND p.source_kind = 'course_library'
    AND p.course_code = 'COMP2211'
WHERE q.paper_id = p.id
  -- Matches the problem number itself ("3") and any sub-question of it
  -- ("3a", "3b_ii"), but not a longer number such as "30": whatever follows
  -- the number must start with a non-digit.
  AND q.question_number ~ ('^' || mapping.problem_number || '([^0-9].*)?$');