Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Zhao
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions

View File

@@ -0,0 +1,207 @@
-- ============================================
-- PastPaper Master — 初始数据库 Schema
-- Version: 001
-- Date: 2025-03-11
-- ============================================
-- Enable required extensions.
-- NOTE(review): the tables in this migration default their ids with
-- gen_random_uuid(), which is not one of the uuid-ossp functions — confirm
-- whether this extension is still needed.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ============================================
-- Table 1: papers — uploaded exam papers
-- ============================================
CREATE TABLE papers (
    id                    UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Owning user; rows are removed when the referenced auth user is deleted.
    user_id               UUID        NOT NULL
        REFERENCES auth.users (id) ON DELETE CASCADE,

    -- Metadata (entered by the user at upload time)
    course_code           TEXT        NOT NULL,   -- e.g. "COMP2011"
    year                  INTEGER     NOT NULL,   -- e.g. 2024
    term                  TEXT        NOT NULL
        CHECK (term IN ('fall', 'spring', 'summer')),
    exam_type             TEXT        NOT NULL
        CHECK (exam_type IN ('midterm', 'final', 'quiz')),

    -- Files (Supabase Storage)
    paper_file_url        TEXT        NOT NULL,   -- question paper PDF
    answer_file_url       TEXT,                   -- answer PDF (optional)

    -- Processing status
    status                TEXT        NOT NULL DEFAULT 'uploaded'
        CHECK (status IN ('uploaded', 'processing', 'ready', 'error')),
    error_message         TEXT,                   -- error details when processing fails

    -- Extracted raw text (cache)
    paper_extracted_text  TEXT,
    answer_extracted_text TEXT,

    -- Whole-paper overview (AI-generated)
    total_score           INTEGER,
    question_count        INTEGER,
    topics_summary        JSONB,                  -- e.g. {"Linked List": 40, "Recursion": 30}
    difficulty_level      TEXT
        CHECK (difficulty_level IN ('easy', 'medium', 'hard')),

    -- Row timestamps
    created_at            TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at            TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Table 2: paper_questions — per-question data
-- ============================================
-- The CHECK constraints are declared inline and left unnamed on purpose: they
-- keep PostgreSQL's default names (e.g. paper_questions_question_type_check).
CREATE TABLE paper_questions (
    id                 UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Parent paper; questions are removed together with their paper.
    paper_id           UUID        NOT NULL
        REFERENCES papers (id) ON DELETE CASCADE,

    -- Question identification
    question_number    TEXT        NOT NULL,   -- e.g. "1", "1a", "2b"
    parent_question    TEXT,                   -- parent number of a sub-question: "1a" -> "1"
    display_order      INTEGER     NOT NULL,   -- ordering used for display

    -- Question content
    question_type      TEXT        NOT NULL
        CHECK (question_type IN ('mc', 'fill_blank', 'long_question')),
    question_text      TEXT        NOT NULL,   -- original question text
    score              INTEGER,                -- marks for this question
    page_number        INTEGER,                -- PDF page number (links the left/right panes)

    -- Multiple-choice only
    options            JSONB,                  -- e.g. [{"label":"A","text":"..."},...]
    correct_option     TEXT,                   -- e.g. "B"

    -- Fill-in-the-blank only
    correct_answer     TEXT,                   -- the correct answer
    accept_variants    TEXT[],                 -- equivalent forms, e.g. ["O(nlogn)","O(n log n)"]

    -- Raw answer extracted from the answer PDF (all question types)
    raw_answer_text    TEXT,

    -- Knowledge-point tags
    topics             TEXT[],                 -- e.g. ["Linked List","Pointer"]
    difficulty         TEXT
        CHECK (difficulty IN ('easy', 'medium', 'hard')),

    -- AI-generated trio (HTML + KaTeX)
    knowledge_reminder TEXT,                   -- knowledge-point reminder
    ai_hint            TEXT,                   -- AI hint
    solution           TEXT,                   -- solution (step-by-step derivation)

    -- Row timestamps
    created_at         TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at         TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Table 3: user_attempts — user answer records
-- To be implemented in Phase 4; the table structure is created up front.
-- ============================================
CREATE TABLE user_attempts (
    id             UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Attempts are removed when either the user or the question is deleted.
    user_id        UUID        NOT NULL
        REFERENCES auth.users (id) ON DELETE CASCADE,
    question_id    UUID        NOT NULL
        REFERENCES paper_questions (id) ON DELETE CASCADE,

    -- The user's answer
    attempt_type   TEXT        NOT NULL
        CHECK (attempt_type IN ('select', 'input', 'photo')),
    user_answer    TEXT,                   -- chosen option / typed answer
    photo_url      TEXT,                   -- uploaded photo
    photo_ocr_text TEXT,                   -- OCR result for the photo

    -- AI grading
    is_correct     BOOLEAN,
    feedback       TEXT,                   -- HTML — step-by-step error analysis
    error_at_step  INTEGER,                -- step at which the answer starts to go wrong

    -- Error book (collection of wrongly answered questions)
    in_error_book  BOOLEAN     NOT NULL DEFAULT false,
    mastered       BOOLEAN     NOT NULL DEFAULT false,

    created_at     TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ============================================
-- Indexes
-- ============================================

-- papers: by owner, by course code, by processing status
CREATE INDEX idx_papers_user
    ON papers (user_id);
CREATE INDEX idx_papers_course
    ON papers (course_code);
CREATE INDEX idx_papers_status
    ON papers (status);

-- paper_questions: by parent paper, by question type, and a GIN index over
-- the topics array
CREATE INDEX idx_questions_paper
    ON paper_questions (paper_id);
CREATE INDEX idx_questions_type
    ON paper_questions (question_type);
CREATE INDEX idx_questions_topics
    ON paper_questions USING GIN (topics);

-- user_attempts: by user, by question, plus a partial index restricted to
-- rows currently flagged as being in the error book
CREATE INDEX idx_attempts_user
    ON user_attempts (user_id);
CREATE INDEX idx_attempts_question
    ON user_attempts (question_id);
CREATE INDEX idx_attempts_errorbook
    ON user_attempts (user_id)
    WHERE in_error_book = true;
-- ============================================
-- Row Level Security (RLS) policies
-- ============================================
-- In every policy auth.uid() is wrapped in a scalar sub-select. The planner
-- can then evaluate it once per statement instead of once per candidate row;
-- the access rules themselves are unchanged.
-- UPDATE policies spell out WITH CHECK explicitly; it is the same expression
-- PostgreSQL would otherwise reuse from USING.
ALTER TABLE papers ENABLE ROW LEVEL SECURITY;
ALTER TABLE paper_questions ENABLE ROW LEVEL SECURITY;
ALTER TABLE user_attempts ENABLE ROW LEVEL SECURITY;

-- papers: users can only see papers they uploaded themselves
-- (to be revisited when a public library is added).
CREATE POLICY "Users can view own papers"
    ON papers FOR SELECT
    USING ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can insert own papers"
    ON papers FOR INSERT
    WITH CHECK ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can update own papers"
    ON papers FOR UPDATE
    USING ((SELECT auth.uid()) = user_id)
    WITH CHECK ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can delete own papers"
    ON papers FOR DELETE
    USING ((SELECT auth.uid()) = user_id);

-- paper_questions: visibility follows the owning paper.
CREATE POLICY "Users can view questions of own papers"
    ON paper_questions FOR SELECT
    USING (
        EXISTS (
            SELECT 1
            FROM papers
            WHERE papers.id = paper_questions.paper_id
              AND papers.user_id = (SELECT auth.uid())
        )
    );

-- No write policies exist for paper_questions: the backend writes questions
-- with the service_role (processing pipeline). The frontend never writes
-- questions directly; it triggers backend processing through the API.

-- user_attempts: users can only read and write their own attempts.
CREATE POLICY "Users can view own attempts"
    ON user_attempts FOR SELECT
    USING ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can insert own attempts"
    ON user_attempts FOR INSERT
    WITH CHECK ((SELECT auth.uid()) = user_id);

CREATE POLICY "Users can update own attempts"
    ON user_attempts FOR UPDATE
    USING ((SELECT auth.uid()) = user_id)
    WITH CHECK ((SELECT auth.uid()) = user_id);
-- ============================================
-- Trigger function: automatic updated_at maintenance
-- ============================================
-- Row-level trigger function. Overwrites NEW.updated_at with now() and
-- returns the modified row so the write proceeds with the fresh timestamp.
-- The target table must have an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$fn$;
-- Refresh papers.updated_at before every row update.
CREATE TRIGGER papers_updated_at
    BEFORE UPDATE
    ON papers
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

-- Refresh paper_questions.updated_at before every row update.
CREATE TRIGGER questions_updated_at
    BEFORE UPDATE
    ON paper_questions
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
-- ============================================
-- Storage bucket
-- ============================================
-- 在 Supabase Dashboard 中手动创建 bucket: "papers"
-- 或通过 API 创建(后端初始化时处理)

View File

@@ -0,0 +1,38 @@
-- ============================================
-- PastPaper Master — Shared course library fields
-- Version: 002
-- Date: 2026-03-24
-- ============================================
-- Shared library / canonical import metadata on papers.
-- source_kind tells user uploads apart from course-library imports; existing
-- rows receive the 'user_upload' default.
ALTER TABLE papers
    ADD COLUMN IF NOT EXISTS source_kind TEXT NOT NULL DEFAULT 'user_upload'
        CHECK (source_kind IN ('user_upload', 'course_library')),
    ADD COLUMN IF NOT EXISTS source_exam_key TEXT,
    ADD COLUMN IF NOT EXISTS part_label TEXT
        CHECK (part_label IN ('A', 'B')),
    ADD COLUMN IF NOT EXISTS source_question_filename TEXT,
    ADD COLUMN IF NOT EXISTS source_answer_filename TEXT;

-- A given source_exam_key may appear at most once among course-library rows;
-- rows without a key and user uploads are not restricted.
CREATE UNIQUE INDEX IF NOT EXISTS idx_papers_course_library_exam_key
    ON papers (source_exam_key)
    WHERE source_kind = 'course_library'
      AND source_exam_key IS NOT NULL;

-- Composite index over the columns that identify an exam sitting.
CREATE INDEX IF NOT EXISTS idx_papers_course_lookup
    ON papers (course_code, year, term, exam_type, part_label);
-- Grading results should persist the awarded score.
ALTER TABLE user_attempts
    ADD COLUMN IF NOT EXISTS score_given INTEGER;

-- Partial index over error-book entries that are not yet mastered, ordered
-- newest first within each user.
CREATE INDEX IF NOT EXISTS idx_attempts_errorbook_active
    ON user_attempts (user_id, created_at DESC)
    WHERE in_error_book = true
      AND mastered = false;
-- The backend and frontend already support true_false; the schema must match.
-- The existing check is dropped and the widened one added within a single
-- ALTER TABLE statement.
ALTER TABLE paper_questions
    DROP CONSTRAINT IF EXISTS paper_questions_question_type_check,
    ADD CONSTRAINT paper_questions_question_type_check
        CHECK (
            question_type IN (
                'mc',
                'true_false',
                'fill_blank',
                'long_question'
            )
        );

View File

@@ -0,0 +1,41 @@
-- ============================================
-- PastPaper Master — Question taxonomy fields
-- Version: 003
-- Date: 2026-03-24
-- ============================================
-- A question is classified along several independent layers:
--   1) question_format                            — how the student interacts with it
--   2) topic_tags / topic_primary / analytics_topic — course knowledge taxonomy
--   3) skill_tags                                 — the kind of thinking task it requires
-- The question_format check is declared inline and unnamed, so it receives
-- PostgreSQL's default constraint name.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS question_format TEXT
        CHECK (
            question_format IN (
                'mc', 'true_false', 'fill_blank',
                'short_answer', 'long_answer', 'coding'
            )
        ),
    ADD COLUMN IF NOT EXISTS topic_primary   TEXT,
    ADD COLUMN IF NOT EXISTS analytics_topic TEXT,
    ADD COLUMN IF NOT EXISTS topic_tags      TEXT[],
    ADD COLUMN IF NOT EXISTS skill_tags      TEXT[];
-- The legacy topics column is kept for backward compatibility for now.
-- New analytics and retrieval code should gradually move over to
-- analytics_topic / topic_tags.

-- B-tree indexes on the scalar taxonomy columns.
CREATE INDEX IF NOT EXISTS idx_questions_question_format
    ON paper_questions (question_format);
CREATE INDEX IF NOT EXISTS idx_questions_analytics_topic
    ON paper_questions (analytics_topic);

-- GIN indexes on the array-valued tag columns.
CREATE INDEX IF NOT EXISTS idx_questions_topic_tags
    ON paper_questions USING GIN (topic_tags);
CREATE INDEX IF NOT EXISTS idx_questions_skill_tags
    ON paper_questions USING GIN (skill_tags);

View File

@@ -0,0 +1,30 @@
-- ============================================
-- PastPaper Master — Decouple course library papers from auth users
-- Version: 004
-- Date: 2026-03-24
-- ============================================
-- Course-library papers should not depend on a concrete auth.users row.
-- User-uploaded papers still keep user_id populated.
ALTER TABLE papers
    ALTER COLUMN user_id DROP NOT NULL;

-- Backfill before the invariant is enforced: user_id was NOT NULL until the
-- statement above, so every course_library row that already exists still
-- carries a user_id. ADD CONSTRAINT validates existing rows and would reject
-- them, so detach them from their user first. The statement matches no rows
-- when the migration is re-run.
UPDATE papers
SET user_id = NULL
WHERE source_kind = 'course_library'
  AND user_id IS NOT NULL;

-- The existing FK is kept so user-owned papers can still reference
-- auth.users, while course-library rows simply use NULL.
-- Tighten the intended invariant with a check constraint:
--   - user_upload rows must have a user_id
--   - course_library rows must not have a user_id
ALTER TABLE papers
    DROP CONSTRAINT IF EXISTS papers_source_kind_user_id_check;
ALTER TABLE papers
    ADD CONSTRAINT papers_source_kind_user_id_check
        CHECK (
            (source_kind = 'user_upload' AND user_id IS NOT NULL)
            OR
            (source_kind = 'course_library' AND user_id IS NULL)
        );

-- Existing RLS policies continue to apply to user-owned rows.
-- Course-library rows are accessed through the backend service role.

View File

@@ -0,0 +1,27 @@
-- ============================================
-- PastPaper Master — Allow legacy long_question format alias
-- Version: 005
-- Date: 2026-03-24
-- ============================================
--
-- Some existing seeds and older generated SQL wrote `long_question` into the
-- `question_format` column, whereas the 003 taxonomy migration made
-- `long_answer` the canonical value. Both are accepted temporarily so that
-- historical inserts do not fail. New generators should keep emitting
-- `long_answer`.
-- The old check is dropped and the widened one added within a single
-- ALTER TABLE statement.
ALTER TABLE paper_questions
    DROP CONSTRAINT IF EXISTS paper_questions_question_format_check,
    ADD CONSTRAINT paper_questions_question_format_check
        CHECK (
            question_format IN (
                'mc', 'true_false', 'fill_blank',
                'short_answer', 'long_answer',
                'long_question',   -- legacy alias of long_answer
                'coding'
            )
        );

View File

@@ -0,0 +1,17 @@
-- ============================================
-- PastPaper Master — Make score fields numeric
-- Version: 006
-- Date: 2026-04-10
-- ============================================
-- Convert the three score columns to NUMERIC, casting existing values in
-- place (presumably so fractional marks can be stored — confirm with callers).

-- Marks available for a single question.
ALTER TABLE paper_questions
    ALTER COLUMN score TYPE NUMERIC
        USING CAST(score AS NUMERIC);

-- Total marks of a whole paper.
ALTER TABLE papers
    ALTER COLUMN total_score TYPE NUMERIC
        USING CAST(total_score AS NUMERIC);

-- Marks awarded to an individual attempt.
ALTER TABLE user_attempts
    ALTER COLUMN score_given TYPE NUMERIC
        USING CAST(score_given AS NUMERIC);

View File

@@ -0,0 +1,36 @@
-- 007: Full-text search on paper_questions.question_text
--
-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
-- used by the similar-question retrieval endpoint.
-- Stored generated column holding the 'english'-configuration tsvector of
-- question_text; a NULL text is treated as the empty string.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS search_text tsvector
        GENERATED ALWAYS AS (
            to_tsvector('english', coalesce(question_text, ''))
        ) STORED;

-- GIN index over the generated tsvector.
CREATE INDEX IF NOT EXISTS idx_pq_search_text
    ON paper_questions
    USING gin (search_text);
-- text_similarity_scores(query_text, candidate_ids)
--
-- Parameters:
--   query_text    free text; parsed with plainto_tsquery('english', ...).
--   candidate_ids ids of the paper_questions rows to score.
--
-- Returns one row (question_id, text_score) per candidate id that exists in
-- paper_questions. text_score is ts_rank_cd over the row's search_text with
-- normalization flag 1, i.e. the rank is divided by 1 + log(document length).
-- NOTE(review): earlier comments described this as "unique word count"
-- normalisation; in PostgreSQL that is flag 8, not 1 — confirm which one is
-- intended before changing the flag, since it alters every score.
--
-- Questions that share no lexemes with the query still appear with
-- score = 0, so the caller always gets a complete score map. The COALESCE
-- keeps that true when query_text is NULL: ts_rank_cd would otherwise return
-- NULL for every candidate.
CREATE OR REPLACE FUNCTION text_similarity_scores(
    query_text text,
    candidate_ids uuid[]
)
RETURNS TABLE (question_id uuid, text_score float4)
LANGUAGE sql STABLE AS $$
    SELECT
        pq.id,
        COALESCE(
            ts_rank_cd(
                pq.search_text,
                plainto_tsquery('english', query_text),
                1  -- normalization: divide by 1 + log(document length)
            ),
            0
        )::float4
    FROM paper_questions AS pq
    WHERE pq.id = ANY (candidate_ids);
$$;

View File

@@ -0,0 +1,2 @@
-- Adds a nullable NUMERIC page_y_ratio column to paper_questions.
-- NOTE(review): presumably the question's vertical position within its PDF
-- page, expressed as a fraction of the page height — confirm against the
-- code that writes it.
ALTER TABLE paper_questions
ADD COLUMN IF NOT EXISTS page_y_ratio NUMERIC;

View File

@@ -0,0 +1,27 @@
-- 008: Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ placeholder in paper URLs
--
-- The course-library seed (comp2211_course_library_papers.sql) was inserted
-- without substituting the placeholder. This migration replaces it with the
-- real Supabase Storage public base URL for the `papers` bucket.
-- Rows are selected with strpos() instead of LIKE. In a LIKE pattern every
-- "_" is a single-character wildcard, so a pattern built from the placeholder
-- would also match text that merely has the same shape. strpos() searches for
-- the literal placeholder only. Rows whose URL is NULL are skipped either way.

-- Question paper URLs.
UPDATE papers
SET paper_file_url = REPLACE(
        paper_file_url,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
        'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
    )
WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;

-- Answer URLs (optional column).
UPDATE papers
SET answer_file_url = REPLACE(
        answer_file_url,
        '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
        'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
    )
WHERE strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;

-- Verify: should return 0 rows.
SELECT
    id,
    course_code,
    year,
    term,
    exam_type,
    paper_file_url,
    answer_file_url
FROM papers
WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0
   OR strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;