Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions
--- a/supabase/migrations/001_init_schema.sql
+++ b/supabase/migrations/001_init_schema.sql
@@ -0,0 +1,207 @@
+-- ============================================
+-- PastPaper Master — Initial database schema
+-- Version: 001
+-- Date: 2025-03-11
+-- ============================================
+
+-- Enable required extensions. NOTE(review): the tables below default their ids to gen_random_uuid(); confirm whether uuid-ossp is still needed for it.
+CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
+
+-- ============================================
+-- Table 1: papers — uploaded exam papers
+-- ============================================
+CREATE TABLE papers (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,  -- deleting the auth user deletes their papers
+
+  -- Metadata (entered by the user at upload time)
+  course_code TEXT NOT NULL,                -- "COMP2011"
+  year INTEGER NOT NULL,                    -- 2024
+  term TEXT NOT NULL CHECK (term IN ('fall', 'spring', 'summer')),
+  exam_type TEXT NOT NULL CHECK (exam_type IN ('midterm', 'final', 'quiz')),
+
+  -- Files (Supabase Storage)
+  paper_file_url TEXT NOT NULL,             -- exam paper PDF
+  answer_file_url TEXT,                     -- answer PDF (optional)
+
+  -- Processing status
+  status TEXT NOT NULL DEFAULT 'uploaded'
+    CHECK (status IN ('uploaded', 'processing', 'ready', 'error')),
+  error_message TEXT,                       -- error details when processing fails
+
+  -- Extracted raw text (cached)
+  paper_extracted_text TEXT,
+  answer_extracted_text TEXT,
+
+  -- Whole-paper overview (AI-generated)
+  total_score INTEGER,
+  question_count INTEGER,
+  topics_summary JSONB,                     -- {"Linked List": 40, "Recursion": 30}
+  difficulty_level TEXT CHECK (difficulty_level IN ('easy', 'medium', 'hard')),
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Table 2: paper_questions — per-question data
+-- ============================================
+CREATE TABLE paper_questions (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  paper_id UUID NOT NULL REFERENCES papers(id) ON DELETE CASCADE,  -- questions are removed with their paper
+
+  -- Question identity
+  question_number TEXT NOT NULL,             -- "1", "1a", "2b"
+  parent_question TEXT,                      -- parent number of a sub-question: "1a" → "1"
+  display_order INTEGER NOT NULL,            -- display order
+
+  -- Question content
+  question_type TEXT NOT NULL
+    CHECK (question_type IN ('mc', 'fill_blank', 'long_question')),
+  question_text TEXT NOT NULL,               -- original question text
+  score INTEGER,                             -- points available
+  page_number INTEGER,                       -- PDF page number (used to keep the two panes in sync)
+
+  -- Multiple-choice only
+  options JSONB,                             -- [{"label":"A","text":"..."},...]
+  correct_option TEXT,                       -- "B"
+
+  -- Fill-in-the-blank only
+  correct_answer TEXT,                       -- correct answer
+  accept_variants TEXT[],                    -- equivalent forms ["O(nlogn)","O(n log n)"]
+
+  -- Raw answer extracted from the answer PDF (all question types)
+  raw_answer_text TEXT,
+
+  -- Topic tags
+  topics TEXT[],                             -- ["Linked List","Pointer"]
+  difficulty TEXT CHECK (difficulty IN ('easy', 'medium', 'hard')),
+
+  -- AI trio (HTML + KaTeX)
+  knowledge_reminder TEXT,                   -- topic reminder
+  ai_hint TEXT,                              -- AI hint
+  solution TEXT,                             -- solution (step-by-step derivation)
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Table 3: user_attempts — user answer records
+-- Implemented in Phase 4; the table structure is created up front
+-- ============================================
+CREATE TABLE user_attempts (
+  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+  user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
+  question_id UUID NOT NULL REFERENCES paper_questions(id) ON DELETE CASCADE,
+
+  -- The user's answer
+  attempt_type TEXT NOT NULL
+    CHECK (attempt_type IN ('select', 'input', 'photo')),
+  user_answer TEXT,                          -- chosen option / typed answer
+  photo_url TEXT,                            -- uploaded photo
+  photo_ocr_text TEXT,                       -- OCR result
+
+  -- AI verdict
+  is_correct BOOLEAN,
+  feedback TEXT,                             -- HTML — step-by-step error analysis
+  error_at_step INTEGER,                     -- step at which the answer first goes wrong
+
+  -- Error book
+  in_error_book BOOLEAN NOT NULL DEFAULT false,
+  mastered BOOLEAN NOT NULL DEFAULT false,
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+-- ============================================
+-- Indexes
+-- ============================================
+CREATE INDEX idx_papers_user ON papers(user_id);
+CREATE INDEX idx_papers_course ON papers(course_code);
+CREATE INDEX idx_papers_status ON papers(status);
+
+CREATE INDEX idx_questions_paper ON paper_questions(paper_id);
+CREATE INDEX idx_questions_type ON paper_questions(question_type);
+CREATE INDEX idx_questions_topics ON paper_questions USING GIN(topics);  -- GIN over the TEXT[] column
+
+CREATE INDEX idx_attempts_user ON user_attempts(user_id);
+CREATE INDEX idx_attempts_question ON user_attempts(question_id);
+CREATE INDEX idx_attempts_errorbook ON user_attempts(user_id)
+  WHERE in_error_book = true;  -- partial index: only error-book rows are indexed
+
+-- ============================================
+-- Row Level Security (RLS) policies
+-- ============================================
+ALTER TABLE papers ENABLE ROW LEVEL SECURITY;
+ALTER TABLE paper_questions ENABLE ROW LEVEL SECURITY;
+ALTER TABLE user_attempts ENABLE ROW LEVEL SECURITY;
+
+-- papers: users may only access rows they uploaded (revisit once a public library is added).
+CREATE POLICY "Users can view own papers"
+  ON papers FOR SELECT
+  USING ((SELECT auth.uid()) = user_id);  -- scalar subquery: evaluated once per statement, not per row
+
+CREATE POLICY "Users can insert own papers"
+  ON papers FOR INSERT
+  WITH CHECK ((SELECT auth.uid()) = user_id);
+
+CREATE POLICY "Users can update own papers"
+  ON papers FOR UPDATE
+  USING ((SELECT auth.uid()) = user_id);  -- no WITH CHECK given, so USING also checks the new row
+
+CREATE POLICY "Users can delete own papers"
+  ON papers FOR DELETE
+  USING ((SELECT auth.uid()) = user_id);
+
+-- paper_questions: visibility follows the owning paper.
+CREATE POLICY "Users can view questions of own papers"
+  ON paper_questions FOR SELECT
+  USING (
+    EXISTS (
+      SELECT 1 FROM papers
+      WHERE papers.id = paper_questions.paper_id
+      AND papers.user_id = (SELECT auth.uid())  -- scalar subquery: evaluated once per statement
+    )
+  );
+
+-- The backend writes questions using service_role (processing pipeline).
+-- The frontend never writes questions directly; it triggers backend processing through the API.
+
+-- user_attempts: users may only read/write their own rows.
+CREATE POLICY "Users can view own attempts"
+  ON user_attempts FOR SELECT
+  USING ((SELECT auth.uid()) = user_id);  -- scalar subquery: evaluated once per statement, not per row
+
+CREATE POLICY "Users can insert own attempts"
+  ON user_attempts FOR INSERT
+  WITH CHECK ((SELECT auth.uid()) = user_id);
+
+CREATE POLICY "Users can update own attempts"
+  ON user_attempts FOR UPDATE
+  USING ((SELECT auth.uid()) = user_id);  -- no WITH CHECK given, so USING also checks the new row
+
+-- ============================================
+-- updated_at maintenance: row-level BEFORE UPDATE triggers
+-- ============================================
+CREATE OR REPLACE FUNCTION update_updated_at()
+RETURNS TRIGGER LANGUAGE plpgsql AS $fn$
+BEGIN
+  NEW.updated_at := now();  -- overwrite whatever value the UPDATE supplied
+  RETURN NEW;
+END;
+$fn$;
+
+CREATE TRIGGER papers_updated_at BEFORE UPDATE
+  ON papers FOR EACH ROW
+  EXECUTE FUNCTION update_updated_at();
+
+CREATE TRIGGER questions_updated_at BEFORE UPDATE
+  ON paper_questions FOR EACH ROW
+  EXECUTE FUNCTION update_updated_at();
+
+-- ============================================
+-- Storage bucket
+-- ============================================
+-- Create the bucket "papers" manually in the Supabase Dashboard,
+-- or create it through the API (handled during backend initialization).
--- a/supabase/migrations/002_course_library_fields.sql
+++ b/supabase/migrations/002_course_library_fields.sql
@@ -0,0 +1,38 @@
+-- ============================================
+-- PastPaper Master — Shared course library fields
+-- Version: 002
+-- Date: 2026-03-24
+-- ============================================
+
+-- Shared library / canonical import metadata on papers. Existing rows get source_kind = 'user_upload' from the column default.
+ALTER TABLE papers
+  ADD COLUMN IF NOT EXISTS source_kind TEXT NOT NULL DEFAULT 'user_upload'
+    CHECK (source_kind IN ('user_upload', 'course_library')),
+  ADD COLUMN IF NOT EXISTS source_exam_key TEXT,
+  ADD COLUMN IF NOT EXISTS part_label TEXT  -- nullable; a NULL label passes the CHECK
+    CHECK (part_label IN ('A', 'B')),
+  ADD COLUMN IF NOT EXISTS source_question_filename TEXT,
+  ADD COLUMN IF NOT EXISTS source_answer_filename TEXT;
+
+CREATE UNIQUE INDEX IF NOT EXISTS idx_papers_course_library_exam_key
+  ON papers(source_exam_key)  -- at most one course_library paper per exam key; user uploads are not constrained
+  WHERE source_kind = 'course_library' AND source_exam_key IS NOT NULL;
+
+CREATE INDEX IF NOT EXISTS idx_papers_course_lookup
+  ON papers(course_code, year, term, exam_type, part_label);  -- composite lookup by exam identity
+
+-- Persist the score awarded when an attempt is graded (nullable; no default).
+ALTER TABLE user_attempts
+  ADD COLUMN IF NOT EXISTS score_given INTEGER;
+
+CREATE INDEX IF NOT EXISTS idx_attempts_errorbook_active
+  ON user_attempts(user_id, created_at DESC)  -- per user, newest first
+  WHERE in_error_book = true AND mastered = false;  -- partial: error-book rows not yet mastered
+
+-- The backend and frontend already support true_false; schema must match.
+-- Drop and re-add in ONE statement so the swap is atomic and the table is never left unconstrained.
+ALTER TABLE paper_questions
+  DROP CONSTRAINT IF EXISTS paper_questions_question_type_check,
+  ADD CONSTRAINT paper_questions_question_type_check
+    CHECK (question_type IN (
+      'mc', 'true_false', 'fill_blank', 'long_question'));
--- a/supabase/migrations/003_question_taxonomy_fields.sql
+++ b/supabase/migrations/003_question_taxonomy_fields.sql
@@ -0,0 +1,41 @@
+-- ============================================
+-- PastPaper Master — Question taxonomy fields
+-- Version: 003
+-- Date: 2026-03-24
+-- ============================================
+
+-- A question needs multiple classification layers:
+-- 1) question_format: how the student interacts with it
+-- 2) topic_tags / topic_primary / analytics_topic: course knowledge taxonomy
+-- 3) skill_tags: what kind of thinking task the question requires
+ALTER TABLE paper_questions
+  ADD COLUMN IF NOT EXISTS question_format TEXT  -- nullable; NULL passes the CHECK, so existing rows need no backfill
+    CHECK (
+      question_format IN (
+        'mc',
+        'true_false',
+        'fill_blank',
+        'short_answer',
+        'long_answer',
+        'coding'
+      )
+    ),
+  ADD COLUMN IF NOT EXISTS topic_primary TEXT,
+  ADD COLUMN IF NOT EXISTS analytics_topic TEXT,
+  ADD COLUMN IF NOT EXISTS topic_tags TEXT[],
+  ADD COLUMN IF NOT EXISTS skill_tags TEXT[];
+
+-- Keep the legacy topics column for backward compatibility for now.
+-- New analytics and retrieval code should gradually move to analytics_topic/topic_tags.
+
+CREATE INDEX IF NOT EXISTS idx_questions_question_format
+  ON paper_questions(question_format);
+
+CREATE INDEX IF NOT EXISTS idx_questions_analytics_topic
+  ON paper_questions(analytics_topic);
+
+CREATE INDEX IF NOT EXISTS idx_questions_topic_tags
+  ON paper_questions USING GIN(topic_tags);  -- GIN over the TEXT[] column
+
+CREATE INDEX IF NOT EXISTS idx_questions_skill_tags
+  ON paper_questions USING GIN(skill_tags);  -- GIN over the TEXT[] column
--- a/supabase/migrations/004_decouple_course_library_from_auth.sql
+++ b/supabase/migrations/004_decouple_course_library_from_auth.sql
@@ -0,0 +1,30 @@
+-- ============================================
+-- PastPaper Master — Decouple course library papers from auth users
+-- Version: 004
+-- Date: 2026-03-24
+-- ============================================
+
+-- Course-library papers should not depend on a concrete auth.users row.
+-- User-uploaded papers still keep user_id populated.
+ALTER TABLE papers
+  ALTER COLUMN user_id DROP NOT NULL;  -- only nullability changes; the FK to auth.users is not touched here
+
+-- Keep existing FK so user-owned papers can still reference auth.users,
+-- while course-library rows simply use NULL.
+
+-- Tighten the intended invariant with a check constraint:
+-- - user_upload rows must have user_id
+-- - course_library rows must not have user_id
+-- Drop and re-add in ONE statement so the swap is atomic and the file stays re-runnable.
+-- NOTE: ADD CONSTRAINT validates existing rows; it fails if any course_library row still has a user_id.
+ALTER TABLE papers
+  DROP CONSTRAINT IF EXISTS papers_source_kind_user_id_check,
+  ADD CONSTRAINT papers_source_kind_user_id_check
+  CHECK (
+    (source_kind = 'user_upload' AND user_id IS NOT NULL)
+    OR
+    (source_kind = 'course_library' AND user_id IS NULL)
+  );
+
+-- Existing RLS policies continue to apply to user-owned rows.
+-- Course-library rows are accessed through the backend service role.
--- a/supabase/migrations/005_allow_long_question_format_alias.sql
+++ b/supabase/migrations/005_allow_long_question_format_alias.sql
@@ -0,0 +1,27 @@
+-- ============================================
+-- PastPaper Master — Allow legacy long_question format alias
+-- Version: 005
+-- Date: 2026-03-24
+-- ============================================
+--
+-- Some existing seeds and older generated SQL used `long_question` in the
+-- `question_format` column, while the 003 taxonomy migration introduced
+-- `long_answer` as the canonical value. Allow both temporarily so historical
+-- inserts do not fail. New generators should continue emitting `long_answer`.
+
+-- Swap the constraint in a single ALTER TABLE so the drop and re-add are atomic:
+-- the column is never left unconstrained if the statement fails part-way.
+ALTER TABLE paper_questions
+  DROP CONSTRAINT IF EXISTS paper_questions_question_format_check,
+  ADD CONSTRAINT paper_questions_question_format_check
+  CHECK (
+    question_format IN (
+      'mc',
+      'true_false',
+      'fill_blank',
+      'short_answer',
+      'long_answer',
+      'long_question',  -- legacy alias of long_answer, accepted temporarily
+      'coding'
+    )
+  );
--- a/supabase/migrations/006_make_scores_numeric.sql
+++ b/supabase/migrations/006_make_scores_numeric.sql
@@ -0,0 +1,17 @@
+-- ============================================
+-- PastPaper Master — Make score fields numeric
+-- Version: 006
+-- Date: 2026-04-10
+-- ============================================
+
+ALTER TABLE paper_questions               -- points available per question
+  ALTER COLUMN score SET DATA TYPE NUMERIC
+  USING CAST(score AS NUMERIC);
+
+ALTER TABLE papers                        -- whole-paper total
+  ALTER COLUMN total_score SET DATA TYPE NUMERIC
+  USING CAST(total_score AS NUMERIC);
+
+ALTER TABLE user_attempts                 -- score awarded to an attempt
+  ALTER COLUMN score_given SET DATA TYPE NUMERIC
+  USING CAST(score_given AS NUMERIC);
--- a/supabase/migrations/007_fulltext_search.sql
+++ b/supabase/migrations/007_fulltext_search.sql
@@ -0,0 +1,36 @@
+-- 007: Full-text search on paper_questions.question_text
+--
+-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
+-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
+-- used by the similar-question retrieval endpoint.
+
+ALTER TABLE paper_questions
+  ADD COLUMN IF NOT EXISTS search_text tsvector  -- derived from question_text; GENERATED ALWAYS, so it cannot be written directly
+  GENERATED ALWAYS AS (
+    to_tsvector('english', coalesce(question_text, ''))  -- English text-search config; NULL text is treated as ''
+  ) STORED;
+
+CREATE INDEX IF NOT EXISTS idx_pq_search_text
+  ON paper_questions USING gin(search_text);  -- GIN index over the generated tsvector
+
+-- text_similarity_scores(query_text, candidate_ids)
+--   Returns one row per paper_questions row whose id is in candidate_ids, with
+--   a ts_rank_cd score using normalization flag 1 (rank / (1 + log(document
+--   length))).  Rows sharing no lexemes with the query, and a NULL or empty
+--   query_text, yield score = 0 rather than NULL or a missing row.
+CREATE OR REPLACE FUNCTION text_similarity_scores(
+  query_text    text,
+  candidate_ids uuid[]
+)
+RETURNS TABLE (question_id uuid, text_score float4)
+LANGUAGE sql STABLE AS $$
+  SELECT
+    pq.id,
+    coalesce(ts_rank_cd(
+      pq.search_text,
+      plainto_tsquery('english', coalesce(query_text, '')),
+      1   -- flag 1: divide rank by 1 + log(document length)
+    ), 0)::float4
+  FROM paper_questions AS pq
+  WHERE pq.id = ANY(candidate_ids);
+$$;
--- a/supabase/migrations/008_add_page_y_ratio.sql
+++ b/supabase/migrations/008_add_page_y_ratio.sql
@@ -0,0 +1,2 @@
+ALTER TABLE paper_questions  -- NOTE(review): presumably the question's vertical position on its page as a fraction — confirm units/range
+  ADD COLUMN IF NOT EXISTS page_y_ratio NUMERIC;  -- nullable, no default; existing rows stay NULL
--- a/supabase/migrations/008_fix_storage_url_placeholder.sql
+++ b/supabase/migrations/008_fix_storage_url_placeholder.sql
@@ -0,0 +1,27 @@
+-- 008: Replace __SUPABASE_STORAGE_PUBLIC_BASE_URL__ placeholder in paper URLs
+--
+-- The course-library seed (comp2211_course_library_papers.sql) was inserted
+-- without substituting the placeholder.  This migration replaces it with the
+-- public base URL of the `papers` bucket (hard-coded for one Supabase project).
+-- Rows are matched with strpos(): "_" is a LIKE wildcard, so a LIKE pattern could over-match.
+UPDATE papers
+SET paper_file_url = REPLACE(
+  paper_file_url,
+  '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
+  'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
+)
+WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;
+
+UPDATE papers
+SET answer_file_url = REPLACE(
+  answer_file_url,
+  '__SUPABASE_STORAGE_PUBLIC_BASE_URL__',
+  'https://pvcxipwovpwrurebouwg.supabase.co/storage/v1/object/public/papers'
+)
+WHERE strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;  -- NULL URLs are skipped
+
+-- Verify: should return 0 rows
+SELECT id, course_code, year, term, exam_type, paper_file_url, answer_file_url
+FROM papers
+WHERE strpos(paper_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0
+   OR strpos(answer_file_url, '__SUPABASE_STORAGE_PUBLIC_BASE_URL__') > 0;