Initial commit: PastPaper Master full stack

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:15:35 +07:00
commit 7a09167261
105 changed files with 24799 additions and 0 deletions
--- a/supabase/migrations/007_fulltext_search.sql
+++ b/supabase/migrations/007_fulltext_search.sql
@@ -0,0 +1,36 @@
+-- 007: Full-text search on paper_questions.question_text
+--
+-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
+-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
+-- used by the similar-question retrieval endpoint.
+
+-- search_text: stored generated tsvector built from question_text with the
+-- 'english' configuration; a NULL question_text yields an empty document.
+ALTER TABLE paper_questions
+  ADD COLUMN IF NOT EXISTS search_text tsvector GENERATED ALWAYS AS
+    (to_tsvector('english', coalesce(question_text, ''))) STORED;
+
+-- GIN index backing @@ full-text matches against paper_questions.search_text.
+CREATE INDEX IF NOT EXISTS idx_pq_search_text ON paper_questions USING gin (search_text);
+
+-- text_similarity_scores(query_text, candidate_ids)
+--   Returns one row per candidate ID present in paper_questions with a
+--   ts_rank_cd score using normalization flag 1, which divides the rank by
+--   1 + log(document length) (flag 8 is the unique-word-count divisor).
+--   Non-matching questions, and a NULL query_text, score 0 rather than NULL.
+CREATE OR REPLACE FUNCTION text_similarity_scores(
+  query_text    text,
+  candidate_ids uuid[]
+)
+RETURNS TABLE (question_id uuid, text_score float4)
+LANGUAGE sql STABLE AS $$
+  SELECT
+    pq.id AS question_id,
+    -- A NULL query_text makes plainto_tsquery (and so ts_rank_cd) NULL; report 0.
+    coalesce(
+      ts_rank_cd(pq.search_text, plainto_tsquery('english', query_text), 1),
+      0
+    )::float4 AS text_score
+  FROM paper_questions AS pq
+  WHERE pq.id = ANY(candidate_ids);
+$$;