Files
PastpaperMaster/supabase/migrations/007_fulltext_search.sql
Zhao 7a09167261 Initial commit: PastPaper Master full stack
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 12:27:47 +07:00

37 lines
1.2 KiB
PL/PgSQL

-- 007: Full-text search on paper_questions.question_text
--
-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
-- used by the similar-question retrieval endpoint.
-- Stored generated column holding the English tsvector of question_text.
-- A NULL question_text is indexed as the empty string.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS search_text tsvector
        GENERATED ALWAYS AS (
            to_tsvector('english', COALESCE(question_text, ''))
        ) STORED;
-- GIN index over the tsvector column, for @@ full-text matches.
CREATE INDEX IF NOT EXISTS idx_pq_search_text
    ON paper_questions
    USING GIN (search_text);
-- text_similarity_scores(query_text, candidate_ids)
-- Scores a set of candidate questions against a free-text query.
--
-- Parameters:
--   query_text     free text, parsed with plainto_tsquery('english', ...).
--   candidate_ids  ids of the paper_questions rows to score.
--
-- Returns one row per candidate id that exists in paper_questions, with a
-- ts_rank_cd score computed with normalization flag 1, i.e. the rank divided
-- by 1 + log(document length). (Dividing by the number of unique words would
-- be flag 8; the flag is kept at 1 so existing scores do not change.)
-- Questions that share no lexemes with the query are still returned with
-- score = 0, and a NULL query_text scores 0 rather than NULL, so every
-- returned row carries a usable number. Ids with no matching row are omitted.
CREATE OR REPLACE FUNCTION text_similarity_scores(
    query_text text,
    candidate_ids uuid[]
)
RETURNS TABLE (question_id uuid, text_score float4)
LANGUAGE sql STABLE AS $$
    SELECT
        pq.id,
        COALESCE(
            ts_rank_cd(
                pq.search_text,
                q.tsq,
                1  -- divide rank by 1 + log(document length)
            ),
            0  -- ts_rank_cd yields NULL when the query is NULL
        )::float4
    FROM paper_questions AS pq
    -- Build the tsquery once in FROM instead of inside the per-row expression.
    -- Parameters are qualified with the function name so a same-named column
    -- on paper_questions can never shadow them.
    CROSS JOIN plainto_tsquery(
        'english',
        text_similarity_scores.query_text
    ) AS q (tsq)
    WHERE pq.id = ANY (text_similarity_scores.candidate_ids);
$$;