Initial commit: PastPaper Master full stack
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
36
supabase/migrations/007_fulltext_search.sql
Normal file
36
supabase/migrations/007_fulltext_search.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- 007: Full-text search on paper_questions.question_text
|
||||
--
|
||||
-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
|
||||
-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
|
||||
-- used by the similar-question retrieval endpoint.
|
||||
|
||||
-- search_text: stored generated column holding the 'english' tsvector of
-- question_text. A NULL question_text is treated as the empty string, so the
-- column itself is never NULL.
ALTER TABLE paper_questions
    ADD COLUMN IF NOT EXISTS search_text tsvector
        GENERATED ALWAYS AS (
            to_tsvector('english'::regconfig, COALESCE(question_text, ''))
        ) STORED;
|
||||
|
||||
-- GIN index over the generated tsvector column, used by @@ match queries.
CREATE INDEX IF NOT EXISTS idx_pq_search_text
    ON paper_questions
    USING GIN (search_text);
|
||||
|
||||
-- text_similarity_scores(query_text, candidate_ids)
--
-- Batch-scores candidate questions against a free-text query.
--
-- Parameters:
--   query_text     free text, parsed with plainto_tsquery('english', ...).
--   candidate_ids  paper_questions.id values to score.
--
-- Returns one (question_id, text_score) row for each paper_questions row
-- whose id is in candidate_ids. text_score is ts_rank_cd with normalization
-- flag 1, i.e. the cover-density rank divided by 1 + log(document length).
-- (Dividing by the unique word count would be flag 8; flag 1 is kept so that
-- existing scores do not shift.)
--
-- Questions that share no lexemes with the query still appear with
-- text_score = 0, and a NULL query_text also scores 0 rather than NULL, so
-- the caller never has to handle NULL scores. Candidate IDs that match no
-- row are not returned, and a NULL or empty candidate_ids yields no rows.
CREATE OR REPLACE FUNCTION text_similarity_scores(
    query_text    text,
    candidate_ids uuid[]
)
RETURNS TABLE (question_id uuid, text_score float4)
LANGUAGE sql STABLE AS $$
    SELECT
        pq.id AS question_id,
        COALESCE(
            ts_rank_cd(
                pq.search_text,
                plainto_tsquery('english', query_text),
                1  -- normalization: divide by 1 + log(document length)
            ),
            0::float4  -- ts_rank_cd yields NULL when an argument is NULL
        )::float4 AS text_score
    FROM paper_questions AS pq
    WHERE pq.id = ANY(candidate_ids);
$$;
|
||||
Reference in New Issue
Block a user