-- 007: Full-text search on paper_questions.question_text -- -- Adds a tsvector generated column (auto-maintained by PostgreSQL on every -- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC -- used by the similar-question retrieval endpoint. ALTER TABLE paper_questions ADD COLUMN IF NOT EXISTS search_text tsvector GENERATED ALWAYS AS ( to_tsvector('english', coalesce(question_text, '')) ) STORED; CREATE INDEX IF NOT EXISTS idx_pq_search_text ON paper_questions USING gin(search_text); -- text_similarity_scores(query_text, candidate_ids) -- Returns one row per candidate ID with a ts_rank_cd score normalised by -- unique word count (normalization flag = 1). Questions that share no -- lexemes with the query still appear in the result with score = 0 so the -- caller always gets a complete score map for every candidate. CREATE OR REPLACE FUNCTION text_similarity_scores( query_text text, candidate_ids uuid[] ) RETURNS TABLE (question_id uuid, text_score float4) LANGUAGE sql STABLE AS $$ SELECT id, ts_rank_cd( search_text, plainto_tsquery('english', query_text), 1 -- normalise by unique word count )::float4 FROM paper_questions WHERE id = ANY(candidate_ids); $$;