-- 007: Full-text search on paper_questions.question_text
--
-- Adds a tsvector generated column (auto-maintained by PostgreSQL on every
-- INSERT/UPDATE), a GIN index for fast @@ queries, and a batch-scoring RPC
-- used by the similar-question retrieval endpoint.

ALTER TABLE paper_questions
  ADD COLUMN IF NOT EXISTS search_text tsvector
  GENERATED ALWAYS AS (
    to_tsvector('english', coalesce(question_text, ''))
  ) STORED;

CREATE INDEX IF NOT EXISTS idx_pq_search_text
  ON paper_questions USING gin(search_text);

-- text_similarity_scores(query_text, candidate_ids)
--   Returns one row per candidate ID with a ts_rank_cd score normalised by
--   unique word count (normalization flag = 1).  Questions that share no
--   lexemes with the query still appear in the result with score = 0 so the
--   caller always gets a complete score map for every candidate.
CREATE OR REPLACE FUNCTION text_similarity_scores(
  query_text    text,
  candidate_ids uuid[]
)
RETURNS TABLE (question_id uuid, text_score float4)
LANGUAGE sql STABLE AS $$
  SELECT
    id,
    ts_rank_cd(
      search_text,
      plainto_tsquery('english', query_text),
      1   -- normalise by unique word count
    )::float4
  FROM paper_questions
  WHERE id = ANY(candidate_ids);
$$;